如何从URL打开PDF而不是从磁盘中打开?
类似于:
# Illustrative only: the call the asker would like to make, passing a URL to
# file() where a path on disk is normally given.
input1 = PdfFileReader(file("http://example.com/a.pdf", "rb"))
我想从网页上打开多个文件并下载它们的合并文件。
我认为urllib2可以帮助你得到你想要的内容。
from urllib2 import Request, urlopen
from pyPdf import PdfFileWriter, PdfFileReader
from StringIO import StringIO
# Download a remote PDF into memory and copy every page into output.pdf.
url = "http://www.silicontao.com/ProgrammingGuide/other/beejnet.pdf"
writer = PdfFileWriter()

# Read the whole response body, then release the connection even if the
# read fails.
response = urlopen(Request(url))
try:
    remoteFile = response.read()
finally:
    response.close()

# Wrap the downloaded data in a file-like object for PdfFileReader.
memoryFile = StringIO(remoteFile)
pdfFile = PdfFileReader(memoryFile)

for pageNum in xrange(pdfFile.getNumPages()):
    currentPage = pdfFile.getPage(pageNum)
    # Optional hook: a watermark could be stamped on the page here, e.g.
    # currentPage.mergePage(watermark.getPage(0))
    writer.addPage(currentPage)

# The context manager closes the output file even if writing raises.
with open("output.pdf", "wb") as outputStream:
    writer.write(outputStream)
outputStream = file("output.pdf","wb")
需要改成 outputStream = open("output.pdf","wb")。 - John
from io import StringIO。 - Shriganesh Kolhe
我认为现在可以使用 Requests 来简化它。
import io
import requests
from PyPDF2 import PdfReader
# Fetch a PDF over HTTP with Requests and open it from memory with PdfReader.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Windows; Windows x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36'}
url = 'https://www.url_of_pdf_file.com/sample.pdf'
response = requests.get(url=url, headers=headers, timeout=120)
# Fail fast on 4xx/5xx responses instead of passing an error page to the
# PDF parser as if it were a document.
response.raise_for_status()
# response.content is the raw body as bytes; BytesIO exposes it as a
# file-like object.
on_fly_mem_obj = io.BytesIO(response.content)
pdf_file = PdfReader(on_fly_mem_obj)
首先,您可以单独下载PDF文件,然后使用pypdf库读取它。
import urllib
import os

# Download the PDF to a local file first, then open that file for reading.
url = 'http://example.com/a.pdf'

# Name the local copy after the last URL segment and force a ".pdf" suffix.
# Path functions need this string, not the file object.
pdf_path = os.path.splitext(url.split('/')[-1])[0] + ".pdf"

webFile = urllib.urlopen(url)
try:
    # PDF data is binary, so write in 'wb'; text mode may translate newline
    # bytes and corrupt the document.
    with open(pdf_path, 'wb') as pdfFile:
        pdfFile.write(webFile.read())
finally:
    webFile.close()

# Reopen the saved copy in binary mode for the PDF reader.
input1 = PdfFileReader(open(pdf_path, "rb"))
针对 Python 3.8 版本
import io
from urllib.request import Request, urlopen
from PyPDF2 import PdfFileReader
class GetPdfFromUrlMixin:
    """Mixin providing a helper that loads a PDF document from a URL."""

    def get_pdf_from_url(self, url):
        """Download the PDF at ``url`` and open it from memory.

        Errors raised by ``urlopen`` or ``PdfFileReader`` are not caught
        here and propagate to the caller.

        :param url: url to get pdf file
        :return: PdfFileReader object backed by an in-memory copy of the file
        """
        # Close the HTTP response as soon as the body has been read, so the
        # connection is released even if reading fails.
        with urlopen(Request(url)) as response:
            remote_file = response.read()
        # Wrap the downloaded bytes in a file-like object for the reader.
        memory_file = io.BytesIO(remote_file)
        return PdfFileReader(memory_file)