这个网站的资源在浏览器里按 F12 就能看到所有图片的链接,直接用 GET 请求即可下载;我是写了个程序来批量下载的。
下面是用 GPT 转写的 Python 版本,不确定能否直接运行。
import requests
import re
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from PIL import Image
from pdf2image import convert_from_path
from fpdf import FPDF
# Path fragments appended to a book's base URL.
# JS_URL locates the script from which main() reads bookConfig.bookTitle
# and bookConfig.totalPageCount.
JS_URL = "/mobile/javascript/config.js"
# IMG_URL is the directory of the page images; page i is "<base><IMG_URL><i>.jpg".
IMG_URL = "/files/mobile/" # High resolution
# Alternative directory serving thumbnails instead of full-size pages:
# IMG_URL = "/files/thumb/" # Thumbnail
class Book:
    """Description of one online book to be downloaded and turned into a PDF.

    Attributes:
        title: Book title; ``create_pdf`` uses it as the stem of the output
            file name.
        page_num: Total number of pages; page images are numbered from 1 to
            ``page_num`` inclusive.
        url: URL prefix of the page images; page ``i`` lives at
            ``f"{url}{i}.jpg"``.
    """

    def __init__(self, title, page_num, url):
        self.title = title
        self.page_num = page_num
        self.url = url

    def __repr__(self):
        return (
            f"{type(self).__name__}(title={self.title!r}, "
            f"page_num={self.page_num!r}, url={self.url!r})"
        )
def get_javascript(url, timeout=30):
    """Fetch *url* and return the response body as text.

    Args:
        url: Absolute URL of the JavaScript file to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    # Without a timeout, requests can block forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail here rather than letting callers regex-match an HTML error page.
    response.raise_for_status()
    return response.text
def extract_config_value(js_content, pattern):
    """Return the first capture group of *pattern* found in *js_content*.

    Args:
        js_content: Text to search (the downloaded JavaScript source).
        pattern: Regular expression containing at least one capture group;
            the text matched by group 1 is returned.

    Returns:
        The captured string, or ``None`` when the pattern does not match.
    """
    match = re.search(pattern, js_content)
    return match.group(1) if match else None
def download_image(image_url, index, timeout=30):
    """Download one page image into the current working directory.

    Args:
        image_url: Absolute URL of the JPEG to fetch.
        index: Page number; embedded in the file name for readability.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Path of the saved file, or ``None`` when the server does not answer
        with HTTP 200. The caller is responsible for deleting the file.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    import tempfile  # local import: only this function needs it

    # stream=True was dropped: the whole body is read via .content anyway.
    response = requests.get(image_url, timeout=timeout)
    if response.status_code != 200:
        return None

    # Several books are converted in parallel and every book has pages
    # 1, 2, 3, ...; a fixed "image-<index>.jpg" name would let one thread
    # overwrite or delete a file another thread is still using. mkstemp
    # creates a uniquely named file while keeping the recognisable prefix.
    fd, image_path = tempfile.mkstemp(
        prefix=f"image-{index}-", suffix=".jpg", dir=os.getcwd()
    )
    with os.fdopen(fd, "wb") as f:
        f.write(response.content)
    return image_path
def create_pdf(book):
    """Download every page of *book* and write them to ``<title>.pdf``.

    Pages are fetched one at a time from ``f"{book.url}{i}.jpg"`` for
    ``i`` in ``1..book.page_num``. A page that cannot be downloaded is
    skipped, so the resulting PDF may have fewer pages than the book.

    Args:
        book: Object exposing ``title``, ``page_num`` and ``url``.
    """
    # NOTE(review): every page is 1000x1000 pt while each image is placed at
    # its pixel size interpreted as points, so larger images presumably
    # extend past the page edge - confirm against real page dimensions.
    pdf = FPDF(unit="pt", format=(1000, 1000))  # Adjust format based on typical image size
    for page in range(1, book.page_num + 1):
        image_path = download_image(f"{book.url}{page}.jpg", page)
        if not image_path:
            continue  # best effort: a missing page is skipped
        try:
            # Close the image handle before the file is deleted; an open
            # handle leaks a descriptor and blocks os.remove on Windows.
            with Image.open(image_path) as img:
                width, height = img.size
            pdf.add_page()
            pdf.image(image_path, x=0, y=0, w=width, h=height)
        finally:
            # Clean up the downloaded image even if embedding it failed.
            os.remove(image_path)
    pdf_path = f"{book.title}.pdf"
    pdf.output(pdf_path)
    print(f"PDF generated: {pdf_path}")
def main():
    """Read each book's config, then build one PDF per book in parallel.

    For every base URL the book title and page count are extracted from the
    site's config script. Books whose config cannot be fetched or lacks a
    page count are reported and skipped. PDF generation runs in a thread
    pool, and a failure for one book is reported without stopping the others.
    """
    urls = [
        "https://dfz.zj.gov.cn/zlyz/ossfs//h5/ZS-Z-330383-2009-001-0101/",
        # ... other URLs ...
    ]
    books = []
    for url in urls:
        try:
            js_content = get_javascript(url + JS_URL)
        except requests.RequestException as exc:
            print(f"Skipping {url}: cannot fetch config ({exc})")
            continue
        book_title = extract_config_value(js_content, r"bookConfig\.bookTitle=\"([^\"]+)\"")
        page_count = extract_config_value(js_content, r"bookConfig\.totalPageCount=(\d+)")
        if page_count is None:
            # int(None) would raise an opaque TypeError; report the cause.
            print(f"Skipping {url}: totalPageCount not found in config")
            continue
        if book_title is None:
            # Fall back to the last URL segment so the output is not "None.pdf".
            book_title = url.rstrip("/").rsplit("/", 1)[-1]
        total_count = int(page_count)
        print(f"Book Title: {book_title}")
        print(f"Total Page Count: {total_count}")
        books.append(Book(book_title, total_count, url + IMG_URL))

    with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = {executor.submit(create_pdf, book): book for book in books}
        # Inspect every future: an exception raised inside a worker is
        # otherwise stored on the future and never shown to the user.
        for future in as_completed(futures):
            book = futures[future]
            try:
                future.result()
            except Exception as exc:  # top-level boundary: report, keep going
                print(f"Failed to create PDF for {book.title!r}: {exc}")
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|