[md][md]## 免责声明
1、本帖仅作为技术讨论,本人不会利用以下技术盈利,也不会从事任何侵害该网站的行为。
2、如觉得此帖不妥,请联系本人,本人将第一时间删除。
背景
这是应一个悬赏帖的求助而做的软件,可能还有其他人需要,就发出来和大家一起探讨。
网址:aHR0cHM6Ly9rZC5uc2ZjLmNuL2ZpbmFsUHJvamVjdEluaXQ

编程过程
该网站没有提供获取图片总数的接口,所以程序从第1页开始逐页请求(代码中为 range(1, 500),即最多499页),直到接口不再返回图片地址,或图片资源返回404时才停止。
Python代码
UI代码
def __init__(self, root):
    """Build the converter window inside the given Tk root.

    Sets the window title and size, then creates (top to bottom) the URL
    label and entry, the progress label and bar, and the convert button
    wired to ``self.start_conversion``.

    Args:
        root: The Tk root window that hosts all widgets.
    """
    self.root = root
    root.title("NSFC2PDF")
    root.geometry("500x200")

    # Create the widgets in on-screen order; packing happens below.
    self.url_label = tk.Label(root, text="输入图片网页URL:")
    self.url_entry = tk.Entry(root, width=60)
    self.progress_label = tk.Label(root, text="进度:")
    self.progress_bar = Progressbar(root, length=400, mode="determinate")
    self.convert_button = tk.Button(
        root, text="转换为PDF", command=self.start_conversion
    )

    # (widget, vertical padding) pairs, packed in this exact order.
    stacked = (
        (self.url_label, 5),
        (self.url_entry, 5),
        (self.progress_label, 5),
        (self.progress_bar, 5),
        (self.convert_button, 10),
    )
    for widget, padding in stacked:
        widget.pack(pady=padding)

    # Paths of temporary image files.
    self.temp_images = []
python核心代码
为图片下载添加了最多5次的重试机制,防止因网站卡顿导致部分图片没有下载完成。
def download_images(self, id):
    """Download the report page images of one project into a local cache.

    Pages are requested one by one starting at index 1. The loop stops when
    the API response carries no image URL, when the image URL answers 404,
    or after 499 pages. Pages already present in the cache directory are
    reused without any network request.

    Args:
        id: Project identifier, sent as the ``id`` form field of the API.

    Returns:
        list: Paths of the page images in page order (cached and newly
        downloaded). An empty list if an unexpected error aborted the run;
        in that case an error dialog has been shown.
    """
    max_pages = 499
    max_retries = 5  # retries after the first attempt
    total_attempts = max_retries + 1
    request_timeout = 10  # seconds, applied to every HTTP request

    # Cache pages per project. A single shared directory would let leftover
    # pages of another project be mistaken for pages of this one.
    temp_dir = os.path.join('temp', str(id))
    os.makedirs(temp_dir, exist_ok=True)
    images = []
    headers = {
        'Host': 'kd.nsfc.cn',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
        'Accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'Authorization': 'Bearer false',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'Origin': 'https://kd.nsfc.cn',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    try:
        for i in range(1, max_pages + 1):
            temp_path = os.path.join(temp_dir, f"{i:05d}.png")
            print(temp_path)
            if os.path.isfile(temp_path):
                images.append(temp_path)
                print(temp_path, '已经存在')
                continue

            # Ask the API where the image of page i lives.
            response = requests.post(
                'https://kd.nsfc.cn/api/baseQuery/completeProjectReport',
                headers=headers,
                data={'id': id, 'index': str(i)},
                timeout=request_timeout,
            )
            if response.status_code != 200:
                print(f"API请求失败,状态码:{response.status_code}")
                continue
            bookjson = response.json()
            # "data" may be present but null, so do not rely on the
            # default argument of dict.get here.
            relative_url = (bookjson.get('data') or {}).get('url')
            if not relative_url:
                print("没有更多图片了")
                break
            img_url = f'https://kd.nsfc.cn{relative_url}'
            print(f"正在处理第 {i} 张图片: {img_url}")

            # Download with retries; response_img stays None on failure.
            response_img = None
            for attempt in range(1, total_attempts + 1):
                try:
                    candidate = requests.get(
                        img_url, headers=headers, timeout=request_timeout
                    )
                except requests.RequestException as e:
                    print(f"尝试 {attempt}/{total_attempts} 遇到异常:{str(e)}")
                else:
                    if candidate.status_code == 200:
                        response_img = candidate
                        break
                    if candidate.status_code == 404:
                        print("所有图片下载完成")
                        return images
                    print(f"尝试 {attempt}/{total_attempts} 失败,状态码:{candidate.status_code}")
                if attempt < total_attempts:
                    time.sleep(1)  # no point in waiting after the last try
            if response_img is None:
                print(f"图片 {img_url} 下载失败,跳过...")
                continue

            # Decode and store the page as PNG.
            try:
                img = Image.open(BytesIO(response_img.content))
                img.save(temp_path)
            except (OSError, ValueError) as e:
                print(f"图片保存失败:{str(e)}")
                # A half-written file would be treated as a valid cached
                # page by the next run, so remove it.
                if os.path.exists(temp_path):
                    os.remove(temp_path)
                continue
            print(f"保存成功:{temp_path}")
            images.append(temp_path)
            self.temp_images.append(temp_path)
            self.progress_bar["value"] = i
            self.root.update_idletasks()
        return images
    except Exception as e:
        # Boundary handler: report anything unexpected to the user.
        messagebox.showerror("错误", f"发生未预期的错误:{str(e)}")
        return []
合并图片到PDF
def merge_images_to_pdf(self, image_files, output_path):
    """Merge image files into one PDF, in the order given.

    Each image is opened with ``fitz.open`` and inserted into a new
    document via ``insert_file``; the result is saved to ``output_path``.
    Both the per-image documents and the merged document are closed even
    when inserting or saving raises.

    Args:
        image_files: Iterable of image file paths.
        output_path: Destination path of the merged PDF.
    """
    merged_pdf = fitz.open()  # called without arguments: a new document
    try:
        for img_path in image_files:
            img_doc = fitz.open(img_path)
            try:
                merged_pdf.insert_file(img_doc)
            finally:
                img_doc.close()
        merged_pdf.save(output_path)
    finally:
        merged_pdf.close()
完整代码
import os
import re
import tempfile
import time
import requests
import fitz # PyMuPDF
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar
from PIL import Image
from io import BytesIO
class ImageToPDFConverter:
    """Tkinter tool that downloads report page images from kd.nsfc.cn and
    merges them into a single PDF.

    The user pastes a page URL containing ``id=<hex id>``; the pages are
    downloaded into a per-project cache directory below ``temp`` and then
    merged with PyMuPDF.
    """

    _SITE = 'https://kd.nsfc.cn'
    _PAGE_API = 'https://kd.nsfc.cn/api/baseQuery/completeProjectReport'
    _INFO_API = 'https://kd.nsfc.cn/api/baseQuery/projectInfluenceAnalysisData'
    _MAX_PAGES = 499         # highest page index that is requested
    _MAX_RETRIES = 5         # image download retries after the first attempt
    _REQUEST_TIMEOUT = 10    # seconds, applied to every HTTP request
    # Browser-like headers shared by every request to the site.
    _HEADERS = {
        'Host': 'kd.nsfc.cn',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
        'Accept': 'application/json, text/plain, */*',
        'sec-ch-ua-mobile': '?0',
        'Authorization': 'Bearer false',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'Origin': 'https://kd.nsfc.cn',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    def __init__(self, root):
        """Build the window: URL entry, progress bar and convert button.

        Args:
            root: The Tk root window that hosts all widgets.
        """
        self.root = root
        self.root.title("NSFC2PDF")
        self.root.geometry("500x200")
        # URL input
        self.url_label = tk.Label(root, text="输入图片网页URL:")
        self.url_label.pack(pady=5)
        self.url_entry = tk.Entry(root, width=60)
        self.url_entry.pack(pady=5)
        # Progress display
        self.progress_label = tk.Label(root, text="进度:")
        self.progress_label.pack(pady=5)
        self.progress_bar = Progressbar(root, length=400, mode="determinate")
        self.progress_bar.pack(pady=5)
        # Convert button
        self.convert_button = tk.Button(root, text="转换为PDF", command=self.start_conversion)
        self.convert_button.pack(pady=10)
        # Images downloaded by this instance; removed after a successful merge.
        self.temp_images = []

    @staticmethod
    def _extract_project_id(url):
        """Return the hex value following ``id=`` in *url*, or None."""
        match = re.search(r'id=([a-f0-9]+)', url)
        return match.group(1) if match else None

    @staticmethod
    def _safe_filename(title):
        """Replace characters that are invalid in file names with ``_``."""
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', str(title))
        return cleaned.strip(' .')

    def download_images(self, id):
        """Download the report page images of one project into a local cache.

        Pages are requested one by one starting at index 1. The loop stops
        when the API response carries no image URL, when the image URL
        answers 404, or after ``_MAX_PAGES`` pages. Pages already present
        in the cache directory are reused without any network request.

        Args:
            id: Project identifier, sent as the ``id`` form field.

        Returns:
            list: Paths of the page images in page order (cached and newly
            downloaded). An empty list if an unexpected error aborted the
            run; in that case an error dialog has been shown.
        """
        total_attempts = self._MAX_RETRIES + 1
        # Cache pages per project. A single shared directory would let
        # leftover pages of another project be mistaken for this one's.
        temp_dir = os.path.join('temp', str(id))
        os.makedirs(temp_dir, exist_ok=True)
        images = []
        try:
            for i in range(1, self._MAX_PAGES + 1):
                temp_path = os.path.join(temp_dir, f"{i:05d}.png")
                print(temp_path)
                if os.path.isfile(temp_path):
                    images.append(temp_path)
                    print(temp_path, '已经存在')
                    continue

                # Ask the API where the image of page i lives.
                response = requests.post(
                    self._PAGE_API,
                    headers=self._HEADERS,
                    data={'id': id, 'index': str(i)},
                    timeout=self._REQUEST_TIMEOUT,
                )
                if response.status_code != 200:
                    print(f"API请求失败,状态码:{response.status_code}")
                    continue
                bookjson = response.json()
                # "data" may be present but null, so do not rely on the
                # default argument of dict.get here.
                relative_url = (bookjson.get('data') or {}).get('url')
                if not relative_url:
                    print("没有更多图片了")
                    break
                img_url = f'{self._SITE}{relative_url}'
                print(f"正在处理第 {i} 张图片: {img_url}")

                # Download with retries; response_img stays None on failure.
                response_img = None
                for attempt in range(1, total_attempts + 1):
                    try:
                        candidate = requests.get(
                            img_url,
                            headers=self._HEADERS,
                            timeout=self._REQUEST_TIMEOUT,
                        )
                    except requests.RequestException as e:
                        print(f"尝试 {attempt}/{total_attempts} 遇到异常:{str(e)}")
                    else:
                        if candidate.status_code == 200:
                            response_img = candidate
                            break
                        if candidate.status_code == 404:
                            print("所有图片下载完成")
                            return images
                        print(f"尝试 {attempt}/{total_attempts} 失败,状态码:{candidate.status_code}")
                    if attempt < total_attempts:
                        time.sleep(1)  # no point in waiting after the last try
                if response_img is None:
                    print(f"图片 {img_url} 下载失败,跳过...")
                    continue

                # Decode and store the page as PNG.
                try:
                    img = Image.open(BytesIO(response_img.content))
                    img.save(temp_path)
                except (OSError, ValueError) as e:
                    print(f"图片保存失败:{str(e)}")
                    # A half-written file would be treated as a valid
                    # cached page by the next run, so remove it.
                    if os.path.exists(temp_path):
                        os.remove(temp_path)
                    continue
                print(f"保存成功:{temp_path}")
                images.append(temp_path)
                self.temp_images.append(temp_path)
                self.progress_bar["value"] = i
                self.root.update_idletasks()
            return images
        except Exception as e:
            # Boundary handler: report anything unexpected to the user.
            messagebox.showerror("错误", f"发生未预期的错误:{str(e)}")
            return []

    def merge_images_to_pdf(self, image_files, output_path):
        """Merge image files into one PDF, in the order given.

        Both the per-image documents and the merged document are closed
        even when inserting or saving raises.

        Args:
            image_files: Iterable of image file paths.
            output_path: Destination path of the merged PDF.
        """
        merged_pdf = fitz.open()  # called without arguments: a new document
        try:
            for img_path in image_files:
                img_doc = fitz.open(img_path)
                try:
                    merged_pdf.insert_file(img_doc)
                finally:
                    img_doc.close()
            merged_pdf.save(output_path)
        finally:
            merged_pdf.close()

    def _fetch_project_title(self, id):
        """Look up the project's ``chineseTitle`` for use as a file name.

        Returns:
            The title with file-name-unsafe characters replaced, or None
            when the request fails or the response holds no usable title.
        """
        headers = dict(self._HEADERS)
        headers['Referer'] = f'{self._SITE}/finalDetails?id={id}'
        try:
            response = requests.post(
                self._INFO_API,
                headers=headers,
                data={'projectID': id},
                timeout=self._REQUEST_TIMEOUT,
            )
            jsondata = response.json()
        except (requests.RequestException, ValueError) as e:
            # The title is optional; the caller falls back to a save dialog.
            print(f"获取项目名称失败:{str(e)}")
            return None
        bookdata = jsondata.get('data') if isinstance(jsondata, dict) else None
        if not isinstance(bookdata, list) or not bookdata:
            return None
        first = bookdata[0]
        bookname = first.get('chineseTitle') if isinstance(first, dict) else None
        if not bookname:
            return None
        return self._safe_filename(bookname) or None

    def start_conversion(self):
        """Run the whole workflow: parse the URL, download pages, write the PDF.

        The PDF is named after the project title when it can be fetched;
        otherwise a save dialog asks for the destination. Images downloaded
        by this instance are deleted after a successful merge.
        """
        url = self.url_entry.get().strip()
        project_id = self._extract_project_id(url)
        if not project_id:
            # Covers both an empty entry and a URL without "id=<hex>".
            messagebox.showwarning("警告", "请输入有效的URL")
            return

        # Download the page images.
        self.progress_label.config(text="进度:下载图片中...")
        self.root.update_idletasks()
        image_paths = self.download_images(project_id)
        print(len(image_paths))
        if not image_paths:
            self.progress_label.config(text="貌似没有图片...")
            return

        print('开始合并PDF')
        bookname = self._fetch_project_title(project_id)
        output_path = f'{bookname}.pdf' if bookname else None
        if not output_path:
            # No usable title: let the user pick the destination.
            self.progress_label.config(text="进度:保存PDF中...")
            self.root.update_idletasks()
            output_path = filedialog.asksaveasfilename(
                defaultextension=".pdf",
                filetypes=[("PDF文件", "*.pdf")],
                title="保存PDF文件"
            )
            if not output_path:
                return

        # Merge the images; keep the downloads if merging fails so that a
        # second attempt can reuse them.
        try:
            self.merge_images_to_pdf(image_paths, output_path)
        except Exception as e:
            messagebox.showerror("错误", f"PDF合并失败:{str(e)}")
            return

        # Remove the images downloaded by this instance.
        cache_dirs = {os.path.dirname(path) for path in self.temp_images}
        for temp_file in self.temp_images:
            if os.path.exists(temp_file):
                os.remove(temp_file)
        self.temp_images = []
        for cache_dir in cache_dirs:
            try:
                os.rmdir(cache_dir)
            except OSError:
                # Best effort: the directory still holds pages cached by an
                # earlier run, or is otherwise not removable.
                pass

        messagebox.showinfo("成功", f"PDF已成功保存到:{output_path}")
        self.progress_label.config(text="进度:")
        self.progress_bar["value"] = 0
if __name__ == "__main__":
    # Start the GUI only when this file is executed as a script.
    window = tk.Tk()
    converter = ImageToPDFConverter(window)  # builds the widgets inside the window
    window.mainloop()  # blocks until the window is closed
打包好的程序
百度云
https://pan.baidu.com/s/1wqN2ygMpuvPHpDi_B99CTw?pwd=52pj 提取码: 52pj
蓝奏云
https://2021.lanzoue.com/imZGA2t1e49a