#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Tkinter GUI front end for converting PDF files to DOCX with pdf2docx.
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import os
import threading
import queue
import traceback
from datetime import datetime
import fitz
from pdf2docx import Converter
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
import logging
class PDFConverterModel:
    """State and conversion logic behind the PDF-to-DOCX converter UI.

    The model stores the selected paths and option flags, and reports
    conversion progress through ``self.queue`` as ``(kind, value)`` tuples.
    The kinds emitted by this class are ``"status"``, ``"progress"``,
    ``"success"`` and ``"error"``.
    """

    _log = logging.getLogger(__name__)

    def __init__(self):
        self.pdf_file_path = ""
        self.docx_file_path = ""
        self.conversion_status = "准备就绪"
        self.progress_value = 0
        self.preserve_layout = True
        self.extract_images = True
        self.extract_tables = True
        self.preserve_fonts = True
        self.preserve_colors = True
        # Messages produced by convert_pdf_to_docx(); drained by the consumer.
        self.queue = queue.Queue()

    def browse_pdf(self):
        """Show an "open file" dialog for PDFs and return the chosen path."""
        return filedialog.askopenfilename(
            title="选择PDF文件",
            filetypes=[("PDF文件", "*.pdf"), ("所有文件", "*.*")]
        )

    def browse_docx(self):
        """Show a "save as" dialog for the DOCX output and return the path."""
        return filedialog.asksaveasfilename(
            title="保存DOCX文件",
            defaultextension=".docx",
            filetypes=[("Word文档", "*.docx"), ("所有文件", "*.*")]
        )

    def get_pdf_info(self, pdf_path):
        """Return a multi-line, human-readable summary of ``pdf_path``.

        The summary lists file name, size, page count, encryption flag and
        the document metadata. Any failure is reported in the returned
        string instead of being raised.
        """
        unspecified = '未指定'
        try:
            pdf_doc = fitz.open(pdf_path)
            try:
                page_count = pdf_doc.page_count
                # metadata may be None, and its values may be None or empty;
                # both cases are shown as "unspecified".
                metadata = pdf_doc.metadata or {}
                is_encrypted = pdf_doc.is_encrypted
            finally:
                # Always release the document, even when reading it failed.
                pdf_doc.close()

            size_str = self.format_file_size(os.path.getsize(pdf_path))

            def field(key):
                return metadata.get(key) or unspecified

            lines = [
                f"文件名称: {os.path.basename(pdf_path)}",
                f"文件大小: {size_str}",
                f"页面总数: {page_count} 页",
                f"是否加密: {'是' if is_encrypted else '否'}",
                f"文档标题: {field('title')}",
                f"文档作者: {field('author')}",
                f"文档主题: {field('subject')}",
                f"关 键 词: {field('keywords')}",
                f"创建程序: {field('creator')}",
                f"生成程序: {field('producer')}",
            ]
            return "".join(f"{line}\n" for line in lines)
        except Exception as e:
            return f"无法读取PDF文件信息: {str(e)}"

    def format_file_size(self, size_bytes):
        """Format a byte count with two decimals and a B/KB/MB/GB/TB unit."""
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size_bytes < 1024.0:
                return f"{size_bytes:.2f} {unit}"
            size_bytes /= 1024.0
        return f"{size_bytes:.2f} TB"

    def validate_input(self, pdf_path, docx_path):
        """Return a list of user-facing error messages (empty when valid)."""
        errors = []
        if not pdf_path:
            errors.append("请选择要转换的PDF文件!")
        if not docx_path:
            errors.append("请设置DOCX输出文件路径!")
        if pdf_path and not pdf_path.lower().endswith('.pdf'):
            errors.append("选择的文件不是PDF格式!")
        if pdf_path and not os.path.exists(pdf_path):
            errors.append("PDF文件不存在!")
        return errors

    def check_pdf_encrypted(self, pdf_path):
        """Return the document's ``is_encrypted`` flag; False if unreadable."""
        try:
            pdf_doc = fitz.open(pdf_path)
            try:
                return pdf_doc.is_encrypted
            finally:
                pdf_doc.close()
        except Exception:
            # Best effort: an unreadable file is reported later by the
            # conversion itself, so treat it as "not encrypted" here.
            return False

    def convert_pdf_to_docx(self, pdf_path, docx_path):
        """Convert ``pdf_path`` to ``docx_path`` and report through the queue.

        Returns True on success and False on failure. Failures are never
        raised: they are queued as an ``"error"`` message and appended to
        the error log via log_error().
        """
        try:
            self.queue.put(("status", "正在初始化转换器..."))
            self.queue.put(("progress", 5))
            cv = Converter(pdf_path)
            try:
                kwargs = {
                    'start': 0,
                    'end': None,
                    'multi_processing': False,
                }
                # NOTE(review): 'layout_analysis', 'layout_kwargs' and
                # 'parse_table' may not be settings pdf2docx recognises --
                # confirm against the pdf2docx Converter documentation.
                if self.preserve_layout:
                    kwargs['layout_analysis'] = True
                    kwargs['layout_kwargs'] = {
                        'char_margin': 1.0,
                        'line_margin': 0.5,
                        'word_margin': 0.1,
                        'detect_vertical': True,
                        'all_texts': True,
                    }
                if self.extract_tables:
                    kwargs['parse_table'] = True
                self.queue.put(("status", "正在转换PDF文件,请稍候..."))
                self.queue.put(("progress", 10))
                cv.convert(docx_path, **kwargs)
            finally:
                # Release the converter even when convert() raises.
                cv.close()

            if self.preserve_fonts or self.preserve_colors:
                self.queue.put(("status", "正在优化文档格式..."))
                self.queue.put(("progress", 95))
                self.enhance_document_formatting(pdf_path, docx_path)

            self.queue.put(("progress", 100))
            self.queue.put(("status", "转换完成!"))
            size_str = self.format_file_size(os.path.getsize(docx_path))
            success_msg = (
                f"转换成功!\n\n输出文件: {docx_path}\n文件大小: {size_str}"
                "\n\n转换已完成,您可以打开文件查看结果。"
            )
            self.queue.put(("success", success_msg))
            return True
        except Exception as e:
            error_msg = (
                "\n"
                "转换过程中出错:\n"
                f"错误信息: {str(e)}\n"
                "请检查:\n"
                "1. PDF文件是否完整且未损坏\n"
                "2. 是否有足够的磁盘空间\n"
                "3. 是否具有文件读写权限\n"
                "详细错误信息已记录。\n"
            )
            self.queue.put(("error", error_msg))
            self.queue.put(("status", "转换失败"))
            self.log_error(pdf_path, str(e), traceback.format_exc())
            return False

    def enhance_document_formatting(self, pdf_path, docx_path):
        """Stamp a header and footer onto the first section of the DOCX.

        This is a best-effort step: a failure is logged and otherwise
        ignored so that it cannot fail an already finished conversion.
        """
        try:
            doc = Document(docx_path)
            if len(doc.sections) == 0:
                return
            first_section = doc.sections[0]

            header_para = first_section.header.paragraphs[0]
            stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            header_para.text = f"从 '{os.path.basename(pdf_path)}' 转换 - {stamp}"
            header_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT

            footer_para = first_section.footer.paragraphs[0]
            # NOTE(review): this writes the literal text "{PAGE}" and
            # "{NUMPAGES}"; presumably real Word field codes were intended --
            # confirm.
            footer_para.text = f"PDF转DOCX转换器 v1.1 - 第 {{PAGE}} 页 / {{NUMPAGES}} 页"
            footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

            doc.save(docx_path)
        except Exception:
            self._log.warning(
                "Could not add header/footer to %s", docx_path, exc_info=True
            )

    def log_error(self, pdf_path, error_msg, traceback_info):
        """Append one failure record to ``conversion_error.log`` (best effort)."""
        try:
            with open("conversion_error.log", "a", encoding="utf-8") as f:
                f.write(f"\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"PDF文件: {pdf_path}\n")
                f.write(f"错误信息: {error_msg}\n")
                f.write(f"详细跟踪:\n{traceback_info}\n")
                f.write("-" * 80 + "\n")
        except Exception:
            # Logging must never mask the original conversion error.
            self._log.warning("Could not write conversion_error.log", exc_info=True)
class PDFConverterView:
    """Tkinter widgets of the converter window.

    Builds the title bar, a two-tab notebook (conversion / options), the
    bottom button row and the "about" dialog. Button commands are not set
    here; the buttons are exposed as attributes so a controller can bind
    them.
    """

    FONT_FAMILY = "微软雅黑"
    HINT_COLOR = "#7f8c8d"

    def __init__(self, root):
        self.root = root
        self.root.title("PDF 转 DOCX 转换器")
        self.root.geometry("1020x860")
        self.root.resizable(False, False)
        self.setup_styles()
        self.create_widgets()

    def setup_styles(self):
        """Select the 'clam' ttk theme when available and define the palette."""
        style = ttk.Style()
        if 'clam' in style.theme_names():
            style.theme_use('clam')
        self.bg_color = "#f0f0f0"
        self.primary_color = "#2c3e50"
        self.secondary_color = "#3498db"
        self.success_color = "#27ae60"
        self.warning_color = "#e67e22"
        self.danger_color = "#e74c3c"
        self.info_color = "#17a2b8"
        self.button_font = (self.FONT_FAMILY, 10)
        self.root.configure(bg=self.bg_color)

    def create_widgets(self):
        """Build the title bar, the notebook, the version label and buttons."""
        title_frame = tk.Frame(self.root, bg=self.primary_color, height=100)
        title_frame.pack(fill=tk.X)
        title_frame.pack_propagate(False)
        title_label = tk.Label(
            title_frame,
            text="PDF 转 DOCX 转换器",
            font=(self.FONT_FAMILY, 24, "bold"),
            bg=self.primary_color,
            fg="white"
        )
        title_label.pack(expand=True, pady=(15, 0))
        subtitle_label = tk.Label(
            title_frame,
            text="最大限度保留PDF原始格式、图片和布局",
            font=(self.FONT_FAMILY, 10),
            bg=self.primary_color,
            fg="#ecf0f1"
        )
        subtitle_label.pack(expand=True, pady=(0, 15))

        self.notebook = ttk.Notebook(self.root)
        self.notebook.pack(fill=tk.BOTH, expand=True, padx=20, pady=(15, 20))
        self.conversion_tab = tk.Frame(self.notebook, bg=self.bg_color)
        self.notebook.add(self.conversion_tab, text="文件转换")
        self.options_tab = tk.Frame(self.notebook, bg=self.bg_color)
        self.notebook.add(self.options_tab, text="转换选项")
        self.create_conversion_tab()
        self.create_options_tab()
        self.notebook.bind("<<NotebookTabChanged>>", self.on_tab_changed)

        version_label = tk.Label(
            self.root,
            text="PDF to DOCX Converter v1.1 | 保留格式版本",
            font=(self.FONT_FAMILY, 9), bg=self.bg_color, fg=self.HINT_COLOR
        )
        version_label.pack(side=tk.BOTTOM, pady=10)
        self.create_bottom_buttons()
        self.button_frame.pack(fill=tk.X, pady=(0, 15), padx=20)

    def _section(self, parent, title):
        """Create (without packing) a titled LabelFrame in the common style."""
        return tk.LabelFrame(
            parent, text=title, font=(self.FONT_FAMILY, 12, "bold"),
            bg=self.bg_color, fg=self.primary_color, padx=20, pady=20
        )

    def create_conversion_tab(self):
        """Build the file pickers, the PDF info box and the progress area."""
        file_frame = self._section(self.conversion_tab, " 文件选择 ")
        file_frame.pack(fill=tk.X, pady=(0, 20), padx=10)

        pdf_frame = tk.Frame(file_frame, bg=self.bg_color)
        pdf_frame.pack(fill=tk.X, pady=(0, 15))
        pdf_label = tk.Label(pdf_frame, text="PDF源文件:", font=(self.FONT_FAMILY, 11),
                             bg=self.bg_color, anchor=tk.W, width=12)
        pdf_label.pack(side=tk.LEFT)
        # Read-only: the PDF path can only be changed through the browse button.
        self.pdf_entry = tk.Entry(pdf_frame, font=(self.FONT_FAMILY, 10), state='readonly')
        self.pdf_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(10, 10), ipady=6)
        self.pdf_browse_btn = self.create_button(pdf_frame, "浏览...", "primary", 12)
        self.pdf_browse_btn.pack(side=tk.RIGHT)

        docx_frame = tk.Frame(file_frame, bg=self.bg_color)
        docx_frame.pack(fill=tk.X)
        docx_label = tk.Label(docx_frame, text="DOCX输出文件:", font=(self.FONT_FAMILY, 11),
                              bg=self.bg_color, anchor=tk.W, width=12)
        docx_label.pack(side=tk.LEFT)
        self.docx_entry = tk.Entry(docx_frame, font=(self.FONT_FAMILY, 10))
        self.docx_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(10, 10), ipady=6)
        self.docx_browse_btn = self.create_button(docx_frame, "浏览...", "primary", 12)
        self.docx_browse_btn.pack(side=tk.RIGHT)

        info_frame = self._section(self.conversion_tab, " PDF文件信息 ")
        info_frame.pack(fill=tk.X, pady=(0, 20), padx=10)
        self.pdf_info_text = tk.Text(info_frame, height=8, font=(self.FONT_FAMILY, 10),
                                     bg="white", relief=tk.SUNKEN, bd=2)
        self.pdf_info_text.pack(fill=tk.X)
        self.pdf_info_text.config(state=tk.DISABLED)

        progress_frame = self._section(self.conversion_tab, " 转换进度 ")
        progress_frame.pack(fill=tk.X, pady=(0, 20), padx=10)
        # Start with the idle status instead of an empty label.
        self.status_label = tk.Label(progress_frame, text="准备就绪",
                                     font=(self.FONT_FAMILY, 11),
                                     bg=self.bg_color, anchor=tk.W)
        self.status_label.pack(fill=tk.X, pady=(0, 15))
        self.progress_bar = ttk.Progressbar(progress_frame, maximum=100,
                                            length=100, mode='determinate')
        self.progress_bar.pack(fill=tk.X)

    def _add_option(self, parent, title, description, bottom_pad=15):
        """Pack a checked-by-default option with its hint; return its variable."""
        var = tk.BooleanVar(value=True)
        check = tk.Checkbutton(parent, text=title, variable=var,
                               font=(self.FONT_FAMILY, 11), bg=self.bg_color,
                               anchor=tk.W, selectcolor=self.bg_color)
        check.pack(fill=tk.X, pady=(0, 10))
        hint = tk.Label(parent, text=description,
                        font=(self.FONT_FAMILY, 10), bg=self.bg_color,
                        fg=self.HINT_COLOR, anchor=tk.W, wraplength=900,
                        justify=tk.LEFT)
        hint.pack(fill=tk.X, pady=(0, bottom_pad))
        return var

    def create_options_tab(self):
        """Build the scrollable options page: checkboxes plus usage notes."""
        canvas = tk.Canvas(self.options_tab, bg=self.bg_color, highlightthickness=0)
        scrollbar = tk.Scrollbar(self.options_tab, orient="vertical", command=canvas.yview)
        scrollable_frame = tk.Frame(canvas, bg=self.bg_color)
        # Keep the scroll region in sync with the size of the inner frame.
        scrollable_frame.bind(
            "<Configure>",
            lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
        )
        canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
        canvas.configure(yscrollcommand=scrollbar.set)
        canvas.pack(side="left", fill="both", expand=True)
        scrollbar.pack(side="right", fill="y")

        layout_frame = self._section(scrollable_frame, " 布局保留选项 ")
        layout_frame.pack(fill=tk.X, pady=(15, 20), padx=10)
        self.preserve_layout_var = self._add_option(
            layout_frame,
            "保留原始页面布局和格式",
            "(强烈推荐)尽可能保留PDF的原始页面布局,包括分栏、边距、页眉页脚等布局元素。启用此选项可以确保转换后的Word文档与原始PDF在视觉上保持一致。",
        )
        self.extract_images_var = self._add_option(
            layout_frame,
            "提取PDF中的图像",
            "提取PDF中的所有图像并嵌入到Word文档中,保持原始分辨率。支持JPG、PNG、BMP等多种图像格式。对于包含大量图像的PDF文档,转换时间可能会相应增加。",
        )
        self.extract_tables_var = self._add_option(
            layout_frame,
            "提取并保留表格结构",
            "识别PDF中的表格并转换为Word表格格式,保持表格边框和内容对齐。对于复杂的合并单元格表格,转换器会尝试保留原始结构,但某些复杂布局可能需要手动调整。",
        )

        font_color_frame = self._section(scrollable_frame, " 字体和颜色选项 ")
        font_color_frame.pack(fill=tk.X, pady=(0, 20), padx=10)
        self.preserve_fonts_var = self._add_option(
            font_color_frame,
            "保留字体样式和大小",
            "尽可能识别并应用原始PDF中的字体样式和大小。注意:如果系统中缺少相应的字体,转换器会使用最接近的字体进行替换。建议在转换后检查字体是否显示正确。",
        )
        self.preserve_colors_var = self._add_option(
            font_color_frame,
            "保留文本颜色和背景",
            "保留文本颜色、背景色、高亮和超链接等样式。此选项确保转换后的文档在颜色方面与原始PDF保持一致,增强文档的可读性和美观性。",
            bottom_pad=10,
        )

        note_frame = self._section(scrollable_frame, " 详细使用说明和注意事项 ")
        note_frame.pack(fill=tk.X, pady=(0, 20), padx=10)
        notes = [
            "1. 本转换器使用先进的pdf2docx算法,能够最大限度地保留PDF原始格式、布局和样式。",
            "2. 对于包含复杂布局、大量图像或表格的PDF文件,转换过程可能需要较长时间,请耐心等待。",
            "3. 转换后的Word文档可能需要手动微调以获得最佳效果,特别是对于包含复杂表格或特殊字符的文档。",
            "4. 加密的PDF文件需要先解密才能转换,本转换器支持简单的密码保护PDF转换。",
            "5. 建议转换后检查文档,特别是表格、图像和公式部分,确保转换结果符合预期。",
            "6. 对于扫描版PDF(图像型PDF),转换效果取决于OCR识别质量,建议使用专业的OCR软件预处理。",
            "7. 转换过程中请勿关闭程序,以免造成文件损坏或转换失败。",
            "8. 确保系统中有足够的磁盘空间存储转换后的Word文档,特别是大型PDF文件。",
            "9. 如果转换失败,请检查PDF文件是否完整、未损坏,并查看错误日志以获取更多信息。",
            "10. 转换器支持中文字符和中文文件名,确保系统编码设置为UTF-8以获得最佳兼容性。",
        ]
        note_text = tk.Text(note_frame, height=15, font=(self.FONT_FAMILY, 10),
                            bg="white", relief=tk.SUNKEN, bd=1, wrap=tk.WORD)
        note_text.pack(fill=tk.BOTH, expand=True)
        for note in notes:
            note_text.insert(tk.END, note + "\n\n")
        note_text.config(state=tk.DISABLED)

    def create_bottom_buttons(self):
        """Create the convert / clear / about / quit row (frame left unpacked)."""
        self.button_frame = tk.Frame(self.root, bg=self.bg_color)
        self.convert_btn = self.create_button(self.button_frame, "开始转换", "success", 15)
        self.convert_btn.pack(side=tk.LEFT, padx=(0, 15))
        self.clear_btn = self.create_button(self.button_frame, "清空", "warning", 15)
        self.clear_btn.pack(side=tk.LEFT, padx=(0, 15))
        self.about_btn = self.create_button(self.button_frame, "关于", "info", 15)
        self.about_btn.pack(side=tk.LEFT, padx=(0, 15))
        self.quit_btn = self.create_button(self.button_frame, "退出", "danger", 15)
        self.quit_btn.pack(side=tk.RIGHT)

    def create_button(self, parent, text, btn_type="primary", width=12):
        """Return an unpacked flat button coloured according to ``btn_type``.

        Unknown ``btn_type`` values fall back to the "primary" colour.
        """
        color_map = {
            "primary": self.secondary_color,
            "success": self.success_color,
            "warning": self.warning_color,
            "danger": self.danger_color,
            "info": self.info_color
        }
        bg_color = color_map.get(btn_type, self.secondary_color)
        active_bg = self.darken_color(bg_color, 20)
        return tk.Button(
            parent,
            text=text,
            font=self.button_font,
            bg=bg_color,
            fg="white",
            activebackground=active_bg,
            activeforeground="white",
            relief=tk.FLAT,
            cursor="hand2",
            width=width,
            height=1
        )

    def darken_color(self, hex_color, percent):
        """Return ``#rrggbb`` with every channel reduced by ``percent`` percent."""
        hex_color = hex_color.lstrip('#')
        rgb = tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
        darkened = tuple(max(0, int(c * (100 - percent) / 100)) for c in rgb)
        return f'#{darkened[0]:02x}{darkened[1]:02x}{darkened[2]:02x}'

    def on_tab_changed(self, event):
        """Hide the button row on the options tab (index 1), show it otherwise."""
        selected_tab = self.notebook.index(self.notebook.select())
        if selected_tab == 1:
            self.button_frame.pack_forget()
        else:
            self.button_frame.pack(fill=tk.X, pady=(0, 15), padx=20)

    def get_option_values(self):
        """Return the current checkbox states as a dict of booleans."""
        return {
            "preserve_layout": self.preserve_layout_var.get(),
            "extract_images": self.extract_images_var.get(),
            "extract_tables": self.extract_tables_var.get(),
            "preserve_fonts": self.preserve_fonts_var.get(),
            "preserve_colors": self.preserve_colors_var.get()
        }

    def set_pdf_path(self, path):
        """Replace the content of the read-only PDF path entry."""
        # The entry must be writable while its content is replaced.
        self.pdf_entry.config(state=tk.NORMAL)
        self.pdf_entry.delete(0, tk.END)
        self.pdf_entry.insert(0, path)
        self.pdf_entry.config(state='readonly')

    def set_docx_path(self, path):
        """Replace the content of the DOCX output path entry."""
        self.docx_entry.delete(0, tk.END)
        self.docx_entry.insert(0, path)

    def set_pdf_info(self, info_text):
        """Replace the PDF info box content; a falsy value just clears it."""
        self.pdf_info_text.config(state=tk.NORMAL)
        self.pdf_info_text.delete("1.0", tk.END)
        if info_text:
            self.pdf_info_text.insert("1.0", info_text)
        self.pdf_info_text.config(state=tk.DISABLED)

    def set_status(self, status_text):
        """Show ``status_text`` in the status label."""
        self.status_label.config(text=status_text)

    def set_progress(self, progress_value):
        """Set the progress bar value (the bar's maximum is 100)."""
        self.progress_bar['value'] = progress_value

    def set_convert_button_state(self, enabled):
        """Enable or disable the convert button."""
        state = tk.NORMAL if enabled else tk.DISABLED
        self.convert_btn.config(state=state)

    def show_about_window(self, about_text, parent_window):
        """Open a modal, fixed-size "about" dialog centred over the parent."""
        about_window = tk.Toplevel(parent_window)
        about_window.title("关于 PDF 转 DOCX 转换器")
        about_window.geometry("600x500")
        about_window.resizable(False, False)
        about_window.configure(bg=self.bg_color)
        about_window.transient(parent_window)
        about_window.grab_set()

        # Process pending geometry requests before the sizes are queried.
        about_window.update_idletasks()
        x = parent_window.winfo_x() + (
            parent_window.winfo_width() - about_window.winfo_width()) // 2
        y = parent_window.winfo_y() + (
            parent_window.winfo_height() - about_window.winfo_height()) // 2
        about_window.geometry(f"+{x}+{y}")

        title_label = tk.Label(about_window, text="关于 PDF 转 DOCX 转换器",
                               font=(self.FONT_FAMILY, 18, "bold"),
                               bg=self.bg_color, fg=self.primary_color)
        title_label.pack(pady=(20, 10))
        text_frame = tk.Frame(about_window, bg=self.bg_color)
        text_frame.pack(fill=tk.BOTH, expand=True, padx=20, pady=10)
        about_text_widget = tk.Text(text_frame, font=(self.FONT_FAMILY, 10),
                                    bg="white", relief=tk.SUNKEN, bd=1, wrap=tk.WORD)
        about_text_widget.pack(fill=tk.BOTH, expand=True)
        about_text_widget.insert("1.0", about_text)
        about_text_widget.config(state=tk.DISABLED)
        ok_button = self.create_button(about_window, "确定", "primary", 12)
        ok_button.config(command=about_window.destroy)
        ok_button.pack(pady=(10, 20))
class PDFConverterController:
    """Connects the view's buttons to the model and relays progress.

    The conversion runs in a background thread. That thread only talks to
    the UI through ``model.queue``, which check_queue() polls every 100 ms
    via ``root.after``.
    """

    def __init__(self, root):
        self.view = PDFConverterView(root)
        self.model = PDFConverterModel()
        self.bind_events()
        self.check_queue()

    def bind_events(self):
        """Attach the handler methods to the view's buttons."""
        self.view.pdf_browse_btn.config(command=self.on_browse_pdf)
        self.view.docx_browse_btn.config(command=self.on_browse_docx)
        self.view.convert_btn.config(command=self.on_convert)
        self.view.clear_btn.config(command=self.on_clear)
        self.view.about_btn.config(command=self.on_about)
        self.view.quit_btn.config(command=self.view.root.quit)

    def on_browse_pdf(self):
        """Pick a PDF, suggest an output path if none is set, show its info."""
        file_path = self.model.browse_pdf()
        if not file_path:
            return
        self.model.pdf_file_path = file_path
        self.view.set_pdf_path(file_path)
        # The DOCX entry is editable, so its text is the authoritative path.
        typed_docx = self.view.docx_entry.get().strip()
        if typed_docx:
            self.model.docx_file_path = typed_docx
        else:
            docx_path = os.path.splitext(file_path)[0] + "_converted.docx"
            self.model.docx_file_path = docx_path
            self.view.set_docx_path(docx_path)
        pdf_info = self.model.get_pdf_info(file_path)
        self.view.set_pdf_info(pdf_info)

    def on_browse_docx(self):
        """Pick the output file through a "save as" dialog."""
        file_path = self.model.browse_docx()
        if file_path:
            self.model.docx_file_path = file_path
            self.view.set_docx_path(file_path)

    def on_convert(self):
        """Validate the inputs, confirm with the user, start the worker thread."""
        options = self.view.get_option_values()
        self.model.preserve_layout = options["preserve_layout"]
        self.model.extract_images = options["extract_images"]
        self.model.extract_tables = options["extract_tables"]
        self.model.preserve_fonts = options["preserve_fonts"]
        self.model.preserve_colors = options["preserve_colors"]

        # Honour manual edits of the output path instead of the stale value
        # remembered from the last browse.
        self.model.docx_file_path = self.view.docx_entry.get().strip()

        errors = self.model.validate_input(self.model.pdf_file_path, self.model.docx_file_path)
        if errors:
            messagebox.showerror("错误", "\n".join(errors))
            return
        if self.model.check_pdf_encrypted(self.model.pdf_file_path):
            if not messagebox.askyesno("PDF加密", "该PDF文件已加密,转换可能无法完全提取内容。是否继续?"):
                return
        if os.path.exists(self.model.docx_file_path):
            if not messagebox.askyesno("文件已存在", f"文件 {os.path.basename(self.model.docx_file_path)} 已存在,是否覆盖?"):
                return

        self.view.set_convert_button_state(False)
        self.view.set_status("正在分析PDF文件...")
        self.view.set_progress(0)
        conversion_thread = threading.Thread(
            target=self.perform_conversion,
            daemon=True
        )
        conversion_thread.start()

    def perform_conversion(self):
        """Thread target: run the conversion for the model's current paths.

        No widget is touched here because this runs outside the Tk thread.
        The model queues a "success" or "error" message, and check_queue()
        re-enables the convert button on the UI thread.
        """
        self.model.convert_pdf_to_docx(
            self.model.pdf_file_path,
            self.model.docx_file_path
        )

    def on_clear(self):
        """Reset the paths, status, progress and info box to the idle state."""
        self.model.pdf_file_path = ""
        self.model.docx_file_path = ""
        self.model.conversion_status = "准备就绪"
        self.model.progress_value = 0
        self.view.set_pdf_path("")
        self.view.set_docx_path("")
        self.view.set_status("准备就绪")
        self.view.set_progress(0)
        self.view.set_convert_button_state(True)
        self.view.set_pdf_info("")

    def on_about(self):
        """Show the "about" dialog."""
        about_text = "\n".join([
            "",
            "PDF 转 DOCX 转换器 v1.1",
            "功能特点:",
            "1. 最大限度保留PDF原始格式、布局和样式",
            "2. 支持图像提取和嵌入,保持原始分辨率",
            "3. 识别并转换表格为Word表格格式",
            "4. 保留字体样式、大小和颜色",
            "5. 转换过程实时进度显示",
            "技术说明:",
            "- 使用pdf2docx库进行高质量转换",
            "- 使用PyMuPDF提取PDF元数据",
            "- 多线程处理防止界面冻结",
            "- 支持中文文件名和路径",
            "系统要求:",
            "- Windows 7/8/10/11 或 macOS 10.12+",
            "- Microsoft Word 2007+ 或兼容软件",
            "- 至少2GB可用内存",
            "- 足够的磁盘空间存储转换文件",
            "开发团队:Farmer Studio",
            "发布日期:2026年1月",
            "本软件仅供学习交流和研究使用,请勿用于商业用途。",
            "",
        ])
        self.view.show_about_window(about_text, self.view.root)

    def check_queue(self):
        """Apply all pending model messages to the view, then reschedule."""
        while True:
            try:
                msg_type, msg_value = self.model.queue.get_nowait()
            except queue.Empty:
                break
            if msg_type == "progress":
                self.view.set_progress(msg_value)
            elif msg_type == "status":
                self.view.set_status(msg_value)
            elif msg_type == "success":
                self.view.set_status("转换完成")
                self.view.set_progress(100)
                self.view.set_convert_button_state(True)
                messagebox.showinfo("转换成功", msg_value)
            elif msg_type == "error":
                self.view.set_status("转换失败")
                self.view.set_convert_button_state(True)
                messagebox.showerror("转换错误", msg_value)
        self.view.root.after(100, self.check_queue)
def main():
    """Create the Tk root window, attach the controller and run the event loop."""
    window = tk.Tk()
    # Keep a reference for the lifetime of the event loop.
    controller = PDFConverterController(window)  # noqa: F841
    window.mainloop()


if __name__ == "__main__":
    main()