# [Python] 纯文本查看 复制代码  (forum code-viewer header; not part of the program)
"""
PDF Outline Editor
新增功能:行加粗、状态栏、折叠额外条目、保存按钮、快捷键等。
"""
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from PIL import Image, ImageTk
import fitz
import re
import pathlib
import os
# Drag-and-drop is optional: the editor still works when tkinterdnd2 is not
# installed, it just hides the drop target.
try:
    from tkinterdnd2 import DND_FILES, TkinterDnD
    DND_AVAILABLE = True
except ImportError:
    DND_AVAILABLE = False

# Best-effort request for high-DPI rendering on Windows (2 is
# PROCESS_PER_MONITOR_DPI_AWARE in the Windows API). Failure is harmless, so
# only the "not available here" errors are swallowed:
#   ImportError    - not running on Windows (ctypes has no windll)
#   OSError        - shcore.dll cannot be loaded
#   AttributeError - the API is missing on this Windows version
try:
    from ctypes import windll
    windll.shcore.SetProcessDpiAwareness(2)
except (ImportError, AttributeError, OSError):
    pass
# ---------------- Settings ----------------
DEFAULT_COLLAPSE = 2 # Passed as set_toc(collapse=...): outline levels beyond this start collapsed (2: levels 1 and 2 start expanded); 0 / None expands everything
SET_PAEG_MODE = "UseOutlines" # Passed to set_pagemode(): show the outline panel on open. Alternatives: UseNone, UseThumbs, FullScreen, UseAttachments, UseOC
# ---------------- Constants ----------------
# Toolbar icon files; text buttons are used instead when any of them is missing.
ICON_ADD = "img/add.png"
ICON_DEL = "img/delete.png"
ICON_INDENT = "img/indent.png"
ICON_UNINDENT = "img/unindent.png"
# Prompt text shown in the help menu for asking an LLM to transcribe a table
# of contents into the Markdown format this editor can parse.
LLM_PROMPT = """帮我提取下目录信息,output in the following format
## The Format example 解析示例:
```
# Introduction 1
## Methods 10
### Methods_one 12
## Experiments 20
### Experiments_one 22
# End 30
```"""
# Glossary shown in the help menu (catalogue page, actual page, offset, extra entry).
TERMINOLOGY = """【术语解释】
目录页码:从OCR或原始目录中直接读取的页码,可能与PDF实际页数不符。
实际页码:PDF文件中真实的页面编号(从1开始),用于最终写入大纲。
偏移(Offset) = 实际页码 - 目录页码,通常为固定值。
额外条目:不在原目录体系中的自定义条目,可自由指定实际页码。"""
class OutlineEntry:
    """A single outline (bookmark) row.

    Attributes:
        level: Nesting depth, 1 for a top-level entry.
        title: Text shown in the outline.
        page: Page number as printed in the catalogue / read from the TOC.
        actual_page: Page number to write into the PDF; starts equal to
            ``page`` and is recomputed when an offset is applied.
        is_extra: True for entries outside the catalogue numbering, which
            are skipped when an offset is applied.
        calc_offset: Initialised to None; not assigned anywhere else in
            this class.
    """

    def __init__(self, level, title, page, is_extra=False):
        self.level, self.title = level, title
        # Until an offset is applied the real page equals the catalogue page.
        self.page = self.actual_page = page
        self.is_extra = is_extra
        self.calc_offset = None
class OffsetManager:
    """Holds catalogue-page/actual-page pairs and the user's extra entries.

    Attributes:
        mappings: List of ``(catalogue_page, actual_page)`` tuples.
        extra_entries: List of ``(title, actual_page, position)`` tuples,
            where ``position`` is ``"head"`` or ``"tail"``.
    """

    def __init__(self):
        self.mappings = []
        self.extra_entries = []

    def add_mapping(self, cat_page, actual_page):
        """Record one catalogue-page -> actual-page reference pair."""
        self.mappings.append((cat_page, actual_page))

    def remove_mapping(self, index):
        """Delete the mapping at ``index``; out-of-range indexes are ignored."""
        if index < 0 or index >= len(self.mappings):
            return
        del self.mappings[index]

    def add_extra(self, title, actual_page, position="head"):
        """Record an extra entry to be placed at the head or tail of the outline."""
        self.extra_entries.append((title, actual_page, position))

    def remove_extra(self, index):
        """Delete the extra entry at ``index``; out-of-range indexes are ignored."""
        if index < 0 or index >= len(self.extra_entries):
            return
        del self.extra_entries[index]

    def get_offset(self):
        """Return the page offset implied by the mappings.

        Returns:
            0 when there are no mappings, the common ``actual - catalogue``
            difference when all mappings agree, or None (after showing a
            warning box) when they disagree.
        """
        if not self.mappings:
            return 0
        distinct = {actual - cat for cat, actual in self.mappings}
        if len(distinct) != 1:
            messagebox.showwarning("偏移不一致", "多个对照对的页码差值不一致,请检查后重新输入。")
            return None
        return next(iter(distinct))

    def apply_offset(self, entries):
        """Set ``actual_page = page + offset`` on every non-extra entry.

        Returns:
            False when the offset could not be determined, otherwise True.
        """
        shift = self.get_offset()
        if shift is None:
            return False
        for item in entries:
            if item.is_extra:
                continue
            item.actual_page = item.page + shift
        return True
class PDFOutlineEditor:
    """Main window of the PDF outline editor.

    The left pane loads a PDF and manages page-offset mappings and extra
    entries; the right pane is a text editor holding one outline entry per
    line, in either "indent" format (``Title (page)``, two spaces per level)
    or "markdown" format (``## Title page``).
    """

    # Shared by the whole-text and single-line parsers so both accept the
    # same input. ``-?`` keeps entries whose page is negative (e.g. -1 read
    # from the PDF's existing TOC); with plain ``\d+`` such lines failed to
    # parse and the entries were silently lost on the next re-parse.
    _PAREN_PAGE_RE = re.compile(r'^(.+)\s+\((-?\d+)\)$')
    _PLAIN_PAGE_RE = re.compile(r'^(.+)\s+(-?\d+)$')
    _MARKDOWN_RE = re.compile(r'^(#+)\s+(.+)\s+(-?\d+)$')

    def __init__(self, root):
        """Build the UI inside ``root`` and initialise empty editor state."""
        self.root = root
        self.current_file = None
        self.max_page_number = 0
        self.entries = []
        self.edit_mode = "indent"
        self.offset_mgr = OffsetManager()
        icon_files = {
            'add': ICON_ADD,
            'del': ICON_DEL,
            'indent': ICON_INDENT,
            'unindent': ICON_UNINDENT,
        }
        # Icons are all-or-nothing: fall back to text buttons if any is missing.
        self.use_icons = all(os.path.exists(path) for path in icon_files.values())
        if self.use_icons:
            self.icons = {
                key: ImageTk.PhotoImage(Image.open(path).resize((20, 20)))
                for key, path in icon_files.items()
            }
        self.create_widgets()
        self.setup_dnd()
        self.bind_shortcuts()

    # ------------------------------------------------------------ UI setup

    def create_widgets(self):
        """Create the status bar, the two-pane layout and the menu bar."""
        # Pack the status bar first: with pack(), widgets packed after an
        # expanding widget are the first to be clipped when space runs out.
        self.status_bar = tk.Label(self.root, text="总页数:未加载", bd=1, relief=tk.SUNKEN, anchor=tk.W)
        self.status_bar.pack(side=tk.BOTTOM, fill=tk.X)
        main_pane = tk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_pane.pack(fill=tk.BOTH, expand=1)
        left_frame = tk.Frame(main_pane)
        self.build_file_panel(left_frame)
        self.build_offset_panel(left_frame)
        main_pane.add(left_frame)
        right_frame = tk.Frame(main_pane)
        self.build_editor_panel(right_frame)
        main_pane.add(right_frame)
        self.build_menu()

    def build_menu(self):
        """Create the File / Import / Help menus."""
        menubar = tk.Menu(self.root)
        self.root.config(menu=menubar)
        file_menu = tk.Menu(menubar, tearoff=0)
        file_menu.add_command(label="选择PDF", command=self.load_pdf)
        file_menu.add_command(label="保存到PDF", command=self.save_pdf, accelerator="Ctrl+S")
        file_menu.add_separator()
        file_menu.add_command(label="退出", command=self.root.quit)
        menubar.add_cascade(label="文件", menu=file_menu)
        import_menu = tk.Menu(menubar, tearoff=0)
        import_menu.add_command(label="导入现成OCR版本...", command=self.open_ocr_subwin)
        menubar.add_cascade(label="导入", menu=import_menu)
        help_menu = tk.Menu(menubar, tearoff=0)
        help_menu.add_command(label="显示LLM OCR提示词", command=self.show_prompt)
        help_menu.add_separator()
        help_menu.add_command(label="软件术语解释", command=self.show_terminology)
        menubar.add_cascade(label="帮助", menu=help_menu)

    def build_file_panel(self, parent):
        """Create the "choose PDF" button, file label and optional drop target."""
        frame = tk.LabelFrame(parent, text="文件操作")
        tk.Button(frame, text="选择PDF", command=self.load_pdf).pack(pady=5, padx=5)
        self.file_label = tk.Label(frame, text="未选择文件", fg="gray")
        self.file_label.pack()
        if DND_AVAILABLE:
            self.drop_target = tk.Label(frame, text="拖放PDF文件至此", relief="sunken")
            self.drop_target.pack(pady=10, ipadx=20, ipady=10)
        frame.pack(pady=10, padx=5, fill=tk.X)

    def build_offset_panel(self, parent):
        """Create the page-mapping table, the collapsible extra-entry panel
        and the preview button."""
        frame = tk.LabelFrame(parent, text="页码补偿 & 预览", padx=12, pady=12)
        # Mapping table: catalogue page -> actual page.
        map_frame = tk.Frame(frame)
        tk.Label(map_frame, text="目录页码 -> 实际页码").grid(row=0, column=0, columnspan=2)
        self.map_listbox = tk.Listbox(map_frame, height=4, width=30)
        self.map_listbox.grid(row=1, column=0, rowspan=3, sticky='nsew')
        scrollbar = tk.Scrollbar(map_frame, orient=tk.VERTICAL, command=self.map_listbox.yview)
        scrollbar.grid(row=1, column=1, rowspan=3, sticky='ns')
        self.map_listbox.config(yscrollcommand=scrollbar.set)
        tk.Label(map_frame, text="目录页码:").grid(row=1, column=2, sticky='w')
        self.cat_page_entry = tk.Entry(map_frame, width=10)
        self.cat_page_entry.grid(row=1, column=3)
        tk.Label(map_frame, text="实际页码:").grid(row=2, column=2, sticky='w')
        self.actual_page_entry = tk.Entry(map_frame, width=10)
        self.actual_page_entry.grid(row=2, column=3)
        tk.Button(map_frame, text="添加对照", command=self.add_mapping).grid(row=3, column=2, columnspan=2)
        tk.Button(map_frame, text="删除选中", command=self.del_mapping).grid(row=4, column=2, columnspan=2)
        map_frame.pack(pady=5)
        # Extra entries (collapsible; the container is packed on demand by
        # toggle_extra_panel).
        self.extra_visible = False
        self.extra_frame_container = tk.Frame(frame)
        self.toggle_btn = tk.Button(frame, text="展开额外条目 ▸", command=self.toggle_extra_panel)
        self.toggle_btn.pack(pady=5, anchor='w')
        extra_frame = tk.Frame(self.extra_frame_container)
        tk.Label(extra_frame, text="额外条目(不在目录体系中)").grid(row=0, column=0, columnspan=2)
        self.extra_listbox = tk.Listbox(extra_frame, height=3, width=30)
        self.extra_listbox.grid(row=1, column=0, rowspan=4, sticky='nsew')
        scrollbar2 = tk.Scrollbar(extra_frame, orient=tk.VERTICAL, command=self.extra_listbox.yview)
        scrollbar2.grid(row=1, column=1, rowspan=4, sticky='ns')
        self.extra_listbox.config(yscrollcommand=scrollbar2.set)
        tk.Label(extra_frame, text="标题:").grid(row=1, column=2, sticky='w')
        self.extra_title_entry = tk.Entry(extra_frame, width=10)
        self.extra_title_entry.grid(row=1, column=3)
        tk.Label(extra_frame, text="实际页码:").grid(row=2, column=2, sticky='w')
        self.extra_page_entry = tk.Entry(extra_frame, width=10)
        self.extra_page_entry.grid(row=2, column=3)
        tk.Label(extra_frame, text="位置:").grid(row=3, column=2, sticky='w')
        self.extra_pos_var = tk.StringVar(value="head")
        tk.OptionMenu(extra_frame, self.extra_pos_var, "head", "tail").grid(row=3, column=3)
        tk.Button(extra_frame, text="添加条目", command=self.add_extra_entry).grid(row=4, column=2, columnspan=2)
        tk.Button(extra_frame, text="删除选中", command=self.del_extra_entry).grid(row=5, column=2, columnspan=2)
        extra_frame.pack(pady=5)
        frame.pack(pady=10, padx=5, fill=tk.X)
        # Preview button lives directly in the parent, below the panel.
        tk.Button(parent, text="预览前三条目截图", command=self.preview_pages).pack(pady=5)

    def toggle_extra_panel(self):
        """Show or hide the extra-entry panel and update the toggle caption."""
        if self.extra_visible:
            self.extra_frame_container.pack_forget()
            self.toggle_btn.config(text="展开额外条目 ▸")
        else:
            self.extra_frame_container.pack(pady=5)
            self.toggle_btn.config(text="折叠额外条目 ▾")
        self.extra_visible = not self.extra_visible

    def build_editor_panel(self, parent):
        """Create the toolbar, the scrollable outline text editor and the
        save button."""
        frame = tk.Frame(parent)
        toolbar = tk.Frame(frame)
        self.create_tool_button(toolbar, 'add', "添加条目", self.add_entry)
        self.create_tool_button(toolbar, 'del', "删除条目", self.delete_entry)
        self.create_tool_button(toolbar, 'indent', "增加缩进", self.indent_entry)
        self.create_tool_button(toolbar, 'unindent', "减少缩进", self.unindent_entry)
        self.mode_btn = tk.Button(toolbar, text="Markdown版", command=self.toggle_mode)
        self.mode_btn.pack(side=tk.RIGHT, padx=5)
        toolbar.pack(fill=tk.X, pady=2)
        # Text editor with a vertical scrollbar.
        text_frame = tk.Frame(frame)
        text_frame.pack(fill=tk.BOTH, expand=1, padx=5, pady=5)
        self.text_editor = tk.Text(text_frame, wrap=tk.NONE, undo=True, width=50, height=35)
        scroll_y = tk.Scrollbar(text_frame, orient=tk.VERTICAL, command=self.text_editor.yview)
        self.text_editor.configure(yscrollcommand=scroll_y.set)
        self.text_editor.grid(row=0, column=0, sticky='nsew')
        scroll_y.grid(row=0, column=1, sticky='ns')
        text_frame.grid_rowconfigure(0, weight=1)
        text_frame.grid_columnconfigure(0, weight=1)
        # Tag used to render level-1 lines in bold.
        self.text_editor.tag_configure("bold", font=("TkDefaultFont", 10, "bold"))
        self.text_editor.bind("<Button-1>", self.on_click_select_line)
        self.text_editor.bind("<Double-1>", self.on_double_click_edit)
        # Save button kept in the editor pane in addition to the menu item.
        tk.Button(frame, text="保存到PDF", command=self.save_pdf).pack(pady=5)
        frame.pack(fill=tk.BOTH, expand=1)

    def create_tool_button(self, parent, icon_key, text, command):
        """Add a toolbar button, using the icon when icons are available and
        the text caption otherwise."""
        if self.use_icons:
            btn = tk.Button(parent, image=self.icons[icon_key], command=command)
        else:
            btn = tk.Button(parent, text=text, command=command)
        btn.pack(side=tk.LEFT, padx=2, pady=2)

    def setup_dnd(self):
        """Register the root window as a file drop target when tkinterdnd2
        is installed."""
        if DND_AVAILABLE:
            try:
                self.root.drop_target_register(DND_FILES)
                self.root.dnd_bind('<<Drop>>', self.handle_drop)
            except Exception:
                # Deliberate best-effort: drag-and-drop is a convenience and
                # must never prevent the window from opening.
                pass

    def bind_shortcuts(self):
        """Bind Tab / Shift-Tab to indent / unindent and Ctrl+S to save."""
        self.root.bind('<Tab>', lambda e: self.indent_entry())
        self.root.bind('<Shift-Tab>', lambda e: self.unindent_entry())
        self.root.bind('<Control-s>', lambda e: self.save_pdf())
        # Inside the editor the handlers return "break" so Tab does not also
        # insert a tab character / move focus.
        self.text_editor.bind('<Tab>', self.focus_indent)
        self.text_editor.bind('<Shift-Tab>', self.focus_unindent)

    def focus_indent(self, event):
        """Editor-level Tab handler: indent and stop further event handling."""
        self.indent_entry()
        return "break"

    def focus_unindent(self, event):
        """Editor-level Shift-Tab handler: unindent and stop further handling."""
        self.unindent_entry()
        return "break"

    # ------------------------------------------------------------ file I/O

    def handle_drop(self, event):
        """Load the first dropped path that ends in ``.pdf``.

        ``event.data`` is a Tcl list (paths containing spaces are wrapped in
        braces, several files are space separated), so it is split with Tk's
        own list parser rather than by stripping braces.
        """
        try:
            dropped = self.root.tk.splitlist(event.data)
        except tk.TclError:
            # Malformed list: fall back to treating the data as one path.
            dropped = (event.data.strip('{}'),)
        for filepath in dropped:
            if filepath.lower().endswith('.pdf'):
                self.load_pdf(filepath)
                break

    def load_pdf(self, path=None):
        """Open a PDF, read its page count and existing TOC, and reset state.

        Args:
            path: File to open; when falsy a file-open dialog is shown.
        """
        if not path:
            path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")])
        if not path:
            return
        try:
            with fitz.open(path) as doc:
                self.max_page_number = doc.page_count
                toc = doc.get_toc()
            self.entries = [OutlineEntry(level, title, page) for level, title, page in toc]
            self.current_file = path
            self.file_label.config(text=os.path.basename(path), fg='blue')
            self.status_bar.config(text=f"总页数:{self.max_page_number}")
            # A new document invalidates the previous mappings / extras.
            self.offset_mgr = OffsetManager()
            self.refresh_map_listbox()
            self.refresh_extra_listbox()
            self.cat_page_entry.delete(0, tk.END)
            self.actual_page_entry.delete(0, tk.END)
            self.extra_title_entry.delete(0, tk.END)
            self.extra_page_entry.delete(0, tk.END)
            self.refresh_display()
        except Exception as e:
            messagebox.showerror("错误", f"无法读取PDF:\n{str(e)}")

    def _clamp_page(self, page):
        """Clamp ``page`` into ``1..max_page_number`` (upper bound first)."""
        if page > self.max_page_number:
            return self.max_page_number
        if page < 1:
            return 1
        return page

    def save_pdf(self):
        """Write the edited outline into a copy of the current PDF.

        Re-parses the editor text, applies the page offset, builds the TOC
        (head extras, regular entries, tail extras - all page-clamped) and
        saves to a user-chosen path.
        """
        # Make sure the latest editor content is what gets saved.
        self.parse_display()
        if not self.current_file:
            messagebox.showwarning("提示", "请先选择PDF文件")
            return
        if not self.entries:
            messagebox.showwarning("提示", "大纲为空")
            return
        if not self.offset_mgr.apply_offset(self.entries):
            return
        if not self.validate_hierarchy():
            return
        extras = self.offset_mgr.extra_entries
        toc = [[1, title, self._clamp_page(page)] for title, page, pos in extras if pos == "head"]
        toc.extend([entry.level, entry.title, self._clamp_page(entry.actual_page)]
                   for entry in self.entries)
        # Extras are clamped like regular entries so a mistyped page cannot
        # point outside the document.
        toc.extend([1, title, self._clamp_page(page)] for title, page, pos in extras if pos == "tail")
        file_path = pathlib.Path(self.current_file)
        save_path = filedialog.asksaveasfilename(
            defaultextension=".pdf",
            initialdir=file_path.parent,
            initialfile=file_path.name,
            filetypes=[("PDF文件", "*.pdf")]
        )
        if not save_path:
            return
        # The dialog pre-selects the original file name, and PyMuPDF refuses
        # a normal save() onto the file it has open. In that case write to a
        # sibling temp file and swap it in after the document is closed.
        overwriting = (os.path.normcase(os.path.abspath(save_path))
                       == os.path.normcase(os.path.abspath(self.current_file)))
        tmp_path = save_path + ".tmp" if overwriting else None
        try:
            with fitz.open(self.current_file) as doc:
                doc.set_toc(toc, collapse=DEFAULT_COLLAPSE)
                # Show the outline panel when the PDF is opened.
                doc.set_pagemode(SET_PAEG_MODE)
                doc.save(tmp_path if overwriting else save_path)
            if overwriting:
                os.replace(tmp_path, save_path)
            messagebox.showinfo("成功", "文件保存成功")
        except Exception as e:
            if tmp_path is not None and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError:
                    # Cleanup is best-effort; the save error below is what
                    # the user needs to see.
                    pass
            messagebox.showerror("错误", f"保存失败:\n{str(e)}")

    def validate_hierarchy(self):
        """Warn about level jumps larger than one between consecutive entries.

        Returns:
            Always True - the check only warns and never blocks saving.
        """
        problems = []
        prev = 0
        for i, entry in enumerate(self.entries):
            lvl = entry.level
            if lvl - prev > 1:
                problems.append(f"第{i+1}行:从{prev}级跳至{lvl}级")
            prev = lvl
        if problems:
            messagebox.showwarning("层级跳跃警告",
                                   "发现以下层级跳跃,可能导致PDF大纲显示异常:\n" + "\n".join(problems))
        return True

    # ------------------------------------------------- text <-> entry model

    def refresh_display(self):
        """Replace the editor content with ``self.entries`` rendered in the
        current mode and re-apply bold to level-1 lines."""
        self.text_editor.delete("1.0", tk.END)
        if self.edit_mode == "indent":
            text = self.entries_to_indent()
        else:
            text = self.entries_to_markdown()
        self.text_editor.insert("1.0", text)
        self.apply_bold_tags()

    def parse_display(self):
        """Rebuild ``self.entries`` from the editor text in the current mode."""
        raw = self.text_editor.get("1.0", tk.END)
        if self.edit_mode == "indent":
            self.entries = self.parse_indent_text(raw)
        else:
            self.entries = self.parse_markdown_text(raw)

    def entries_to_indent(self):
        """Render entries as ``Title (page)`` lines, two spaces per level."""
        return "\n".join(f"{'  ' * (e.level - 1)}{e.title} ({e.page})" for e in self.entries)

    def entries_to_markdown(self):
        """Render entries as ``# Title page`` lines, one ``#`` per level."""
        return "\n".join(f"{'#' * e.level} {e.title} {e.page}" for e in self.entries)

    def _match_title_page(self, content):
        """Match ``Title (page)`` or, failing that, ``Title page``.

        Returns:
            The match object (group 1 = title, group 2 = page) or None.
        """
        return self._PAREN_PAGE_RE.match(content) or self._PLAIN_PAGE_RE.match(content)

    def parse_indent_text(self, text):
        """Parse indent-format text into a list of OutlineEntry.

        Level is ``leading_whitespace // 2 + 1``. Blank lines and lines
        without a trailing page number are skipped.
        """
        entries = []
        for line in text.splitlines():
            line = line.rstrip()
            if not line:
                continue
            content = line.lstrip()
            level = (len(line) - len(content)) // 2 + 1
            m = self._match_title_page(content)
            if m:
                entries.append(OutlineEntry(level, m.group(1).strip(), int(m.group(2))))
        return entries

    def parse_markdown_text(self, text):
        """Parse markdown-format text into a list of OutlineEntry.

        Only lines starting with ``#`` and ending in a page number count.
        """
        entries = []
        for line in text.splitlines():
            line = line.strip()
            if not line or not line.startswith('#'):
                continue
            m = self._MARKDOWN_RE.match(line)
            if m:
                entries.append(OutlineEntry(len(m.group(1)), m.group(2).strip(), int(m.group(3))))
        return entries

    def apply_bold_tags(self):
        """Tag every level-1 line in the editor as bold (and only those)."""
        self.text_editor.tag_remove("bold", "1.0", tk.END)
        for line_num in range(1, int(self.text_editor.index('end').split('.')[0])):
            line_text = self.text_editor.get(f"{line_num}.0", f"{line_num}.end")
            if self.edit_mode == "indent":
                # Level 1 = no leading space.
                if not line_text.startswith(" "):
                    self.text_editor.tag_add("bold", f"{line_num}.0", f"{line_num}.end")
            else:
                if line_text.startswith("# ") and not line_text.startswith("##"):
                    self.text_editor.tag_add("bold", f"{line_num}.0", f"{line_num}.end")

    def toggle_mode(self):
        """Switch between indent and markdown editing, preserving entries."""
        self.parse_display()
        if self.edit_mode == "indent":
            self.edit_mode = "markdown"
            self.mode_btn.config(text="缩进版")
        else:
            self.edit_mode = "indent"
            self.mode_btn.config(text="Markdown版")
        self.refresh_display()

    # ------------------------------------------------------ editing actions

    def add_entry(self):
        """Append a placeholder level-1 entry in the current mode's format.

        The markdown parser ignores lines that do not start with ``#``, so
        the placeholder has to be written as a heading in markdown mode.
        """
        if self.edit_mode == "indent":
            placeholder = "\n新条目 (1)"
        else:
            placeholder = "\n# 新条目 1"
        self.text_editor.insert(tk.END, placeholder)
        self.parse_display()
        self.apply_bold_tags()

    def delete_entry(self):
        """Delete the selected text, or ask the user to select a line."""
        try:
            sel = self.text_editor.tag_ranges("sel")
            if sel:
                self.text_editor.delete(sel[0], sel[1])
                self.parse_display()
                self.apply_bold_tags()
            else:
                messagebox.showinfo("提示", "请先点击行选择")
        except tk.TclError:
            # Only widget-level failures are ignored; programming errors
            # should surface instead of being swallowed.
            pass

    def indent_entry(self):
        """Move the selected / current line(s) one level deeper."""
        self.adjust_indent(2)

    def unindent_entry(self):
        """Move the selected / current line(s) one level up."""
        self.adjust_indent(-2)

    def _shift_line(self, line_text, delta):
        """Return ``line_text`` moved one level in the direction of ``delta``.

        Indent mode adds/removes ``delta`` leading spaces (never below zero).
        Markdown mode adds/removes one ``#`` (never below one); lines that
        are not headings are returned unchanged.
        """
        if self.edit_mode == "indent":
            body = line_text.lstrip(' ')
            width = max(0, len(line_text) - len(body) + delta)
            return ' ' * width + body
        body = line_text.lstrip()
        rest = body.lstrip('#')
        marks = len(body) - len(rest)
        if marks == 0:
            return line_text
        marks = max(1, marks + (1 if delta > 0 else -1))
        return '#' * marks + rest

    def adjust_indent(self, delta):
        """Change the level of every line in the selection (or the cursor
        line when nothing is selected), then reselect those lines.

        Args:
            delta: Positive to go deeper, negative to go up; in indent mode
                it is the number of spaces added or removed.
        """
        try:
            start_line = int(self.text_editor.index(tk.SEL_FIRST).split('.')[0])
            end_line = int(self.text_editor.index(tk.SEL_LAST).split('.')[0])
        except tk.TclError:
            # No selection: operate on the line holding the insert cursor.
            start_line = end_line = int(self.text_editor.index(tk.INSERT).split('.')[0])
        for line_num in range(start_line, end_line + 1):
            line_text = self.text_editor.get(f"{line_num}.0", f"{line_num}.end")
            self.text_editor.replace(f"{line_num}.0", f"{line_num}.end",
                                     self._shift_line(line_text, delta))
        self.text_editor.tag_remove("sel", "1.0", tk.END)
        self.text_editor.tag_add("sel", f"{start_line}.0", f"{end_line}.end")
        self.parse_display()
        # Replaced text loses its tags, and the level may have changed.
        self.apply_bold_tags()

    def on_click_select_line(self, event):
        """Select the whole line under the mouse pointer.

        NOTE(review): Tk's default Text class binding for <Button-1> runs
        after this widget-level handler and appears to clear the "sel" tag
        again, so the selection may not persist - confirm on target Tk.
        """
        index = self.text_editor.index(f"@{event.x},{event.y}")
        line = index.split('.')[0]
        self.text_editor.tag_remove("sel", "1.0", tk.END)
        self.text_editor.tag_add("sel", f"{line}.0", f"{line}.end")

    def on_double_click_edit(self, event):
        """Open the edit dialog for the double-clicked line and write the
        result back in the current mode's format."""
        index = self.text_editor.index(f"@{event.x},{event.y}")
        line_num = int(index.split('.')[0])
        line_text = self.text_editor.get(f"{line_num}.0", f"{line_num}.end")
        if self.edit_mode == "indent":
            title, page = self.parse_indent_line(line_text)
            level = (len(line_text) - len(line_text.lstrip(' '))) // 2 + 1
        else:
            title, page, level = self.parse_markdown_line(line_text)
        if title is None:
            return
        dialog = EditEntryDialog(self.root, title, page, level, self.max_page_number)
        self.root.wait_window(dialog.top)
        if dialog.result:
            new_title, new_page, new_level = dialog.result
            if self.edit_mode == "indent":
                new_line = "  " * (new_level - 1) + f"{new_title} ({new_page})"
            else:
                new_line = "#" * new_level + f" {new_title} {new_page}"
            self.text_editor.replace(f"{line_num}.0", f"{line_num}.end", new_line)
            self.parse_display()
            self.apply_bold_tags()

    def parse_indent_line(self, text):
        """Parse one indent-format line.

        Accepts both ``Title (page)`` and ``Title page``, matching what
        parse_indent_text accepts.

        Returns:
            ``(title, page)`` or ``(None, None)`` when the line has no
            trailing page number.
        """
        m = self._match_title_page(text.lstrip())
        if m:
            return m.group(1).strip(), int(m.group(2))
        return None, None

    def parse_markdown_line(self, text):
        """Parse one markdown-format line.

        Returns:
            ``(title, page, level)`` or ``(None, None, None)``.
        """
        m = self._MARKDOWN_RE.match(text.strip())
        if m:
            return m.group(2).strip(), int(m.group(3)), len(m.group(1))
        return None, None, None

    # ----------------------------------------------------------- OCR import

    def open_ocr_subwin(self):
        """Open a window where OCR text (``page (p-chain): title`` per line)
        can be pasted and imported as the new outline."""
        sub = tk.Toplevel(self.root)
        sub.title("导入现成OCR版本")
        sub.geometry("500x400")
        tk.Label(sub, text="粘贴OCR内容(格式:页码 (p...): 标题)").pack(pady=5)
        text = tk.Text(sub, wrap=tk.WORD, width=60, height=15)
        text.pack(padx=10, pady=5, fill=tk.BOTH, expand=True)
        # Example text shown as a grey placeholder.
        EXAMPLE = (
            "1 (p1): 示例\n"
            "3 (p2): 第1章 标题\n"
            "3 (p2-1): 第一节 标题\n"
            "5 (p2-2-1): 第二节 第一子节 标题\n"
        )
        text.insert("1.0", EXAMPLE)
        text.tag_configure("placeholder", foreground="grey")
        text.tag_add("placeholder", "1.0", "end")

        def on_focus_in(event):
            # Clear the placeholder as soon as the user starts editing.
            if text.get("1.0", "end-1c").strip() == EXAMPLE.strip():
                text.delete("1.0", "end")
                text.config(foreground="black")

        def on_focus_out(event):
            # Restore the placeholder when the box was left empty.
            if not text.get("1.0", "end-1c").strip():
                text.insert("1.0", EXAMPLE)
                text.tag_add("placeholder", "1.0", "end")

        text.bind("<FocusIn>", on_focus_in)
        text.bind("<FocusOut>", on_focus_out)

        def do_import():
            raw = text.get("1.0", tk.END)
            # Untouched example text counts as empty input.
            if raw.strip() == EXAMPLE.strip():
                raw = ""
            new_entries = self.parse_ocr_text(raw)
            if not new_entries:
                messagebox.showwarning("错误", "未能解析出有效条目")
                return
            self.entries = new_entries
            self.refresh_display()
            sub.destroy()

        tk.Button(sub, text="确定导入", command=do_import).pack(pady=10)

    def parse_ocr_text(self, raw):
        """Parse ``page (pA-B-C): title`` lines into OutlineEntry objects.

        The number of dash-separated segments in the ``p`` chain is the
        level. A warning is shown when some non-empty lines did not match.
        """
        entries = []
        non_empty = 0
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            non_empty += 1
            m = re.match(r'(\d+)\s+\(p([\d\-]+)\):\s*(.*)', line)
            if not m:
                continue
            cat_page = int(m.group(1))
            level = len(m.group(2).split('-'))  # e.g. "2-1-1" -> level 3
            entries.append(OutlineEntry(level, m.group(3).strip(), cat_page))
        valid = len(entries)
        if valid < non_empty:
            messagebox.showwarning("解析警告",
                                   f"输入了 {non_empty} 个非空行,仅解析出 {valid} 个条目,请检查格式。")
        return entries

    # ------------------------------------------------- mappings and extras

    def add_mapping(self):
        """Add the catalogue/actual page pair typed into the entry fields."""
        try:
            cat = int(self.cat_page_entry.get())
            actual = int(self.actual_page_entry.get())
        except ValueError:
            messagebox.showwarning("错误", "请输入有效数字")
            return
        self.offset_mgr.add_mapping(cat, actual)
        self.refresh_map_listbox()
        self.cat_page_entry.delete(0, tk.END)
        self.actual_page_entry.delete(0, tk.END)

    def del_mapping(self):
        """Remove the mapping selected in the list box, if any."""
        sel = self.map_listbox.curselection()
        if sel:
            self.offset_mgr.remove_mapping(sel[0])
            self.refresh_map_listbox()

    def refresh_map_listbox(self):
        """Redraw the mapping list box from the offset manager."""
        self.map_listbox.delete(0, tk.END)
        for cat, actual in self.offset_mgr.mappings:
            self.map_listbox.insert(tk.END, f"{cat} -> {actual}")

    def add_extra_entry(self):
        """Add the extra entry typed into the title / page / position fields."""
        title = self.extra_title_entry.get().strip()
        if not title:
            messagebox.showwarning("错误", "请输入标题")
            return
        try:
            page = int(self.extra_page_entry.get())
        except ValueError:
            messagebox.showwarning("错误", "请输入有效页码")
            return
        position = self.extra_pos_var.get()  # "head" or "tail"
        self.offset_mgr.add_extra(title, page, position)
        self.refresh_extra_listbox()
        self.extra_title_entry.delete(0, tk.END)
        self.extra_page_entry.delete(0, tk.END)

    def del_extra_entry(self):
        """Remove the extra entry selected in the list box, if any."""
        sel = self.extra_listbox.curselection()
        if sel:
            self.offset_mgr.remove_extra(sel[0])
            self.refresh_extra_listbox()

    def refresh_extra_listbox(self):
        """Redraw the extra-entry list box from the offset manager."""
        self.extra_listbox.delete(0, tk.END)
        for title, page, pos in self.offset_mgr.extra_entries:
            self.extra_listbox.insert(tk.END, f"[{'从头' if pos=='head' else '从尾'}] {title} : {page}")

    # ------------------------------------------------------------- preview

    def preview_pages(self):
        """Render the pages that the first three entries (shallowest levels
        first, levels 1-4 only) would point to after applying the offset."""
        # Sync with the latest editor content.
        self.parse_display()
        if not self.current_file or not self.entries:
            messagebox.showwarning("提示", "请先加载PDF并确保大纲不为空")
            return
        target_entries = [e for level in range(1, 5)
                          for e in self.entries if e.level == level][:3]
        if not target_entries:
            messagebox.showwarning("提示", "没有有效条目")
            return
        preview = tk.Toplevel(self.root)
        preview.title("预览前三条目对应页面")
        canvas = tk.Canvas(preview, width=600, height=800)
        canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        scrollbar = tk.Scrollbar(preview, orient=tk.VERTICAL, command=canvas.yview)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        canvas.configure(yscrollcommand=scrollbar.set)
        inner = tk.Frame(canvas)
        canvas.create_window((0, 0), window=inner, anchor='nw')
        try:
            # The context manager closes the document even when rendering
            # one of the pages fails.
            with fitz.open(self.current_file) as doc:
                offset = self.offset_mgr.get_offset()
                if offset is None:
                    # Inconsistent mappings: preview without an offset.
                    offset = 0
                for entry in target_entries:
                    actual = entry.page + offset if not entry.is_extra else entry.actual_page
                    page_idx = max(0, min(actual - 1, doc.page_count - 1))
                    pix = doc.load_page(page_idx).get_pixmap(dpi=100)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    img_tk = ImageTk.PhotoImage(img)
                    label_frame = tk.Frame(inner)
                    tk.Label(label_frame, text=f"条目: {entry.title}", font=('Arial', 10, 'bold')).pack()
                    tk.Label(label_frame, text=f"目录页码: {entry.page}  实际页码: {actual}").pack()
                    lbl_img = tk.Label(label_frame, image=img_tk)
                    # Keep a reference so Tk's image is not garbage collected.
                    lbl_img.image = img_tk
                    lbl_img.pack()
                    label_frame.pack(pady=10)
        except Exception as e:
            messagebox.showerror("错误", f"预览失败: {str(e)}")
        inner.update_idletasks()
        canvas.config(scrollregion=canvas.bbox("all"))

    # ---------------------------------------------------------------- help

    def show_prompt(self):
        """Show the LLM prompt with a copy-to-clipboard button."""
        prompt_win = tk.Toplevel(self.root)
        prompt_win.title("LLM OCR 提示词")
        text = tk.Text(prompt_win, wrap=tk.WORD, width=70, height=15)
        text.insert("1.0", LLM_PROMPT)
        text.pack(padx=10, pady=10)

        def copy_to_clipboard():
            self.root.clipboard_clear()
            self.root.clipboard_append(LLM_PROMPT)

        tk.Button(prompt_win, text="复制到剪贴板", command=copy_to_clipboard).pack(pady=5)

    def show_terminology(self):
        """Show the read-only glossary window."""
        term_win = tk.Toplevel(self.root)
        term_win.title("软件术语解释")
        text = tk.Text(term_win, wrap=tk.WORD, width=60, height=10)
        text.insert("1.0", TERMINOLOGY)
        text.config(state=tk.DISABLED)
        text.pack(padx=10, pady=10)
class EditEntryDialog:
    """Modal dialog for editing one outline entry's title, page and level.

    After the window closes, ``result`` is ``(title, page, level)`` when the
    user confirmed valid input, otherwise None.
    """

    def __init__(self, parent, title, page, level, max_page):
        """Build the dialog.

        Args:
            parent: Window the dialog belongs to.
            title: Initial title text.
            page: Initial page number.
            level: Initial outline level.
            max_page: Upper bound of the page spinbox; 9999 is used when
                this is 0 / falsy.
        """
        self.top = tk.Toplevel(parent)
        self.top.title("编辑条目")
        self.top.transient(parent)
        self.result = None
        tk.Label(self.top, text="标题:").grid(row=0, column=0, padx=5, pady=5, sticky='w')
        self.title_var = tk.StringVar(value=title)
        tk.Entry(self.top, textvariable=self.title_var, width=30).grid(row=0, column=1, padx=5, pady=5)
        tk.Label(self.top, text="页码:").grid(row=1, column=0, padx=5, pady=5, sticky='w')
        self.page_var = tk.IntVar(value=page)
        tk.Spinbox(self.top, from_=1, to=max_page or 9999, textvariable=self.page_var, width=8).grid(
            row=1, column=1, padx=5, pady=5, sticky='w')
        tk.Label(self.top, text="层级:").grid(row=2, column=0, padx=5, pady=5, sticky='w')
        self.level_var = tk.IntVar(value=level)
        tk.Spinbox(self.top, from_=1, to=5, textvariable=self.level_var, width=5).grid(
            row=2, column=1, padx=5, pady=5, sticky='w')
        btn_frame = tk.Frame(self.top)
        tk.Button(btn_frame, text="确定", command=self.on_ok).pack(side=tk.LEFT, padx=5)
        tk.Button(btn_frame, text="取消", command=self.top.destroy).pack(side=tk.LEFT, padx=5)
        btn_frame.grid(row=3, column=0, columnspan=2, pady=10)
        # Make the dialog modal: the caller remembers a line number while the
        # dialog is open, so the editor must not change underneath it. The
        # window has to be visible before it can take the grab.
        self.top.wait_visibility()
        self.top.grab_set()

    def on_ok(self):
        """Validate the fields, store them in ``result`` and close.

        The dialog stays open (with a warning) when the title is empty or a
        numeric field does not hold an integer, because such values would
        produce a line the outline parser silently drops.
        """
        title = self.title_var.get().strip()
        if not title:
            messagebox.showwarning("错误", "请输入标题")
            return
        try:
            page = self.page_var.get()
            level = self.level_var.get()
        except tk.TclError:
            # IntVar.get() raises when the spinbox text is not an integer.
            messagebox.showwarning("错误", "请输入有效数字")
            return
        # A level below 1 cannot be rendered as indentation or '#' marks.
        self.result = (title, page, max(1, level))
        self.top.destroy()
if __name__ == "__main__":
    # Use the drag-and-drop capable root window when tkinterdnd2 is present.
    root = TkinterDnD.Tk() if DND_AVAILABLE else tk.Tk()
    root.title("PDF大纲编辑器 Pro")
    root.geometry("900x700")
    app = PDFOutlineEditor(root)
    root.mainloop()