HTML 合并工具
起因是鄙人希望对多个html实现合并,主要是方便单文件的阅读和查看
查阅到有个开源工具sigil(是关于epub合并的),但是使用起来太过于高阶,有一定门槛
于是就有了这么一个稍微友好一些的带图形界面的小工具,用于将多个独立的 HTML 文件合并为「带悬浮侧边栏的单文件 HTML」和「EPUB」,并自动生成章节目录(书签)。
望大佬们不吝赐教
界面图
功能特点
- ✅ 图形界面操作(通过按钮添加文件/文件夹,暂不支持拖拽)
- ✅ 支持手动排序、删除、重命名章节标题
- ✅ 同时输出 EPUB(标准电子书,含 NCX 导航)和 合并 HTML(左侧悬浮可折叠目录)
- ✅ 平滑滚动导航,桌面/移动端自动适配
- ✅ 纯 Python 实现,轻量(仅需安装三个第三方库:beautifulsoup4、ebooklib、lxml)
安装依赖
pip install beautifulsoup4 ebooklib lxml
菜鸟留言
鄙人的小工具,目前并未进行严格的测试,只是我个人自用发现是可以实现指定目标的。
未来,可能会增加对于「非单个html文件」的处理(即,目前这部分并没有进行开发)
如果有大佬不吝赐教,在鄙人的这版简陋版上进行魔改和优化,也非常欢迎哈~
代码
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import html
import os
import re
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

from bs4 import BeautifulSoup
from ebooklib import epub
def clean_html_body(soup):
    """Return the inner markup of a parsed document without scripts or styles.

    The ``<body>`` element is used when the document has one; otherwise the
    whole parsed tree is used as a fallback. Every ``<script>`` and
    ``<style>`` element below the chosen root is removed from ``soup`` in
    place before the remaining markup is serialized.

    Args:
        soup: A parsed BeautifulSoup document. It is modified by this call.

    Returns:
        The serialized contents of the chosen root as a string.
    """
    root = soup.find('body')
    if root is None:
        # No <body> in the document: fall back to the whole tree.
        root = soup
    for unwanted in root.find_all(['script', 'style']):
        unwanted.decompose()
    return root.decode_contents()
def generate_epub(file_items, output_path, book_title, lang='zh'):
    """Build an EPUB file containing one chapter per input HTML file.

    Args:
        file_items: Iterable of ``(filepath, title)`` pairs in reading order.
        output_path: Destination path of the EPUB file.
        book_title: Title stored in the book metadata; the book identifier is
            derived from it.
        lang: Language code set on the book and on every chapter.

    Returns:
        True once ``epub.write_epub`` has returned.

    Raises:
        OSError: If an input file cannot be opened or read.
    """
    book = epub.EpubBook()
    identifier = re.sub(r'[^a-zA-Z0-9]', '_', book_title) + '_merged'
    book.set_identifier(identifier)
    book.set_title(book_title)
    book.set_language(lang)

    spine = ['nav']
    toc = []
    for idx, (filepath, title) in enumerate(file_items, 1):
        # Hand raw bytes to BeautifulSoup so it can honour the encoding the
        # page declares (GBK/GB2312 pages, for example) instead of forcing
        # UTF-8 and replacing every undecodable byte.
        with open(filepath, 'rb') as f:
            raw = f.read()
        soup = BeautifulSoup(raw, 'lxml')
        body_content = clean_html_body(soup)

        file_name = f'chap_{idx}.xhtml'
        ch = epub.EpubHtml(title=title, file_name=file_name, lang=lang)
        # The title is plain text typed by the user; escape it so characters
        # such as "&" or "<" cannot break the chapter markup.
        ch.content = f'<h1>{html.escape(title)}</h1>\n{body_content}'
        book.add_item(ch)
        spine.append(ch)
        toc.append(epub.Link(file_name, title, f'chap_{idx}'))

    book.toc = toc
    book.spine = spine
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    epub.write_epub(output_path, book, {})
    return True
def generate_merged_html(file_items, output_path, book_title):
    """Merge several HTML files into one page with a collapsible sidebar TOC.

    Each input file becomes a ``<div class="chapter">`` with the id ``chN``
    (N starting at 1), headed by its title, and gets a matching entry in the
    sidebar. The page embeds its own CSS and JavaScript.

    Args:
        file_items: Iterable of ``(filepath, title)`` pairs in reading order.
        output_path: Destination path of the merged HTML file (written as
            UTF-8).
        book_title: Text placed in the page ``<title>``.

    Returns:
        True once the file has been written.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
    """
    toc_items = []
    parts = []
    for idx, (filepath, title) in enumerate(file_items, 1):
        # Hand raw bytes to BeautifulSoup so it can honour the encoding the
        # page declares (GBK/GB2312 pages, for example) instead of forcing
        # UTF-8 and replacing every undecodable byte.
        with open(filepath, 'rb') as f:
            raw = f.read()
        soup = BeautifulSoup(raw, 'lxml')
        body_content = clean_html_body(soup)
        anchor = f'ch{idx}'
        # Titles are plain text typed by the user; escape them so "&", "<"
        # or quotes cannot break the generated markup.
        safe_title = html.escape(title)
        toc_items.append(f'<li><a href="#{anchor}" onclick="scrollToAnchor(\'{anchor}\');return false;">{safe_title}</a></li>')
        parts.append(f'<div id="{anchor}" class="chapter">\n<h1>{safe_title}</h1>\n{body_content}\n</div>')

    toc_html = "".join(toc_items)
    chapters_html = ''.join(parts)
    safe_book_title = html.escape(book_title)

    sidebar_html = f'''
<div id="sidebar">
<div id="sidebar-toggle" onclick="toggleSidebar()">☰</div>
<div id="sidebar-content">
<h2>目录</h2>
<ul>
{toc_html}
</ul>
</div>
</div>
'''
    # Complete HTML document, including CSS and JS. Literal braces are
    # doubled because this is an f-string.
    full_html = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_book_title}</title>
<style>
body {{
font-family: serif;
line-height: 1.6;
max-width: 800px;
margin: auto;
padding: 20px 20px 20px 280px; /* 为侧边栏留出空间 */
transition: padding-left 0.3s;
}}
h1 {{ color: #333; }}
/* 侧边栏容器 */
#sidebar {{
position: fixed;
top: 0;
left: 0;
width: 260px;
height: 100vh;
background: #f5f5f5;
border-right: 1px solid #ccc;
box-shadow: 2px 0 5px rgba(0,0,0,0.1);
z-index: 1000;
transition: transform 0.3s ease;
transform: translateX(0);
}}
/* 折叠状态 */
#sidebar.collapsed {{
transform: translateX(-260px);
}}
#sidebar.collapsed #sidebar-toggle {{
right: -40px;
border-radius: 0 4px 4px 0;
}}
/* 折叠时正文回缩 */
body.sidebar-collapsed {{
padding-left: 20px;
}}
/* 切换按钮 */
#sidebar-toggle {{
position: absolute;
top: 10px;
right: -40px;
width: 40px;
height: 40px;
background: #f5f5f5;
border: 1px solid #ccc;
border-left: none;
cursor: pointer;
font-size: 16px;
line-height: 40px;
text-align: center;
border-radius: 0 4px 4px 0;
box-shadow: 2px 0 5px rgba(0,0,0,0.1);
user-select: none;
transition: right 0.3s;
}}
#sidebar-toggle:hover {{
background: #e0e0e0;
}}
/* 目录内容 */
#sidebar-content {{
padding: 15px;
height: 100%;
box-sizing: border-box;
overflow-y: auto;
}}
#sidebar-content h2 {{
margin-top: 0;
font-size: 18px;
border-bottom: 1px solid #ccc;
padding-bottom: 5px;
}}
#sidebar-content ul {{
list-style: none;
padding: 0;
margin: 0;
}}
#sidebar-content li {{
margin: 6px 0;
}}
#sidebar-content a {{
text-decoration: none;
color: #333;
display: block;
padding: 4px 8px;
border-radius: 3px;
transition: background 0.2s;
}}
#sidebar-content a:hover {{
background: #ddd;
color: #000;
}}
/* 移动端适配:默认隐藏侧边栏,通过汉堡按钮唤出 */
@media (max-width: 768px) {{
body {{
padding-left: 20px;
}}
#sidebar {{
transform: translateX(-260px);
}}
#sidebar.collapsed {{
transform: translateX(-260px);
}}
/* 移动端显式覆盖:需要时通过JS添加类来显示 */
#sidebar.mobile-visible {{
transform: translateX(0);
}}
}}
</style>
</head>
<body class="sidebar-visible">
{sidebar_html}
<div id="content">
{chapters_html}
</div>
<script>
function toggleSidebar() {{
var sidebar = document.getElementById('sidebar');
var body = document.body;
if (window.innerWidth <= 768) {{
// 移动端行为:切换 mobile-visible 类
sidebar.classList.toggle('mobile-visible');
}} else {{
// 桌面端:切换 collapsed 类
sidebar.classList.toggle('collapsed');
body.classList.toggle('sidebar-collapsed');
}}
}}
function scrollToAnchor(anchor) {{
var el = document.getElementById(anchor);
if (el) {{
el.scrollIntoView({{ behavior: 'smooth' }});
// 移动端点击后自动折叠侧边栏
if (window.innerWidth <= 768) {{
document.getElementById('sidebar').classList.remove('mobile-visible');
}}
}}
}}
// 点击正文区域时,移动端自动折叠侧边栏
document.getElementById('content').addEventListener('click', function() {{
if (window.innerWidth <= 768) {{
document.getElementById('sidebar').classList.remove('mobile-visible');
}}
}});
</script>
</body>
</html>'''
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(full_html)
    return True
# ---------- 图形界面 ----------
class MergerApp:
    """Tkinter front end for merging HTML files into one EPUB and one HTML file.

    The ordered chapter list lives in ``self.file_list`` as
    ``(file path, chapter title)`` tuples. The Treeview is kept in the same
    order, so a row's index in the tree is also its index in the list.
    """

    def __init__(self, root):
        """Configure the main window and build all widgets.

        Args:
            root: The Tk root window that hosts the application.
        """
        self.root = root
        self.root.title("HTML 合并工具")
        self.root.geometry("700x550")
        self.root.resizable(True, True)
        # Data model: [(file path, displayed title), ...]
        self.file_list = []
        self.create_widgets()

    def create_widgets(self):
        """Create the button rows, the chapter list, the settings and the status line."""
        # Top button row: add / clear
        btn_frame = ttk.Frame(self.root)
        btn_frame.pack(fill=tk.X, padx=10, pady=5)
        ttk.Button(btn_frame, text="添加文件", command=self.add_files).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="添加文件夹", command=self.add_folder).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="清空列表", command=self.clear_list).pack(side=tk.LEFT, padx=5)

        # Ordering buttons
        order_frame = ttk.Frame(self.root)
        order_frame.pack(fill=tk.X, padx=10, pady=5)
        ttk.Button(order_frame, text="上移", command=self.move_up).pack(side=tk.LEFT, padx=5)
        ttk.Button(order_frame, text="下移", command=self.move_down).pack(side=tk.LEFT, padx=5)
        ttk.Button(order_frame, text="删除选中", command=self.delete_selected).pack(side=tk.LEFT, padx=5)

        # File list (Treeview with two columns: title, file name)
        columns = ('title', 'filename')
        self.tree = ttk.Treeview(self.root, columns=columns, show='headings', selectmode='browse')
        self.tree.heading('title', text='章节标题(双击编辑)')
        self.tree.heading('filename', text='原文件名')
        self.tree.column('title', width=300)
        self.tree.column('filename', width=300)
        self.tree.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.tree.bind('<Double-1>', self.on_double_click)

        # Output settings
        set_frame = ttk.LabelFrame(self.root, text="输出设置")
        set_frame.pack(fill=tk.X, padx=10, pady=5)
        ttk.Label(set_frame, text="书名/合集标题:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=2)
        self.title_entry = ttk.Entry(set_frame, width=40)
        self.title_entry.grid(row=0, column=1, sticky=tk.W, padx=5, pady=2)
        self.title_entry.insert(0, "我的合集")
        ttk.Label(set_frame, text="输出目录:").grid(row=1, column=0, sticky=tk.W, padx=5, pady=2)
        self.outdir_var = tk.StringVar()
        ttk.Entry(set_frame, textvariable=self.outdir_var, width=40).grid(row=1, column=1, sticky=tk.W, padx=5, pady=2)
        ttk.Button(set_frame, text="浏览...", command=self.choose_output_dir).grid(row=1, column=2, padx=5, pady=2)

        # Generate button; the reference is kept so it can be disabled while
        # a generation run is in progress.
        gen_frame = ttk.Frame(self.root)
        gen_frame.pack(fill=tk.X, padx=10, pady=10)
        self.gen_button = ttk.Button(gen_frame, text="生成 EPUB + HTML", command=self.start_generation)
        self.gen_button.pack(side=tk.LEFT, padx=10)
        self.status_var = tk.StringVar(value="就绪")
        ttk.Label(gen_frame, textvariable=self.status_var).pack(side=tk.LEFT, padx=20)

    # ---------- Pure helpers ----------
    @staticmethod
    def _natural_key(name):
        """Return a sort key that orders embedded numbers numerically.

        With this key "ch2.html" sorts before "ch10.html". Text parts are
        compared case-insensitively.
        """
        parts = re.split(r'(\d+)', name)
        # re.split with one capture group puts the digit runs at odd indices.
        return [int(part) if i % 2 else part.lower() for i, part in enumerate(parts)]

    @staticmethod
    def _default_title(filepath):
        """Derive the initial chapter title from a file name.

        The extension is dropped, "_" and "-" become spaces, and the result
        is title-cased.
        """
        stem = os.path.splitext(os.path.basename(filepath))[0]
        return stem.replace('_', ' ').replace('-', ' ').title()

    @staticmethod
    def _default_output_dir():
        """Return the user's Desktop folder if it exists, else the home folder."""
        home = os.path.expanduser("~")
        desktop = os.path.join(home, "Desktop")
        return desktop if os.path.isdir(desktop) else home

    # ---------- File operations ----------
    def add_files(self):
        """Ask for one or more HTML files and append them to the list."""
        files = filedialog.askopenfilenames(
            title="选择 HTML 文件",
            filetypes=[("HTML 文件", "*.html *.htm"), ("所有文件", "*.*")]
        )
        for f in files:
            self.add_item(f)

    def add_folder(self):
        """Ask for a folder and append every .html/.htm file directly inside it."""
        folder = filedialog.askdirectory(title="选择包含 HTML 的文件夹")
        if not folder:
            return
        # os.listdir returns names in arbitrary order; sort them so chapters
        # arrive in a predictable, human-friendly order.
        for name in sorted(os.listdir(folder), key=self._natural_key):
            path = os.path.join(folder, name)
            if name.lower().endswith(('.html', '.htm')) and os.path.isfile(path):
                self.add_item(path)

    def add_item(self, filepath):
        """Append one file to the list and the tree unless it is already present."""
        # Avoid duplicates
        if any(filepath == item[0] for item in self.file_list):
            return
        title = self._default_title(filepath)
        self.file_list.append((filepath, title))
        self.tree.insert('', tk.END, values=(title, os.path.basename(filepath)))

    def clear_list(self):
        """Remove every entry from the list and the tree."""
        self.file_list.clear()
        for item in self.tree.get_children():
            self.tree.delete(item)

    def _move_selected(self, offset):
        """Swap the selected row with the row ``offset`` positions away."""
        selected = self.tree.selection()
        if not selected:
            return
        idx = self.tree.index(selected[0])
        target = idx + offset
        if target < 0 or target >= len(self.file_list):
            return
        self.file_list[idx], self.file_list[target] = self.file_list[target], self.file_list[idx]
        self.refresh_tree()
        # refresh_tree recreates all rows, so re-select by position.
        self.tree.selection_set(self.tree.get_children()[target])

    def move_up(self):
        """Move the selected chapter one position up."""
        self._move_selected(-1)

    def move_down(self):
        """Move the selected chapter one position down."""
        self._move_selected(1)

    def delete_selected(self):
        """Remove the selected chapter from the list and the tree."""
        selected = self.tree.selection()
        if not selected:
            return
        idx = self.tree.index(selected[0])
        self.tree.delete(selected[0])
        del self.file_list[idx]

    def refresh_tree(self):
        """Rebuild all tree rows from ``self.file_list``."""
        for item in self.tree.get_children():
            self.tree.delete(item)
        for fp, title in self.file_list:
            self.tree.insert('', tk.END, values=(title, os.path.basename(fp)))

    def on_double_click(self, event):
        """Open a modal dialog to rename the chapter that was double-clicked."""
        # Resolve the row under the pointer so that double-clicking the
        # heading or the empty area does not open the editor.
        row_id = self.tree.identify_row(event.y)
        if not row_id:
            return
        idx = self.tree.index(row_id)
        # Take the title from the model rather than from the tree values,
        # which Tk may hand back converted to numbers.
        filepath, old_title = self.file_list[idx]

        win = tk.Toplevel(self.root)
        win.title("编辑章节标题")
        ttk.Label(win, text="新标题:").pack(padx=10, pady=5)
        entry = ttk.Entry(win, width=40)
        entry.pack(padx=10, pady=5)
        entry.insert(0, old_title)
        entry.select_range(0, tk.END)

        def save(_event=None):
            new_title = entry.get().strip()
            if new_title:
                self.file_list[idx] = (filepath, new_title)
                # The second column shows the file name, not the full path.
                self.tree.item(row_id, values=(new_title, os.path.basename(filepath)))
            win.destroy()

        ttk.Button(win, text="确定", command=save).pack(pady=5)
        entry.bind('<Return>', save)
        entry.focus_set()
        win.transient(self.root)
        win.grab_set()
        self.root.wait_window(win)

    def choose_output_dir(self):
        """Ask for the output folder and store it in the settings field."""
        folder = filedialog.askdirectory(title="选择输出文件夹")
        if folder:
            self.outdir_var.set(folder)

    # ---------- Generation ----------
    def _set_busy(self, busy):
        """Toggle the busy cursor and the enabled state of the generate button."""
        self.root.config(cursor="watch" if busy else "")
        self.gen_button.config(state=tk.DISABLED if busy else tk.NORMAL)

    def start_generation(self):
        """Validate the inputs and generate both outputs on a worker thread."""
        if not self.file_list:
            messagebox.showwarning("无文件", "请先添加至少一个 HTML 文件。")
            return
        out_dir = self.outdir_var.get().strip()
        if not out_dir:
            out_dir = self._default_output_dir()
            self.outdir_var.set(out_dir)
        book_title = self.title_entry.get().strip() or "合集"
        # Snapshot the list on the UI thread so later edits cannot affect the run.
        items = list(self.file_list)

        # Disable the button to prevent a second run from being started.
        self._set_busy(True)
        self.status_var.set("正在生成,请稍候...")

        def run():
            try:
                os.makedirs(out_dir, exist_ok=True)
                # Replace characters that are not allowed in Windows file names.
                safe_title = re.sub(r'[\\/:*?"<>|]', '_', book_title)
                epub_path = os.path.join(out_dir, safe_title + '.epub')
                html_path = os.path.join(out_dir, safe_title + '_merged.html')
                generate_epub(items, epub_path, book_title)
                generate_merged_html(items, html_path, book_title)
            except Exception as e:
                # Python unbinds "e" when the except block ends, and the
                # callback runs later, so capture the message now.
                error_msg = str(e)
                self.root.after(0, lambda: self.on_generation_error(error_msg))
            else:
                self.root.after(0, lambda: self.on_generation_done(epub_path, html_path))

        threading.Thread(target=run, daemon=True).start()

    def on_generation_done(self, epub_path, html_path):
        """Restore the UI and report the two generated file paths."""
        self._set_busy(False)
        self.status_var.set("生成完成!")
        messagebox.showinfo("成功", f"文件已生成:\n{epub_path}\n{html_path}")

    def on_generation_error(self, error_msg):
        """Restore the UI and show the error message of a failed run."""
        self._set_busy(False)
        self.status_var.set("生成失败")
        messagebox.showerror("错误", f"生成过程中发生错误:\n{error_msg}")
# ---------- 启动 ----------
# Script entry point: build the main window, attach the application to it
# and hand control to the Tk event loop.
if __name__ == "__main__":
    window = tk.Tk()
    MergerApp(window)
    window.mainloop()