txt小说章节标题转换统一修改工具

zwjtr93 · 发表于 2025-4-2 15:12

小说章节标题转换统一修改工具，主要用来统一所下载小说内的章节名，方便拆书分析等工具用正则表达式正确识别章节，具体功能如下图所示。

下载地址：
https://wwlo.lanzouo.com/iPUmC2sgcpuj
密码:hb1j

楼下会分享python的源码

zwjtr93 · 发表于 2025-4-2 18:53

本帖最后由 zwjtr93 于 2025-4-2 18:54 编辑

正则表达式的内容和可选项越多，在长篇文本内选取到错误内容的可能性也越大。现在默认的正则表达式对单独的数字如“1.”、“第1”、“1章”等比较通用的写法都能正确识别；如果涉及汉字数字或其他命名方式，可以自行编写正则表达式。实在不会编写，就把章节名称发给 AI，问一下这个章节名的正则表达式应该如何编写就好。

zwjtr93 · 发表于 2025-4-2 15:13

以下为源码：

[Python] 纯文本查看 复制代码

001

002

003

004

005

006

007

008

009

010

011

012

013

014

015

016

017

018

019

020

021

022

023

024

025

026

027

028

029

030

031

032

033

034

035

036

037

038

039

040

041

042

043

044

045

046

047

048

049

050

051

052

053

054

055

056

057

058

059

060

061

062

063

064

065

066

067

068

069

070

071

072

073

074

075

076

077

078

079

080

081

082

083

084

085

086

087

088

089

090

091

092

093

094

095

096

097

098

099

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

import tkinter as tk
from tkinter import filedialog, scrolledtext, messagebox, ttk
import re
import os
import pathlib
import datetime
 
class NovelEditorApp:
    def __init__(self, master):
        self.master = master
        master.title("小说文本章节&内容修改器")
        master.geometry("750x650") # 调整窗口大小
 
        # --- 状态变量 ---
        self.original_file_path = None
        self.current_file_path = None # 指向当前要处理的文件（可能是原始文件或上次修改后的文件）
        self.modification_count = 0
 
        # --- GUI 组件 ---
 
        # 顶部框架 (文件选择)
        top_frame = ttk.Frame(master, padding="10")
        top_frame.pack(fill=tk.X)
 
        ttk.Button(top_frame, text="选择小说 TXT 文件", command=self.select_file).pack(side=tk.LEFT, padx=5)
        self.file_label = ttk.Label(top_frame, text="尚未选择文件", width=70, relief="sunken", anchor="w")
        self.file_label.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
 
        # 中间主框架 (分隔为左右两部分)
        main_frame = ttk.Frame(master, padding="10")
        main_frame.pack(fill=tk.BOTH, expand=True)
 
        # 左侧框架 (功能区)
        left_frame = ttk.Frame(main_frame, padding="10", relief="groove", borderwidth=2)
        left_frame.pack(side=tk.LEFT, fill=tk.Y, padx=(0, 10))
 
        # --- 章节标题修改 ---
        chapter_frame = ttk.LabelFrame(left_frame, text="章节标题统一化", padding="10")
        chapter_frame.pack(fill=tk.X, pady=10)
 
        ttk.Label(chapter_frame, text="识别模式 (正则表达式):").pack(anchor="w")
        self.chapter_find_pattern = tk.StringVar(value=r"^\s*(?:第?\s*(\d+)\s*章?\.?)\s*(.*)") # 默认模式
        ttk.Entry(chapter_frame, textvariable=self.chapter_find_pattern, width=35).pack(fill=tk.X, pady=(0, 5))
        ttk.Label(chapter_frame, text="说明: 使用正则表达式匹配章节标题行。\n必须包含一个捕获组 `(\d+)` 来提取章节数字。\n `(.*)` 可捕获标题数字后的内容(可选)。", justify=tk.LEFT, foreground="gray").pack(anchor="w")
 
        ttk.Label(chapter_frame, text="替换格式:").pack(anchor="w", pady=(10, 0))
        self.chapter_replace_format = tk.StringVar(value="第{num}章 {title}") # 默认格式
        ttk.Entry(chapter_frame, textvariable=self.chapter_replace_format, width=35).pack(fill=tk.X, pady=(0, 5))
        ttk.Label(chapter_frame, text="说明: 使用 `{num}` 代表捕获的章节数字。\n使用 `{title}` 代表捕获的章节数字后的内容。\n例如: `第{num}章` 或 `第{num}章 {title}`", justify=tk.LEFT, foreground="gray").pack(anchor="w")
 
        ttk.Button(chapter_frame, text="执行章节标题修改", command=self.modify_chapters).pack(pady=10)
 
        # --- 内容删除 ---
        remove_frame = ttk.LabelFrame(left_frame, text="批量内容删除", padding="10")
        remove_frame.pack(fill=tk.X, pady=10)
 
        ttk.Label(remove_frame, text="要删除的内容 (可以是文字、数字、符号或正则表达式):").pack(anchor="w")
        self.remove_pattern = tk.StringVar(value="") # 默认无
        ttk.Entry(remove_frame, textvariable=self.remove_pattern, width=35).pack(fill=tk.X, pady=(0, 5))
        ttk.Label(remove_frame, text="说明: 输入你想删除的精确文本或一个正则表达式。\n例如，删除所有数字 54646: 输入 `54646`\n删除所有连续的数字: 输入 `\d+`\n删除所有空白行: 输入 `^\s*$`", justify=tk.LEFT, foreground="gray").pack(anchor="w")
 
        ttk.Button(remove_frame, text="执行内容删除", command=self.remove_content).pack(pady=10)
 
        # 右侧框架 (说明和日志)
        right_frame = ttk.Frame(main_frame, padding="10")
        right_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
 
        ttk.Label(right_frame, text="操作说明和日志:", font=("Arial", 12, "bold")).pack(anchor="w")
        self.log_text = scrolledtext.ScrolledText(right_frame, wrap=tk.WORD, height=25, width=50)
        self.log_text.pack(fill=tk.BOTH, expand=True, pady=5)
        self.log_text.insert(tk.END, self.get_instructions())
        self.log_text.config(state=tk.DISABLED) # 初始设为只读
 
    def get_instructions(self):
        return """欢迎使用小说文本章节&内容修改器！
 
基本流程:
1.  点击 "选择小说 TXT 文件" 按钮选择你要处理的 txt 文件。
2.  (可选) 在 "章节标题统一化" 区域:
    * 检查或修改 "识别模式"。这是一个正则表达式，用于查找小说中的章节标题行。默认模式 `^\s*(?:第?\s*(\d+)\s*章?\.?)\s*(.*)` 能识别 "1.", "第1章", " 第 1 章 ", "1" 等多种形式，并提取数字 `(\d+)` 和后面的标题 `(.*)`。
    * 检查或修改 "替换格式"。这是新章节标题的格式。`{num}` 会被替换为识别到的章节数字，`{title}` 会被替换为识别到的章节数字后的内容（如果有的话）。
    * 点击 "执行章节标题修改" 按钮。
3.  (可选) 在 "批量内容删除" 区域:
    * 输入你想要批量删除的 "内容"。这可以是具体的文字、数字、符号，也可以是一个正则表达式。
    * 点击 "执行内容删除" 按钮。
4.  每次执行操作后，都会生成一个新的文件，文件名类似 "原始文件名_modified_N.txt" (N为数字)，保存在原始文件所在的目录下。
5.  后续操作会基于上一步修改生成的文件进行。日志区会显示详细的操作信息和新文件名。
6.  原始文件始终会被保留。
 
注意事项:
* 正则表达式是一种强大的文本匹配工具，如果不熟悉，请谨慎修改默认值或查阅相关资料。
* 捕获组 `()` 在正则表达式中用于提取特定部分，如 `(\d+)` 提取数字。
* 处理大文件可能需要一些时间。
* 确保你的 TXT 文件使用常见的编码（如 UTF-8）。
 
------------------------------------
"""
 
    def _log(self, message):
        """向日志区域添加信息"""
        now = datetime.datetime.now().strftime("%H:%M:%S")
        self.log_text.config(state=tk.NORMAL) # 允许编辑
        self.log_text.insert(tk.END, f"[{now}] {message}\n")
        self.log_text.see(tk.END) # 滚动到底部
        self.log_text.config(state=tk.DISABLED) # 禁止编辑
 
    def select_file(self):
        """打开文件选择对话框"""
        filepath = filedialog.askopenfilename(
            title="请选择一个 TXT 文件",
            filetypes=(("Text files", "*.txt"), ("All files", "*.*"))
        )
        if filepath:
            self.original_file_path = pathlib.Path(filepath)
            self.current_file_path = self.original_file_path # 初始当前文件为原始文件
            self.modification_count = 0 # 重置修改计数器
            self.file_label.config(text=str(self.original_file_path))
            self._log(f"已选择文件: {self.original_file_path}")
            self._log("现在可以执行修改操作了。后续操作将基于此文件或其修改版本。")
        else:
            self._log("未选择文件。")
 
    def _generate_output_path(self):
        """生成新的输出文件路径，避免覆盖"""
        if not self.original_file_path:
            return None
        self.modification_count += 1
        base_name = self.original_file_path.stem # 文件名（不含扩展名）
        dir_name = self.original_file_path.parent # 目录
        new_filename = f"{base_name}_modified_{self.modification_count}.txt"
        return dir_name / new_filename
 
    def modify_chapters(self):
        """修改章节标题"""
        if not self.current_file_path or not self.current_file_path.exists():
            messagebox.showerror("错误", "请先选择一个有效的文件！")
            self._log("错误：未选择文件或文件不存在。")
            return
 
        find_pattern_str = self.chapter_find_pattern.get()
        replace_format_str = self.chapter_replace_format.get()
 
        if not find_pattern_str or not replace_format_str:
            messagebox.showerror("错误", "请提供章节识别模式和替换格式！")
            self._log("错误：章节识别模式或替换格式为空。")
            return
 
        try:
            # 编译正则表达式，检查是否有效，并确保至少有一个捕获组
            find_regex = re.compile(find_pattern_str)
            if find_regex.groups < 1:
                 messagebox.showerror("错误", "识别模式的正则表达式必须至少包含一个捕获组 `(\d+)` 来提取章节数字！")
                 self._log("错误：识别模式缺少捕获组 `(\d+)`。")
                 return
        except re.error as e:
            messagebox.showerror("正则表达式错误", f"章节识别模式无效: {e}")
            self._log(f"错误：章节识别模式正则表达式无效 - {e}")
            return
 
        output_path = self._generate_output_path()
        if not output_path:
             return # Should not happen if file was selected
 
        self._log(f"开始处理章节标题...")
        self._log(f"读取文件: {self.current_file_path}")
        self._log(f"识别模式: {find_pattern_str}")
        self._log(f"替换格式: {replace_format_str}")
        self._log(f"写入文件: {output_path}")
 
        lines_processed = 0
        chapters_found = 0
        try:
            with open(self.current_file_path, 'r', encoding='utf-8', errors='ignore') as infile, \
                 open(output_path, 'w', encoding='utf-8') as outfile:
                for line in infile:
                    lines_processed += 1
                    match = find_regex.match(line)
                    if match:
                        chapters_found += 1
                        try:
                            num_str = match.group(1) # 第一个捕获组是数字
                            num = int(num_str) # 尝试转换为整数
 
                            # 尝试获取标题（第二个捕获组，如果存在）
                            title = ""
                            if find_regex.groups > 1:
                                title = match.group(2).strip() if match.group(2) else ""
 
                            # 格式化新标题
                            # 使用字典传递参数给 format，避免 KeyError
                            format_args = {'num': num, 'title': title}
                            new_title = replace_format_str.format(**format_args)
                            outfile.write(new_title + '\n')
                        except ValueError:
                             self._log(f"警告: 在行 {lines_processed} 找到匹配，但捕获组 '{num_str}' 不是有效数字，该行已跳过修改。")
                             outfile.write(line) # 写入原始行
                        except IndexError:
                            self._log(f"警告: 正则表达式配置可能不正确，无法在行 {lines_processed} 提取所需内容，该行已跳过修改。")
                            outfile.write(line) # 写入原始行
                        except KeyError as ke:
                            messagebox.showerror("格式化错误", f"替换格式 '{replace_format_str}' 中的占位符 {ke} 无法被匹配！\n请确保识别模式能捕获对应内容，或调整替换格式。")
                            self._log(f"错误：替换格式中的占位符 {ke} 无法匹配。")
                            # 清理已创建的输出文件
                            outfile.close()
                            os.remove(output_path)
                            self.modification_count -= 1 # 回滚计数
                            return
                    else:
                        outfile.write(line) # 非章节标题行，直接写入
 
            self._log(f"处理完成！共处理 {lines_processed} 行，找到并修改了 {chapters_found} 个章节标题。")
            self._log(f"结果已保存到: {output_path}")
            self.current_file_path = output_path # 更新当前文件路径为新生成的文件
 
        except FileNotFoundError:
            messagebox.showerror("错误", f"文件未找到: {self.current_file_path}")
            self._log(f"错误：文件 {self.current_file_path} 未找到。")
            self.modification_count -= 1 # 回滚计数
        except Exception as e:
            messagebox.showerror("处理失败", f"处理文件时发生错误: {e}")
            self._log(f"错误：处理文件时发生异常 - {e}")
            # 尝试清理可能不完整的输出文件
            if output_path and output_path.exists():
                 try:
                     os.remove(output_path)
                     self._log(f"已删除不完整的输出文件: {output_path}")
                 except OSError:
                     self._log(f"警告：无法删除不完整的输出文件: {output_path}")
            self.modification_count -= 1 # 回滚计数
 
    def remove_content(self):
        """删除指定内容"""
        if not self.current_file_path or not self.current_file_path.exists():
            messagebox.showerror("错误", "请先选择一个有效的文件！")
            self._log("错误：未选择文件或文件不存在。")
            return
 
        pattern_to_remove = self.remove_pattern.get()
 
        if not pattern_to_remove:
            messagebox.showwarning("提示", "请输入要删除的内容或模式。")
            self._log("提示：删除内容为空，操作未执行。")
            return
 
        output_path = self._generate_output_path()
        if not output_path:
             return
 
        self._log(f"开始删除内容...")
        self._log(f"读取文件: {self.current_file_path}")
        self._log(f"删除模式/文本: {pattern_to_remove}")
        self._log(f"写入文件: {output_path}")
 
        lines_processed = 0
        removals_made = 0 # 记录实际发生替换的次数（不是行数）
        try:
            # 尝试将输入视为正则表达式
            try:
                remove_regex = re.compile(pattern_to_remove)
                is_regex = True
                self._log("删除模式被解释为正则表达式。")
            except re.error:
                # 如果不是有效正则，则视为普通文本进行精确匹配替换
                is_regex = False
                self._log("删除模式被解释为普通文本。")
 
            with open(self.current_file_path, 'r', encoding='utf-8', errors='ignore') as infile, \
                 open(output_path, 'w', encoding='utf-8') as outfile:
                for line in infile:
                    lines_processed += 1
                    original_line = line
                    if is_regex:
                        # 使用正则表达式替换
                        modified_line, count = remove_regex.subn("", line)
                        removals_made += count
                    else:
                        # 使用普通文本替换
                        modified_line = line.replace(pattern_to_remove, "")
                        if modified_line != original_line:
                            # 计算替换次数比较麻烦，简单标记发生过修改
                             removals_made += original_line.count(pattern_to_remove) # 近似计数
 
                    outfile.write(modified_line)
 
            self._log(f"处理完成！共处理 {lines_processed} 行。")
            if removals_made > 0:
                 self._log(f"大约进行了 {removals_made} 次内容删除。")
            else:
                 self._log("未找到或删除任何匹配的内容。")
            self._log(f"结果已保存到: {output_path}")
            self.current_file_path = output_path # 更新当前文件路径
 
        except FileNotFoundError:
            messagebox.showerror("错误", f"文件未找到: {self.current_file_path}")
            self._log(f"错误：文件 {self.current_file_path} 未找到。")
            self.modification_count -= 1 # 回滚计数
        except Exception as e:
            messagebox.showerror("处理失败", f"处理文件时发生错误: {e}")
            self._log(f"错误：处理文件时发生异常 - {e}")
            # 尝试清理可能不完整的输出文件
            if output_path and output_path.exists():
                 try:
                     os.remove(output_path)
                     self._log(f"已删除不完整的输出文件: {output_path}")
                 except OSError:
                     self._log(f"警告：无法删除不完整的输出文件: {output_path}")
            self.modification_count -= 1 # 回滚计数
 
 
if __name__ == "__main__":
    # Script entry point: create the Tk window, attach the editor to it and
    # hand control to the Tk event loop until the window is closed.
    window = tk.Tk()
    editor = NovelEditorApp(window)
    window.mainloop()

szbulldog · 发表于 2025-4-2 16:01

提示: 作者被禁止或删除内容自动屏蔽

sky8894 · 发表于 2025-4-2 16:24

很好的小工具

52PJ070 · 发表于 2025-4-2 16:34

楼主的小工具挺多啊，感谢楼主分享！论坛有你更精彩。

KarlCoke · 发表于 2025-4-2 16:54

这个豪，很实用

dukhchina · 发表于 2025-4-2 17:37

很实用，对于一些章节混乱的小说帮大忙了~

Ip852340 · 发表于 2025-4-2 18:13

很实用，感谢楼主分享

coconut2032 · 发表于 2025-4-2 18:15

还要自己写正则表达式就麻烦了点。最好做个表达式列表让用户自己挑。

wantall · 发表于 2025-4-2 18:15

链接打不开，可能因为用的是移动的网络？

帐号		自动登录	找回密码
密码			注册[Register]

szbulldog szbulldog 当前离线好友阅读权限 0 听众最后登录 1970-1-1 头像被屏蔽	3^# szbulldog 发表于 2025-4-2 16:01 《站点帮助文档》有什么问题来这里看看吧，这里有你想知道的内容！提示: 作者被禁止或删除内容自动屏蔽
	呼吁大家发布原创作品添加吾爱破解论坛标识！
	回复支持举报

[Windows] txt小说章节标题转换统一修改工具

免费评分