吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 1012|回复: 14
收起左侧

[原创工具] 批量文件分割器—— 支持JSON/TXT/CSV,界面清爽

[复制链接]
Zero清风 发表于 2025-4-29 14:19
本帖最后由 Zero清风 于 2025-4-30 09:14 编辑

部分 AI 训练平台不支持上传太大的文件,所以做了个分割软件,用来把大文件切分成小文件
功能特点:
  • 支持批量分割 .json / .txt / .csv 文件
  • 支持按记录条数 / 文件大小(MB)双条件切分
  • 支持批量多个文件同时处理
  • 分割进度实时可视化,操作流畅不卡顿
  • 支持中断,分割过程中可随时退出
下载链接:https://azg.lanzouq.com/iLXeL2uu7ogf
下载链接.txt (36 Bytes, 下载次数: 6)

{7C836E9D-7BF5-4622-A81D-8F36AE9F4331}.png {69A1B5DD-B522-4E84-B9EB-A7212CB54F6A}.png {5BAE1F3B-20D0-4724-BED3-CFF6A8F5104C}.png
[Asm] 纯文本查看 复制代码
001
002
003
004
005
006
007
008
009
010
011
012
013
014
015
016
017
018
019
020
021
022
023
024
025
026
027
028
029
030
031
032
033
034
035
036
037
038
039
040
041
042
043
044
045
046
047
048
049
050
051
052
053
054
055
056
057
058
059
060
061
062
063
064
065
066
067
068
069
070
071
072
073
074
075
076
077
078
079
080
081
082
083
084
085
086
087
088
089
090
091
092
093
094
095
096
097
098
099
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import json
import os
import queue
import sys
import threading
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar
 
def resource_path(relative_path):
    """Return the absolute location of a bundled resource.

    When ``sys`` carries a ``_MEIPASS`` attribute (set by the frozen-app
    bootloader), the resource is resolved against that directory;
    otherwise it is resolved against the current working directory.
    """
    try:
        base_dir = sys._MEIPASS
    except AttributeError:
        # Not running from a bundle: fall back to the working directory.
        base_dir = os.path.abspath(".")
    return os.path.join(base_dir, relative_path)
 
class FileSplitterApp:
    """Tkinter front end that splits JSON/TXT/CSV files into numbered parts.

    The selected files are processed on a background thread. That thread
    never touches Tk: it posts ``(kind, *payload)`` tuples to
    ``self._events`` and the Tk thread applies them from ``_drain_events``,
    which re-schedules itself with ``root.after`` while work is pending.

    Parts are written to a folder named after the input file (without its
    extension), created next to the input file. A part is closed as soon as
    it reaches either limit, so a size-limited part can exceed the limit by
    at most one record. For JSON the size is an estimate based on the
    compact encoding of each item, while the parts are written with
    ``indent=2`` and are therefore somewhat larger on disk.
    """

    # Interval (ms) at which the Tk thread polls the worker's event queue.
    _POLL_INTERVAL_MS = 50
    # A progress event is posted once every this many records.
    _PROGRESS_STEP = 100

    def __init__(self, root):
        """Build the window on ``root`` and initialise the splitter state."""
        self.root = root
        self.root.title("分割器极速版 v1.0")  # custom window title
        self.root.geometry("600x400")

        # The title-bar icon is purely cosmetic: a missing or unsupported
        # .ico file must not prevent the application from starting.
        try:
            self.root.iconbitmap(resource_path("mylogo.ico"))
        except tk.TclError:
            pass

        # Closing the window goes through the same path as the exit button
        # so a running worker is always told to stop.
        self.root.protocol("WM_DELETE_WINDOW", self.exit_program)

        self.input_files = []
        self.records_per_file = tk.StringVar(value="5000")
        self.max_size_mb = tk.StringVar(value="10")
        self.stop_flag = False  # set to True to interrupt the worker

        self._events = queue.Queue()  # worker -> Tk thread messages
        self._worker = None           # currently running worker thread
        self._polling = False         # True while _drain_events is scheduled

        # UI layout
        tk.Label(root, text="请选择文件(支持批量 JSON/TXT/CSV):").pack(pady=5)
        tk.Button(root, text="选择文件", command=self.select_files).pack(pady=5)

        tk.Label(root, text="每份最多记录数(条目/行)(可留空):").pack(pady=5)
        tk.Entry(root, textvariable=self.records_per_file).pack(pady=5)

        tk.Label(root, text="每份最大大小(MB)(可留空):").pack(pady=5)
        tk.Entry(root, textvariable=self.max_size_mb).pack(pady=5)

        tk.Button(root, text="开始分割", command=self.start_split).pack(pady=10)
        tk.Button(root, text="退出程序", command=self.exit_program).pack(pady=5)

        tk.Label(root, text="当前进度:").pack(pady=5)
        self.progress = Progressbar(root, length=500, mode="determinate")
        self.progress.pack(pady=5)

    def select_files(self):
        """Ask the user for input files; a cancelled dialog keeps the old selection."""
        chosen = filedialog.askopenfilenames(
            title="选择文件",
            filetypes=[("支持文件", "*.json *.txt *.csv")]
        )
        if chosen:
            self.input_files = list(chosen)

    def start_split(self):
        """Validate the form and start the background split.

        Blank or zero means "no limit" for a field; at least one of the two
        limits must be effective. Negative, NaN and infinite values are
        rejected. Only one split may run at a time.
        """
        if self._worker is not None and self._worker.is_alive():
            messagebox.showwarning("提示", "正在分割中,请等待当前任务完成!")
            return

        if not self.input_files:
            messagebox.showerror("错误", "请先选择至少一个文件!")
            return

        records_input = self.records_per_file.get().strip()
        size_input = self.max_size_mb.get().strip()
        try:
            records = int(records_input) if records_input else None
            size_mb = float(size_input) if size_input else None
        except ValueError:
            messagebox.showerror("错误", "记录数应为整数,大小应为数字!")
            return

        # A negative limit would make every record its own part; NaN and
        # infinity would silently never split. The chained comparison is
        # False for NaN as well as for out-of-range values.
        if (records is not None and records < 0) or \
           (size_mb is not None and not 0 <= size_mb < float("inf")):
            messagebox.showerror("错误", "记录数和大小必须是有效的非负数!")
            return

        # Zero is treated like a blank field.
        records = records or None
        size_mb = size_mb or None
        if records is None and size_mb is None:
            messagebox.showerror("错误", "请至少填写记录数或大小其中一个条件!")
            return

        self.stop_flag = False
        self.progress["value"] = 0
        self._worker = threading.Thread(target=self.batch_split, args=(records, size_mb))
        self._worker.start()
        if not self._polling:
            self._polling = True
            self.root.after(self._POLL_INTERVAL_MS, self._drain_events)

    def batch_split(self, records_per_file, max_size_mb):
        """Split every selected file in turn (runs on the worker thread).

        Unsupported extensions are reported as a warning and skipped. Any
        exception aborts the remaining files and is reported as an error
        unless the user has already requested a stop. A ``done`` event is
        posted only when the whole batch finished without error or stop.
        """
        try:
            # Iterate over a snapshot so a new selection made in the UI
            # cannot change the batch that is already running.
            for file_path in tuple(self.input_files):
                if self.stop_flag:
                    print("中断处理(批量阶段)")
                    return

                file_base_name, file_ext = os.path.splitext(os.path.basename(file_path))
                kind = file_ext.lower()
                if kind == ".json":
                    splitter = self.split_json
                elif kind in (".txt", ".csv"):
                    splitter = self.split_text
                else:
                    self._post("warning", "警告", f"暂不支持的文件类型: {file_ext}")
                    continue

                # Created only for supported files, so unsupported inputs
                # do not leave an empty folder behind.
                output_folder = os.path.join(os.path.dirname(file_path), file_base_name)
                os.makedirs(output_folder, exist_ok=True)
                splitter(file_path, file_base_name, output_folder, records_per_file, max_size_mb)
        except Exception as e:
            if not self.stop_flag:
                self._post("error", "错误", f"处理失败: {e}")
        else:
            if not self.stop_flag:
                self._post("done")

    def split_json(self, file_path, file_base_name, output_folder, records_per_file, max_size_mb):
        """Split a JSON array into ``<base>_part_NNN.json`` files.

        The file is read as UTF-8, with or without a BOM. Each part is
        written as a JSON array with ``indent=2``.

        Raises:
            ValueError: if the top-level JSON value is not an array (this
                also covers malformed JSON, since ``json.JSONDecodeError``
                is a ``ValueError``).
        """
        with open(file_path, "r", encoding="utf-8-sig") as f:
            data = json.load(f)

        # Iterating a dict would silently split its keys and drop the values.
        if not isinstance(data, list):
            raise ValueError(f"JSON 顶层必须是数组: {os.path.basename(file_path)}")

        def size_of(item):
            # Compact encoding plus two bytes for the separator.
            return len(json.dumps(item, ensure_ascii=False).encode("utf-8")) + 2

        def write_part(part_no, chunk):
            out_file = os.path.join(output_folder, f"{file_base_name}_part_{part_no:03}.json")
            with open(out_file, "w", encoding="utf-8") as f_out:
                json.dump(chunk, f_out, ensure_ascii=False, indent=2)

        self._split_records(data, len(data), size_of, write_part,
                            records_per_file, max_size_mb, "JSON", False)

    def split_text(self, file_path, file_base_name, output_folder, records_per_file, max_size_mb):
        """Split a UTF-8 text file by lines into ``<base>_part_NNN<ext>`` files.

        Parts keep the extension of ``file_path`` (``.txt`` if it has none).
        Lines are streamed rather than loaded all at once, and both files
        are opened with ``newline=""`` so line endings are copied unchanged.
        """
        part_ext = os.path.splitext(file_path)[1] or ".txt"

        def size_of(line):
            return len(line.encode("utf-8"))

        def write_part(part_no, chunk):
            out_file = os.path.join(output_folder, f"{file_base_name}_part_{part_no:03}{part_ext}")
            with open(out_file, "w", encoding="utf-8", newline="") as f_out:
                f_out.writelines(chunk)

        # Progress is measured in bytes because the number of lines is not
        # known without reading the whole file first.
        total_bytes = os.path.getsize(file_path)
        with open(file_path, "r", encoding="utf-8", newline="") as f:
            self._split_records(f, total_bytes, size_of, write_part,
                                records_per_file, max_size_mb, "TXT", True)

    def _split_records(self, records, total, size_of, write_part,
                       records_per_file, max_size_mb, label, progress_in_bytes):
        """Group ``records`` into parts and hand each part to ``write_part``.

        Args:
            records: iterable of records (JSON items or text lines).
            total: progress maximum (record count, or byte count when
                ``progress_in_bytes`` is true).
            size_of: callable returning the byte size counted for a record.
            write_part: callable ``(part_no, chunk)`` that writes one part.
            records_per_file: maximum records per part, or None.
            max_size_mb: size threshold per part in MB, or None.
            label: tag printed when the run is interrupted.
            progress_in_bytes: report consumed bytes instead of the index.

        Returns early, leaving the current partial chunk unwritten, when
        ``self.stop_flag`` is set.
        """
        limit_bytes = max_size_mb * 1024 * 1024 if max_size_mb else None
        chunk = []
        chunk_bytes = 0
        seen_bytes = 0
        part_no = 0

        self._post("progress", 0, total)
        for idx, record in enumerate(records):
            if self.stop_flag:
                print(f"中断处理({label})")
                return

            size = size_of(record)
            chunk.append(record)
            chunk_bytes += size
            seen_bytes += size

            if (records_per_file and len(chunk) >= records_per_file) or \
               (limit_bytes and chunk_bytes >= limit_bytes):
                write_part(part_no, chunk)
                chunk = []
                chunk_bytes = 0
                part_no += 1

            if idx % self._PROGRESS_STEP == 0:
                self._post("progress", seen_bytes if progress_in_bytes else idx, total)

        if chunk:
            write_part(part_no, chunk)
        self._post("progress", total, total)

    def _post(self, kind, *payload):
        """Queue an event for the Tk thread (safe to call from the worker)."""
        self._events.put((kind, *payload))

    def _drain_events(self):
        """Apply queued worker events on the Tk thread and re-schedule if needed."""
        # Only the events present at entry are handled, so a fast worker
        # cannot keep this callback busy indefinitely.
        for _ in range(self._events.qsize()):
            try:
                kind, *payload = self._events.get_nowait()
            except queue.Empty:
                break
            if kind == "progress":
                value, total = payload
                # Keep the maximum at 1 or more even for an empty input.
                self.progress.configure(maximum=max(total, 1), value=value)
            elif kind == "warning":
                messagebox.showwarning(*payload)
            elif kind == "error":
                messagebox.showerror(*payload)
            elif kind == "done":
                messagebox.showinfo("完成", "全部文件分割完成!")

        worker_alive = self._worker is not None and self._worker.is_alive()
        if worker_alive or not self._events.empty():
            self.root.after(self._POLL_INTERVAL_MS, self._drain_events)
        else:
            self._polling = False

    def exit_program(self):
        """Confirm with the user, then stop any running split and close the window."""
        if messagebox.askokcancel("退出确认", "确定要退出程序吗?正在处理中也会立即终止!"):
            # The worker checks this flag before every record.
            self.stop_flag = True
            self.root.destroy()
 
if __name__ == "__main__":
    # Create the top-level window, attach the application to it and hand
    # control to the Tk event loop until the window is destroyed.
    main_window = tk.Tk()
    splitter_app = FileSplitterApp(main_window)  # keep a reference alive
    main_window.mainloop()

免费评分

参与人数 4吾爱币 +10 热心值 +4 收起 理由
caihuachaorou87 + 1 + 1 谢谢@Thanks!
daian + 1 + 1 谢谢@Thanks!
schtg + 1 + 1 谢谢@Thanks!
风之暇想 + 7 + 1 感谢发布原创作品,吾爱破解论坛因你更精彩!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

wbsdashen 发表于 2025-5-1 18:21
太牛了大佬。晚上来试试
loveyunye 发表于 2025-5-2 07:44
gztf 发表于 2025-5-2 08:40
250737355 发表于 2025-5-2 19:43
我最近在搞,直播话术训练,您在搞什么的
yujianlixi 发表于 2025-5-3 13:34
又是仰慕学习的一日
wau 发表于 2025-5-3 21:55
学习学习,不错!
schtg 发表于 2025-5-4 06:30
这个新颖,谢谢!
CHIANTII 发表于 2025-5-5 21:58
看着有点意思
fuum2pimbeb 发表于 2025-5-7 17:40

这个新颖,谢谢!
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2025-5-21 03:04

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表