import re
import csv
from datetime import datetime
import os
import tkinter as tk
from tkinter import filedialog
from openpyxl import Workbook
def read_and_split_file(file_path):
"""
读取文件并同时提取配置部分和非配置部分
返回:(config_lines, data_lines)
"""
try:
config_lines = []
data_lines = []
in_config = False
with open(file_path, 'r', encoding='utf-8') as f:
for line in f:
line = line.strip()
if line.startswith('/*'):
in_config = True
elif line.endswith('*/'):
in_config = False
elif line and not (line.startswith("'''") or "'''" in line):
if in_config:
config_lines.append(line)
else:
data_lines.append(line)
return config_lines, data_lines
except Exception as e:
print(f"读取文件失败: {e}")
return [], []
def load_configuration_from_lines(config_lines):
"""
从配置行加载用户配置
返回:(category_mapping, special_categories, member_format_map)
"""
# 初始化结果容器
category_mapping = {}
special_categories = []
member_format_map = {}
# 处理每一行配置
for line in config_lines:
parts = line.split('\t', 1)
if len(parts) >= 1:
category = parts[0].strip()
member_part = parts[1].strip() if len(parts) > 1 else ''
if not member_part:
special_categories.append(category)
else:
# 处理¥开头的成员格式
yuan_matches = re.finditer(r'(¥[^(]+?)\(([^)]+?)\)', member_part)
for match in yuan_matches:
prefix, items_str = match.groups()
for item in filter(bool, map(str.strip, items_str.split(';'))):
full = f"{prefix}({item})"
simplified = f"¥{item}"
category_mapping.update({full: category, simplified: category})
member_format_map[simplified] = full
# 处理#开头的成员格式
hash_matches = re.finditer(r'(#[^(]+?)\(([^)]+?)\)', member_part)
for match in hash_matches:
prefix, items_str = match.groups()
for item in filter(bool, map(str.strip, items_str.split(';'))):
full = f"{prefix}({item})"
simplified = f"#{item}"
category_mapping.update({full: category, simplified: category})
member_format_map[simplified] = full
# 处理单独的科目
standalone_match = re.search(r'^([^¥#]+?)$', member_part)
if standalone_match:
standalone = standalone_match.group(1).strip()
if standalone:
category_mapping[standalone] = category
return category_mapping, special_categories, member_format_map
def parse_transactions_from_lines(data_lines, special_categories=None):
"""
从数据行解析交易记录
新格式:
- 日期行:YYYYMMDD
- 交易行:+/-金额 成员 // 描述
- 支持同一天内多个交易块,每个块可有独立描述
"""
if special_categories is None:
special_categories = []
transactions = []
current_date = ''
current_description = ''
for line in data_lines:
# 匹配日期行:8位数字
date_match = re.match(r'(\d{8})$', line)
if date_match:
# 格式化为 YYYY-MM-DD
date_str = date_match.group(1)
current_date = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}"
current_description = ''
continue
# 解析交易行
if current_date:
# 尝试提取描述(如果有的话)
desc_match = re.search(r'//\s*(.+)', line)
if desc_match:
description = desc_match.group(1).strip()
if description:
current_description = description
# 移除描述部分,只保留交易数据
line = line.split('//')[0].strip()
# 匹配交易数据:+/-金额 成员
transaction_match = re.match(r'([+-]\d+)\s+(.+)', line)
if transaction_match:
amount_str = transaction_match.group(1)
member = transaction_match.group(2).strip()
# 构建交易记录
transaction = {
'日期': current_date,
'内容': current_description,
'金额': float(amount_str),
'成员': member if member not in special_categories else '',
'科目iii': member if member in special_categories else ''
}
transactions.append(transaction)
return transactions
def load_configuration(file_path):
"""
从文件中加载用户配置(保持向后兼容)
返回:(category_mapping, special_categories, member_format_map)
"""
try:
config_lines, _ = read_and_split_file(file_path)
return load_configuration_from_lines(config_lines)
except Exception as e:
print(f"加载配置失败: {e}")
return {}, [], {}
def parse_transactions(file_path, special_categories=None):
"""
解析交易记录(保持向后兼容)
"""
if special_categories is None:
special_categories = []
try:
_, data_lines = read_and_split_file(file_path)
return parse_transactions_from_lines(data_lines, special_categories)
except Exception as e:
print(f"解析交易记录失败: {e}")
return []
def add_category_info(transactions, category_mapping, member_format_map=None):
"""
添加科目信息并规范化成员格式
"""
if member_format_map is None:
member_format_map = {}
processed_transactions = []
for transaction in transactions:
member = transaction.get('成员', '')
# 规范化成员格式
standardized_member = member_format_map.get(member, member)
transaction['成员'] = standardized_member
# 只有当科目iii为空时才进行查找
if not transaction['科目iii']:
# 查找科目信息
category = category_mapping.get(standardized_member, '')
# 如果没找到,尝试反向查找
if not category:
for cfg_member, cfg_category in category_mapping.items():
if standardized_member == cfg_category:
category = cfg_category
transaction['成员'] = ''
break
transaction['科目iii'] = category
processed_transactions.append(transaction)
return processed_transactions
def select_file():
"""
让用户选择文件的交互函数
"""
root = tk.Tk()
root.withdraw() # 隐藏主窗口
# 打开文件选择对话框
file_path = filedialog.askopenfilename(
title="选择记账文件",
filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")]
)
root.destroy() # 关闭Tkinter窗口
return file_path
def generate_excel(transactions, output_file):
"""
生成Excel文件,返回成功状态 - 使用csv预处理提高性能
"""
try:
import csv
import io
# 首先使用CSV格式预处理所有数据
csv_buffer = io.StringIO()
writer = csv.writer(csv_buffer)
# 写入表头
headers = ['日期', '内容', '金额', '成员', '科目iii']
writer.writerow(headers)
# 写入所有数据行
for transaction in transactions:
row = [
transaction.get('日期', ''),
transaction.get('内容', ''),
transaction.get('金额', ''),
transaction.get('成员', ''),
transaction.get('科目iii', '')
]
writer.writerow(row)
# 创建工作簿并加载CSV数据
wb = Workbook()
ws = wb.active
ws.title = "交易记录"
# 重置缓冲区指针并读取所有行
csv_buffer.seek(0)
csv_reader = csv.reader(csv_buffer)
# 将CSV数据批量写入Excel
for row_idx, row in enumerate(csv_reader):
for col_idx, value in enumerate(row):
ws.cell(row=row_idx+1, column=col_idx+1, value=value)
# 保存文件
wb.save(output_file)
print(f"Excel文件已生成: {output_file}")
return True
except Exception as e:
print(f"生成Excel文件失败: {e}")
return False
def main():
"""
主函数 - 优化版,文件只读取一次
"""
# 让用户选择文件
file_path = select_file()
if not file_path:
print("未选择文件,程序退出")
return
# 只读取文件一次
config_lines, data_lines = read_and_split_file(file_path)
# 从配置行加载配置
category_mapping, special_categories, member_format_map = load_configuration_from_lines(config_lines)
# 从数据行解析交易记录
transactions = parse_transactions_from_lines(data_lines, special_categories)
# 添加科目信息
transactions = add_category_info(transactions, category_mapping, member_format_map)
# 生成Excel文件名和路径(保存到与txt文件同目录)
file_dir = os.path.dirname(file_path)
file_name = os.path.splitext(os.path.basename(file_path))[0]
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
# 构建Excel文件路径
excel_file = os.path.join(file_dir, f'{file_name}_{timestamp}.xlsx')
# 生成Excel文件
if generate_excel(transactions, excel_file):
print(f"Excel文件 '{excel_file}' 已成功生成")
print("任务完成!")
else:
print("任务失败!")
if __name__ == "__main__":
main()