本帖最后由 renminwansui 于 2026-6-4 14:58 编辑
- 工具使用目的:批量改发票pdf名称为金额+发票号码+销售方,方便金额核对,方便牛马(自嘲)报销
- 使用步骤:双击运行exe,选择文件夹即可
- 场景:报销的时候为了识别出发票是哪个(程序批量识别金额和销售方且自动回填)
- 成品更新:
v2版本更新:https://95879587.lanzouu.com/isOYb3r18nbe 废弃
v3版本更新:https://95879587.lanzouu.com/iVC763r1el6d 废弃
V5版本更新:增加多种发票情况识别,文件名中加入发票号码,下载地址:https://95879587.lanzouu.com/iYgTw3r3ugpa
![]()
源码(Python):
import sys
import os
import re
import pdfplumber
# Amount with optional thousands separators and exactly two decimals,
# e.g. "106.00" or "1,234.56".
_AMOUNT_NUMBER = r'((?:[0-9]{1,3}(?:,[0-9]{3})+|[0-9]+)\.[0-9]{2})'

# Ordered from most specific to most generic.  The loose character-class
# pattern comes last: it matches wherever any of the others match, so putting
# it first (as the code used to) made every other pattern unreachable.
_AMOUNT_PATTERNS = [re.compile(_p + _AMOUNT_NUMBER) for _p in (
    r'[((]\s*小\s*写\s*[))]\s*[¥¥]?\s*',
    r'价\s*税\s*合\s*计[^\n]*?[¥¥]\s*',
    r'价税合计[¥¥]?\s*[::]?\s*',
    r'合计[¥¥]?\s*[::]?\s*',
    r'[¥¥]\s*',
    r'[金额合计¥¥]\s*[::]?\s*',
)]

# "名称:" field label; PDF text extraction may insert spaces between glyphs.
_NAME_LABEL = re.compile(r'名\s*称\s*[::]')

# Fallbacks used only when no usable "名称:" field is found.
_COMPANY_FALLBACKS = [re.compile(_p) for _p in (
    r'销方[::]\s*([^\n]{2,30})',
    r'销售方[::]\s*([^\n]{2,30})',
    r'销售方纳税人识别号[::\s]*([^\n]{2,30})',
)]

# Everything that is not CJK, ASCII alphanumeric or a parenthesis is removed
# from the company name before it is used in a file name.
_COMPANY_JUNK = re.compile(r'[^\u4e00-\u9fa5a-zA-Z0-9()\(\)]')

_UNRECOGNIZED_PREFIX = '未识别发票_'


def _pause(prompt: str) -> None:
    """Wait for Enter, tolerating a closed or missing stdin."""
    try:
        input(prompt)
    except (EOFError, OSError, RuntimeError):
        # EOFError: stdin is at end of file (redirected or closed).
        # OSError: the stdin handle is invalid or refuses reads.
        # RuntimeError: sys.stdin is None ("input(): lost sys.stdin").
        # The pause is a convenience only, so there is nothing to recover.
        pass


def _choose_folder() -> str:
    """Return the folder from argv[1], or ask for one with a directory dialog.

    An empty (falsy) result means the user cancelled the dialog.
    """
    if len(sys.argv) > 1:
        return sys.argv[1]
    # Imported lazily so the command-line path does not need a GUI toolkit.
    import tkinter as tk
    from tkinter import filedialog
    root = tk.Tk()
    root.withdraw()
    try:
        return filedialog.askdirectory(title="请选择包含发票PDF的文件夹")
    finally:
        root.destroy()


def _read_pdf_text(filepath: str) -> str:
    """Return the text of every page joined by newlines; the PDF is closed on return."""
    with pdfplumber.open(filepath) as pdf:
        # Joining with "\n" keeps a field at the end of one page from running
        # into the first line of the next page.
        return '\n'.join(page.extract_text() or '' for page in pdf.pages)


def _extract_amount(text: str) -> str:
    """Return the first amount found by the highest-priority pattern, or ''.

    Thousands separators are removed, so "1,234.56" yields "1234.56".
    """
    for pattern in _AMOUNT_PATTERNS:
        found = pattern.search(text)
        if found:
            return found.group(1).replace(',', '')
    return ''


def _name_field_values(text: str):
    """Return the value following each "名称:" label, in document order.

    A value ends at the end of its line or at the next "名称:" label,
    whichever comes first, so two fields printed on one line stay separate.
    """
    labels = list(_NAME_LABEL.finditer(text))
    values = []
    for index, label in enumerate(labels):
        has_next = index + 1 < len(labels)
        limit = labels[index + 1].start() if has_next else len(text)
        chunk = text[label.end():limit].lstrip()
        value = chunk.split('\n', 1)[0]
        truncated = has_next and '\n' not in chunk
        if truncated:
            # The text just before the next label (e.g. "销" in "销 名称:")
            # belongs to that label, not to this value: drop the last token.
            parts = value.rsplit(None, 1)
            if len(parts) == 2:
                value = parts[0]
        values.append(value.strip())
    return values


def _extract_company(text: str) -> str:
    """Return a sanitized company name of at most 20 characters, or ''."""
    names = _name_field_values(text)
    # NOTE(review): assumes the buyer's "名称:" field precedes the seller's in
    # the extracted text, so the second field is preferred -- confirm against
    # sample invoices.  A single field is used as-is.
    candidates = names[1:2] + names[:1] + names[2:]
    for pattern in _COMPANY_FALLBACKS:
        found = pattern.search(text)
        if found:
            candidates.append(found.group(1))
    for raw in candidates:
        cleaned = _COMPANY_JUNK.sub('', raw)[:20]
        if len(cleaned) >= 2:
            return cleaned
    return ''


def _build_name(amount: str, company: str, filename: str) -> str:
    """Compose the target file name from whatever could be recognized."""
    if amount and company:
        return f'{amount}元_{company}_发票.pdf'
    if amount:
        return f'{amount}元_销售方主体_发票.pdf'
    if company:
        return f'未知金额_{company}_发票.pdf'
    # Do not stack the prefix when the tool is run again on the same folder.
    if filename.startswith(_UNRECOGNIZED_PREFIX):
        return filename
    return f'{_UNRECOGNIZED_PREFIX}{filename}'


def _resolve_target(folder_path: str, filename: str, desired: str) -> str:
    """Return *desired*, or *desired* with a numeric suffix, avoiding collisions.

    A candidate that is the file's own current name is accepted, so running
    the tool twice does not keep appending suffixes.
    """
    stem, ext = os.path.splitext(desired)
    current = os.path.normcase(filename)
    candidate = desired
    counter = 1
    while (os.path.normcase(candidate) != current
           and os.path.exists(os.path.join(folder_path, candidate))):
        candidate = f'{stem}_{counter}{ext}'
        counter += 1
    return candidate


def main():
    """Rename every invoice PDF in a folder to "<amount>元_<company>_发票.pdf".

    The folder comes from the first command-line argument or, without one,
    from a directory-picker dialog.  Each PDF's text is read with pdfplumber,
    an amount and a company name are extracted with regular expressions, and
    the file is renamed in place.  Progress and a final summary are printed.

    Raises:
        SystemExit: code 0 when no folder was chosen or it holds no PDF files,
            code 1 when the given path is not a directory.
    """
    folder_path = _choose_folder()
    if not folder_path:
        print("未选择文件夹,程序退出。")
        _pause("按回车键退出...")
        sys.exit(0)
    if not os.path.isdir(folder_path):
        print(f"错误: {folder_path} 不是有效的文件夹")
        _pause("按回车键退出...")
        sys.exit(1)

    print("=" * 50)
    print(" 发票PDF批量重命名工具")
    print("=" * 50)
    print(f"\n已选择文件夹: {folder_path}\n")

    # Sorted so that collision suffixes are assigned in a reproducible order.
    pdf_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith('.pdf')
        and os.path.isfile(os.path.join(folder_path, name))
    )
    if not pdf_files:
        print("未找到PDF文件")
        _pause("\n按回车键退出...")
        sys.exit(0)
    print(f"找到 {len(pdf_files)} 个PDF文件,开始处理...\n")

    success_count = 0
    fail_count = 0
    for filename in pdf_files:
        filepath = os.path.join(folder_path, filename)
        # One broad handler per file: a single unreadable PDF must not abort
        # the whole batch; the error is reported and counted.
        try:
            # The PDF is closed before renaming; an open handle blocks
            # os.rename on Windows.
            text = _read_pdf_text(filepath)
            if not text.strip():
                print(f"警告: {filename} 无法提取文本内容(可能是扫描件)")
                fail_count += 1
                continue
            desired = _build_name(
                _extract_amount(text), _extract_company(text), filename
            )
            new_name = _resolve_target(folder_path, filename, desired)
            if new_name == filename:
                print(f'✓ 无需重命名: {filename}\n')
            else:
                os.rename(filepath, os.path.join(folder_path, new_name))
                print(f'✓ 重命名: {filename}')
                print(f' -> {new_name}\n')
            success_count += 1
        except Exception as e:
            print(f'✗ 处理失败: {filename}')
            print(f' 错误: {str(e)}\n')
            fail_count += 1

    print("-" * 50)
    print(f"处理完成!成功: {success_count}, 失败: {fail_count}")
    print("-" * 50)
    _pause("\n按回车键退出...")
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|