import zipfile
import xmltodict
import os, shutil, time
from tkinter import filedialog
class Mygetofd():
    """Extract railway e-ticket invoice fields from an OFD file.

    An OFD file is handled as a zip archive.  The invoice data is read from
    an XML attachment under ``Doc_0/Attachs`` whose file name contains
    ``rai_issuer`` and whose root element is ``xbrl``.
    """

    # (output key, XBRL element name) pairs, in the order they are emitted.
    _FIELDS = (
        ("发票号码", "rai:ElectronicInvoiceRailwayETicketNumber"),
        ("发票类型", "rai:TypeOfVoucher"),
        ("开票日期", "rai:DateOfIssue"),
        ("购买方名称", "rai:NameOfPurchaser"),
        ("统一社会信用代码", "rai:UnifiedSocialCreditCodeOfPurchaser"),
        ("不含税金额", "rai:TotalAmountExcludingTax"),
        ("税额", "rai:TaxAmount"),
        ("电子客票号", "rai:ETicketNumber"),
        ("出发站", "rai:DepartureStation"),
        ("目的站", "rai:DestinationStation"),
        ("车次号", "rai:TrainNumber"),
        ("出发日期", "rai:TravelDate"),
        ("发车时间", "rai:DepartureTime"),
        ("座次", "rai:SeatLevel"),
        ("座位号", "rai:Seat"),
        ("乘车人", "rai:Name"),
        ("证件号", "rai:IdNumber"),
        ("票价", "rai:Fare"),
    )

    def unzip_file(self, zip_path, unzip_path=None):
        """Extract every member of the archive at *zip_path*.

        Args:
            zip_path: Path of the zip (OFD) archive to extract.
            unzip_path: Destination directory.  When omitted or empty, the
                archive path with its final extension removed is used.

        Returns:
            The directory the archive was extracted into.
        """
        if not unzip_path:
            # splitext only strips the last extension, so dots in parent
            # directory names (or earlier in the file name) are preserved.
            unzip_path = os.path.splitext(zip_path)[0]
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(path=unzip_path)
        return unzip_path

    def parse_ofd(self, path):
        """Parse the OFD file at *path* and print its invoice fields.

        The archive is unpacked into a fresh temporary directory that is
        always removed afterwards, whether or not parsing succeeds.

        Args:
            path: Path of the OFD file.

        Returns:
            The dict built by ``result_data`` for the first attachment whose
            name contains ``rai_issuer``, or ``None`` when there is no such
            attachment.
        """
        # Function-scope import keeps this method self-contained; a temporary
        # directory avoids overwriting/deleting a folder next to the OFD file.
        import tempfile

        extract_dir = tempfile.mkdtemp(prefix="ofd_")
        try:
            self.unzip_file(path, extract_dir)
            attachments_dir = os.path.join(extract_dir, "Doc_0", "Attachs")
            for root, _dirs, files in os.walk(attachments_dir):
                for file_name in files:
                    if "rai_issuer" not in file_name:
                        continue
                    xml_path = os.path.join(root, file_name)
                    with open(xml_path, "r", encoding="utf-8") as xml_file:
                        tree = xmltodict.parse(xml_file.read())
                    dic_data = self.result_data(tree['xbrl'], path)
                    print(dic_data)
                    return dic_data
            return None
        finally:
            # Clean up even when no attachment matched or parsing raised.
            self.delete_file(extract_dir)

    def delete_file(self, folder_path):
        """Recursively delete the directory *folder_path*.

        The path is normalised for the current platform first, so forward
        slashes are accepted on Windows and POSIX paths are left intact.
        """
        shutil.rmtree(os.path.normpath(folder_path))

    @staticmethod
    def _node_text(node):
        """Return the text of a parsed XML node, or ``""`` if it has none.

        A mapping yields its ``"#text"`` entry, ``None`` (missing or empty
        element) yields ``""``, and any other value is returned unchanged.
        """
        if node is None:
            return ""
        if isinstance(node, dict):
            return node.get("#text", "")
        return node

    def result_data(self, data, path):
        """Collect the invoice fields from the parsed ``xbrl`` mapping.

        Args:
            data: Mapping of XBRL element names to parsed nodes.
            path: Source file path, stored under the ``"文件路径"`` key.

        Returns:
            A dict with the file path followed by one entry per known field;
            fields that are absent from *data* map to ``""``.
        """
        result_dic = {"文件路径": path}
        for label, tag in self._FIELDS:
            result_dic[label] = self._node_text(data.get(tag))
        return result_dic
if __name__ == '__main__':
    # Let the user pick the OFD file to parse through a file-open dialog.
    myofdfile = filedialog.askopenfilename(defaultextension='.ofd')
    # The dialog yields an empty value when it is cancelled; skip parsing
    # instead of handing an empty path to the zip reader.
    if myofdfile:
        my_getter = Mygetofd()
        _data_dict = my_getter.parse_ofd(myofdfile)