import os
import re
import tkinter as tk
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from threading import Thread
from tkinter import filedialog
from tkinter import messagebox
from urllib.parse import quote

import pandas as pd
import requests
from lxml import etree
from openpyxl import load_workbook
from openpyxl.styles import Font, NamedStyle
def get_word_info(word, timeout=10):
    """Look up *word* on Youdao and scrape its pronunciations and definitions.

    Args:
        word: The word to look up. It is percent-encoded before being placed
            in the URL path so characters such as ``/`` or ``#`` cannot
            change which page is requested.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the calling thread forever.

    Returns:
        A ``(British_pronunciation, American_pronunciation, paraphrase)``
        tuple, or ``None`` when the request fails or the page does not
        contain the expected elements. The error is printed, never raised.
    """
    try:
        url = f'https://www.youdao.com/w/eng/{quote(str(word), safe="")}'
        paraphrase = ""
        data = requests.get(url, timeout=timeout).text
        html = etree.HTML(data)
        # Indexing with [0] raises IndexError when a pronunciation is missing;
        # that is handled below and reported as "no result".
        British_pronunciation = html.xpath(
            '//*[@id="phrsListTab"]/h2/div/span[1]/span/text()')[0]
        American_pronunciation = html.xpath(
            '//*[@id="phrsListTab"]/h2/div/span[2]/span/text()')[0]
        definition_lists = html.xpath('//*[@id="phrsListTab"]/div/ul')
        # If several lists match, the text of the last one is kept.
        for definition_list in definition_lists:
            paraphrase = '\n'.join(definition_list.xpath('.//text()'))
        return British_pronunciation, American_pronunciation, paraphrase
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller carry on.
        print(e, word)
        return None
# Punctuation and whitespace removed from both ends of every token; mirrors
# the separator characters used when splitting the text.
_EDGE_CHARS = ",.:?!()'\" \t\n\r\f\v"

# A token containing an apostrophe, a digit or a CJK character is not looked up.
_REJECTED_CHARS = re.compile(r"['\d\u4e00-\u9fff]")


def _extract_words(content):
    """Return the sorted, de-duplicated, lower-cased lookup words in *content*."""
    tokens = re.split(r"\b[,.:?!()'\"\s\n\t\r]+?\b", content)
    # The split leaves separators attached to the first and last token (for
    # example "end.\n"), so trim them before de-duplicating.
    cleaned = {token.strip(_EDGE_CHARS).lower() for token in tokens}
    return sorted(
        word for word in cleaned
        if word and not _REJECTED_CHARS.search(word)
    )


def _stem_candidates(word):
    """Return base-form guesses for an inflected *word*, in lookup order."""
    if word.endswith('ing'):
        stems = [word[:-3]]
    elif word.endswith('ed'):
        # Dropping only "d" handles "loved" -> "love"; dropping "ed" handles
        # "walked" -> "walk".
        stems = [word[:-1], word[:-2]]
    elif word.endswith('s'):
        stems = [word[:-1]]
    else:
        stems = []
    return [stem for stem in stems if stem]


def _lookup_word(word):
    """Look up *word*, falling back to its suffix-stripped forms."""
    word_info = get_word_info(word)
    if word_info:
        return word_info
    for stem in _stem_candidates(word):
        word_info = get_word_info(stem)
        if word_info:
            return word_info
    return None


def process_text_file(file_path):
    """Build an Excel vocabulary sheet from the words in a text file.

    The file is split into words, which are lower-cased, de-duplicated,
    sorted and filtered. They are written to column 1 of a workbook saved
    next to the input with an ``.xlsx`` extension. Columns 2-4 receive the
    British pronunciation, American pronunciation and paraphrase returned by
    ``get_word_info``; words without a result keep those cells empty. A
    message box is shown once the workbook has been saved.

    Args:
        file_path: Path of the text file to read.
    """
    with open(file_path, 'r') as text_file:
        content = text_file.read()

    filtered_words = _extract_words(content)

    # Swap only the real extension: str.replace would rewrite every ".txt" in
    # the path and, for a name without ".txt", would leave the path unchanged
    # so the workbook overwrote the input file.
    output_file = os.path.splitext(file_path)[0] + '.xlsx'
    pd.DataFrame(filtered_words, columns=['Words']).to_excel(
        output_file, index=False)

    workbook = load_workbook(output_file)
    worksheet = workbook.active

    bold_style = NamedStyle(name="bold_style")
    bold_style.font = Font(bold=True)
    headers = ("British_pronunciation", "American_pronunciation", "paraphrase")
    for column, title in enumerate(headers, start=2):
        header_cell = worksheet.cell(row=1, column=column, value=title)
        header_cell.style = bold_style

    with ThreadPoolExecutor() as executor:
        # map() yields results in input order, which is also the row order
        # of the sheet, so results and rows can be paired directly.
        results = executor.map(_lookup_word, filtered_words)
        rows = worksheet.iter_rows(min_row=2, max_col=4)
        for word_info, row in zip(results, rows):
            if not word_info:
                continue
            for cell, value in zip(row[1:], word_info):
                cell.value = value

    workbook.save(output_file)
    messagebox.showinfo('Success', 'Process completed successfully.')
def browse_file(file_entry):
    """Ask the user to pick a text file and show its path in *file_entry*.

    The entry is left untouched when the dialog returns an empty selection.

    Args:
        file_entry: Entry widget whose content is replaced by the chosen path.
    """
    chosen_path = filedialog.askopenfilename(
        filetypes=[('Text Files', '*.txt')])
    if not chosen_path:
        return
    # Clear whatever was there before inserting the new path.
    file_entry.delete(0, tk.END)
    file_entry.insert(tk.END, chosen_path)
def execute_function(file_entry):
    """Start processing the file named in *file_entry* on a worker thread.

    Shows an error box and returns when the entry is empty. Otherwise the
    module-level ``execute_button`` is disabled while ``process_text_file``
    runs and is enabled again when it finishes, whether it succeeded or
    raised. A failure is reported in an error box instead of being lost
    with the thread.

    Args:
        file_entry: Entry widget holding the path of the file to process.
    """
    file_path = file_entry.get()
    if not file_path:
        messagebox.showerror('Error', 'Please select a file.')
        return

    def _on_event_loop(callback):
        # Hand UI work to the Tk event loop rather than touching widgets
        # from the worker thread.
        try:
            execute_button.after(0, callback)
        except (tk.TclError, RuntimeError):
            # The window was closed while the worker was still running.
            pass

    def _run():
        try:
            process_text_file(file_path)
        except Exception as exc:
            # Capture the text now: ``exc`` is unbound once this block ends.
            error_text = str(exc)
            _on_event_loop(lambda: messagebox.showerror('Error', error_text))
        finally:
            _on_event_loop(lambda: execute_button.config(state=tk.NORMAL))

    execute_button.config(state=tk.DISABLED)
    thread = Thread(target=_run)
    thread.start()
# Build the main window, lay the widgets out top to bottom and run the event
# loop. ``execute_button`` has to stay a module-level name because
# ``execute_function`` refers to it directly.
BACKGROUND_COLOR = 'sky blue'

window = tk.Tk()
window.title('英文文章切割为单词 V1.0')
window.configure(bg=BACKGROUND_COLOR)

file_label = tk.Label(window, text='Select a text file:', bg=BACKGROUND_COLOR)
file_entry = tk.Entry(window, width=50)
browse_button = tk.Button(
    window, text='Browse', command=partial(browse_file, file_entry))
execute_button = tk.Button(
    window, text='Execute', command=partial(execute_function, file_entry))

# Packing order determines the vertical order of the widgets.
for widget in (file_label, file_entry, browse_button, execute_button):
    widget.pack()

window.mainloop()