import
re
import
string
import
os
def count_text_elements(file_path):
    """Count Chinese characters, English words/letters, punctuation and digits.

    The file is decoded by trying a fixed list of encodings in order and
    keeping the first one that decodes without error. Every character of the
    decoded text is then put into at most one category, checked in this
    order: Chinese character (U+4E00..U+9FFF), punctuation, ASCII letter,
    digit. Characters that fit none of them (whitespace, other scripts, ...)
    are ignored.

    Args:
        file_path: Path of the text file to analyse.

    Returns:
        A dict with the integer counts ``chinese_chars``, ``english_words``,
        ``punctuation``, ``english_letters``, ``arabic_numbers`` and
        ``total`` (the sum of all of them except ``english_words``).

    Raises:
        ValueError: If none of the candidate encodings can decode the file.
        FileNotFoundError: Propagated from ``open`` when the file is missing.
    """
    # Kept exactly as before; most of these are ASCII forms that
    # string.punctuation already contains.
    chinese_punctuation = ',。!?;:“”‘’()【】《》—~…¥'
    # Full-width forms actually used in Chinese text. Written as escapes so
    # the literal cannot be silently normalised back to ASCII by an editor.
    fullwidth_punctuation = (
        '\uff0c\uff01\uff1f\uff1b\uff1a'  # comma, exclamation, question, semicolon, colon
        '\uff08\uff09\uff5e\u3001\uffe5'  # parentheses, tilde, ideographic comma, yen sign
    )
    all_punctuation = set(
        string.punctuation + chinese_punctuation + fullwidth_punctuation
    )
    # Explicit ASCII set: comparing char.lower() against 'a'..'z' wrongly
    # accepts characters such as the Kelvin sign (U+212A), whose lowercase
    # form is the ASCII letter 'k'.
    ascii_letters = frozenset(string.ascii_letters)

    encodings = ['utf-8', 'gbk', 'gb2312', 'cp1252']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                content = f.read()
            break
        except UnicodeDecodeError:
            continue
    else:
        # Loop finished without a successful decode.
        raise ValueError(f'无法用以下编码读取文件:{encodings}')

    chinese_chars = 0
    punctuation = 0
    english_letters = 0
    arabic_numbers = 0
    for char in content:
        if '\u4e00' <= char <= '\u9fff':
            chinese_chars += 1
        elif char in all_punctuation:
            punctuation += 1
        elif char in ascii_letters:
            english_letters += 1
        elif char.isdigit():
            # NOTE: str.isdigit() accepts any Unicode digit, not only 0-9.
            arabic_numbers += 1

    # re.ASCII makes \b treat only [a-zA-Z0-9_] as word characters. In the
    # default Unicode mode Chinese characters are word characters too, so an
    # English word directly adjacent to Chinese text had no boundary and was
    # never counted.
    word_pattern = re.compile(r'\b[a-zA-Z]+\b', re.ASCII)
    english_words = len(word_pattern.findall(content))

    total = chinese_chars + punctuation + english_letters + arabic_numbers
    return {
        'chinese_chars': chinese_chars,
        'english_words': english_words,
        'punctuation': punctuation,
        'english_letters': english_letters,
        'arabic_numbers': arabic_numbers,
        'total': total,
    }
# Interactive entry point: ask for a text file, count its elements and print
# the result. Bare file names / relative paths are resolved against the
# directory containing this script, as the prompt promises.
if __name__ == '__main__':
    script_dir = os.path.dirname(os.path.abspath(__file__))
    user_input = input('请输入要统计的txt文件路径(绝对路径或仅文件名,仅文件名时默认程序所在目录): ')
    # Tolerate stray whitespace and the surrounding quotes that a pasted
    # path may carry.
    user_input = user_input.strip().strip('"\'')

    # Decide by whether the path is absolute rather than by looking for a
    # backslash: the backslash test only recognised Windows-style paths and
    # resolved relative paths containing a backslash against the current
    # working directory instead of the script directory.
    if os.path.isabs(user_input):
        file_path = user_input
    else:
        file_path = os.path.join(script_dir, user_input)

    try:
        result = count_text_elements(file_path)
        print('统计结果:')
        print(f'汉字数量:{result["chinese_chars"]}')
        print(f'英文单词数量:{result["english_words"]}')
        print(f'标点符号数量:{result["punctuation"]}')
        print(f'英文字母数量:{result["english_letters"]}')
        print(f'阿拉伯数字数量:{result["arabic_numbers"]}')
        print(f'总数量(汉字+标点+字母+数字):{result["total"]}')
    except FileNotFoundError:
        print(f'错误:文件 {file_path} 未找到,请检查输入(绝对路径或程序所在目录下的文件名)。')
    except ValueError as e:
        # Raised by count_text_elements when no candidate encoding decodes the file.
        print(f'错误:{e},请确认文件编码是否为常见类型(如UTF-8/GBK)。')
    except Exception as e:
        # Top-level boundary of an interactive script: report instead of a traceback.
        print(f'发生未知错误:{e}')