import
csv, jieba, re
from
itertools
import
islice
import
pandas as pd
import
imageio
import
matplotlib.pyplot as plt
from
wordcloud
import
WordCloud, ImageColorGenerator
def csv_to_txt():
    """Extract short Chinese chat messages from ``my_chat.csv`` into ``chat.txt``.

    The first CSV row is skipped as a header.  For every remaining row the
    ninth column (index 8) is written to ``chat.txt``, one message per line,
    when it contains at least one character in the range U+4E00-U+9FA5 and
    is shorter than 50 characters.  Rows with fewer than nine columns (for
    example blank lines) are skipped instead of raising ``IndexError``.
    """
    # Compiled once instead of being looked up again for every row.
    chinese_char = re.compile("[\u4e00-\u9fa5]")

    # Both files are managed by ``with`` so they are closed even when a row
    # raises; ``newline=""`` is what the csv module asks for on its input.
    with open("my_chat.csv", "r", encoding="utf-8", newline="") as chat_csv, \
            open("chat.txt", "w+", encoding="utf-8") as fp:
        for line in islice(csv.reader(chat_csv), 1, None):
            if len(line) <= 8:
                # No message column in this row.
                continue
            message = line[8]
            if chinese_char.search(message) and len(message) < 50:
                fp.write(message + "\n")
def cut_words():
    """Segment ``chat.txt`` with jieba and return the words worth counting.

    The user dictionary ``custom_dict.txt`` is loaded into jieba, the whole
    content of ``chat.txt`` is cut, and a token is kept when it is longer
    than one character, is not a newline, and is not one of the entries in
    ``stopwords.txt``.

    Returns:
        A list of the kept words in order of appearance, duplicates included.
    """
    with open("chat.txt", "r", encoding="utf-8") as fp:
        content = fp.read()

    jieba.load_userdict("custom_dict.txt")

    with open("stopwords.txt", 'r', encoding="utf-8") as ss:
        # Whole-entry lookup: testing ``word in <file text>`` is a substring
        # search, which also drops any word that is merely a fragment of a
        # longer stop word.  Entries are taken to be whitespace/newline
        # separated -- NOTE(review): confirm against the actual file.
        stopwords = set(ss.read().split())

    return [
        word
        for word in jieba.cut(content)
        if len(word) > 1 and word != '\n' and word not in stopwords
    ]
def word_cloud(words):
    """Draw a word cloud shaped by ``my_pic.jpg`` from *words*.

    Word frequencies are obtained by grouping *words* in a DataFrame.  The
    cloud is written to ``wordcloud.png`` before the recolor call, then
    recolored with the colors of the mask image, shown on the current
    matplotlib axes with the axis hidden, and saved as ``pltwordcloud.png``
    at 400 dpi.

    Args:
        words: iterable of words; repeated words increase their frequency.
    """
    frame = pd.DataFrame({"word": words})
    frequencies = frame.groupby(["word"]).size().to_dict()

    # The same image serves as the shape mask and as the color source.
    mask_image = imageio.imread('my_pic.jpg')
    image_colors = ImageColorGenerator(mask_image)

    cloud = WordCloud(
        background_color="white",
        mode="RGB",
        mask=mask_image,
        font_path="青鸟华光简胖头鱼.TTF",
        scale=3,
    ).generate_from_frequencies(frequencies)
    cloud.to_file("wordcloud.png")

    plt.axis("off")
    recolored = cloud.recolor(color_func=image_colors)
    plt.imshow(recolored)
    plt.savefig('pltwordcloud.png', dpi=400)
def run():
    """Run the pipeline: export messages, segment them, draw the word cloud."""
    csv_to_txt()
    # cut_words() reads the chat.txt that csv_to_txt() has just written.
    word_cloud(cut_words())
# Only run the pipeline when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()