词云图生成器
本帖最后由 苏紫方璇 于 2025-7-28 15:17 编辑
做了一个从 Excel 中提取列数据生成词云图的脚本,效果图片放在附件了。
import sys
import re
import os
import platform
from collections import Counter
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QLabel, QLineEdit, QPushButton, QComboBox, QStatusBar,
QFileDialog, QMessageBox, QGroupBox, QTextEdit, QCheckBox)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from PyQt5.QtGui import QPixmap, QFont
import jieba
import pandas as pd
import matplotlib
from wordcloud import WordCloud
matplotlib.use('Qt5Agg')# 使用Qt5后端
class WordCloudThread(QThread):
    """Worker thread that segments text with jieba and builds a word cloud.

    Signals:
        finished: carries the generated ``WordCloud`` object on success.
        error: carries the exception message when generation fails.
        progress: carries a status message for each processing stage.
    """

    # NOTE(review): QThread itself defines a ``finished`` signal; this class
    # attribute replaces it on the subclass. The name is kept because callers
    # connect a one-argument slot to it.
    finished = pyqtSignal(object)
    error = pyqtSignal(str)
    progress = pyqtSignal(str)

    # Words that are always filtered out, in addition to user stop words.
    DEFAULT_STOPWORDS = frozenset(
        {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一'}
    )

    # How many extra copies of each emphasised word are appended to the text.
    CUSTOM_WORD_REPEAT = 3

    # Candidate CJK-capable fonts per platform; the first entry of each list
    # is the path the original implementation returned unconditionally.
    FONT_CANDIDATES = {
        "Windows": (
            "C:/Windows/Fonts/simhei.ttf",
            "C:/Windows/Fonts/msyh.ttc",
        ),
        "Darwin": (
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Medium.ttc",
        ),
        "Linux": (
            "/usr/share/fonts/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        ),
    }

    def __init__(self, text, user_dict=None, stop_words=None, custom_words=None):
        """Store the text and the word lists used by :meth:`run`.

        Args:
            text: Raw text to analyse.
            user_dict: Iterable of proper nouns to register with jieba.
            stop_words: Iterable of extra words to exclude from the cloud.
            custom_words: Iterable of words to register and give extra weight.
        """
        super().__init__()
        self.text = text
        # Normalise every list so blank lines from the UI never reach jieba.
        self.user_dict = self._clean_words(user_dict)
        self.stop_words = self._clean_words(stop_words)
        self.custom_words = self._clean_words(custom_words)

    @staticmethod
    def _clean_words(words):
        """Return *words* as a set of stripped, non-empty strings."""
        if not words:
            return set()
        return {word.strip() for word in words if word and word.strip()}

    def run(self):
        """Segment the text, count words and emit the resulting word cloud.

        Emits ``finished`` with the ``WordCloud`` on success, or ``error``
        with the exception message if any step raises.
        """
        try:
            self.progress.emit("正在处理文本...")

            # Register user-supplied proper nouns so jieba keeps them whole.
            for word in self.user_dict:
                jieba.add_word(word)

            # Keep only CJK ideographs and ASCII letters; everything else
            # becomes a space.
            text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z]', ' ', self.text)

            # Emphasised words are registered and appended several times so
            # that their frequency (and therefore their size) increases.
            for word in self.custom_words:
                jieba.add_word(word)
                text += f" {word}" * self.CUSTOM_WORD_REPEAT

            stopwords = self.DEFAULT_STOPWORDS | self.stop_words
            # Drop whitespace-only tokens (left behind by the substitution
            # above) and stop words.
            filtered_words = [
                token for token in jieba.cut(text)
                if token.strip() and token not in stopwords
            ]

            self.progress.emit("正在生成词云...")
            word_freq = Counter(filtered_words)
            if not word_freq:
                # Fail with a clear message instead of the library's error.
                raise ValueError("过滤后没有可用于生成词云的词语")

            wc = WordCloud(
                font_path=self.get_font_path(),
                background_color='white',
                width=800,
                height=600,
                max_words=200,
                collocations=False,
                prefer_horizontal=1,
                scale=2,  # render at 2x for a sharper image
            )
            wc.generate_from_frequencies(word_freq)
            self.finished.emit(wc)
        except Exception as e:
            # Thread boundary: report every failure through the error signal.
            self.error.emit(str(e))

    def get_font_path(self):
        """Return a font file path for the current platform.

        The first candidate that exists on disk is returned. If none exists,
        the platform's primary candidate is returned unchanged so that the
        failure surfaces when the font is loaded.
        """
        candidates = self.FONT_CANDIDATES.get(
            platform.system(), self.FONT_CANDIDATES["Linux"]
        )
        for path in candidates:
            if os.path.exists(path):
                return path
        return candidates[0]
class ExcelWordCloudGenerator(QMainWindow):
    """Main window: pick an Excel/CSV column, build a word cloud, save it.

    The heavy work (segmentation and layout) is delegated to
    ``WordCloudThread``; this class owns the widgets, reads the input file
    and handles saving the image and exporting the word frequencies.
    """

    def __init__(self):
        """Initialise state, configure jieba and build the widgets."""
        super().__init__()
        self.file_path = ""
        self.columns = []
        self.words = []
        self.current_wc = None
        self.current_pixmap = None
        self.user_dict = set()
        self.stop_words = set()
        self.custom_words = set()
        self.init_jieba()
        self.initUI()

    def init_jieba(self):
        """Point jieba's dictionary cache at a per-user directory."""
        cache_dir = os.path.expanduser('~/.jieba_cache')
        os.makedirs(cache_dir, exist_ok=True)
        jieba.dt.tmp_dir = cache_dir
        # jieba's parallel mode is deliberately NOT enabled here. It splits
        # work by line, while the text handed to the worker is one long line,
        # and its process pool is created before the user's words are
        # registered with add_word, so those words would not be seen by it.

    def initUI(self):
        """Build the window: input areas, buttons, preview and status bar."""
        self.setWindowTitle("Excel词云生成器(增强版)")
        self.setGeometry(100, 100, 900, 700)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        for build_area in (
            self.createFileSelectionArea,
            self.createColumnSelectionArea,
            self.createSettingsArea,
            self.createButtonArea,
            self.createDisplayArea,
        ):
            build_area(main_layout)

        # NOTE: this attribute shadows QMainWindow.statusBar(); the name is
        # kept because the rest of the class refers to it as an attribute.
        self.statusBar = QStatusBar()
        self.setStatusBar(self.statusBar)
        self.statusBar.showMessage("就绪")

    def createFileSelectionArea(self, parent_layout):
        """Add the read-only path field and the browse button."""
        self.file_path_edit = QLineEdit()
        self.file_path_edit.setReadOnly(True)
        browse_btn = QPushButton("浏览...")
        browse_btn.clicked.connect(self.browseFile)

        row = QHBoxLayout()
        row.addWidget(QLabel("Excel文件:"))
        row.addWidget(self.file_path_edit, 1)
        row.addWidget(browse_btn)

        group = QGroupBox("文件选择")
        group.setLayout(row)
        parent_layout.addWidget(group)

    def createColumnSelectionArea(self, parent_layout):
        """Add the combo box used to choose the column to analyse."""
        self.column_combo = QComboBox()
        self.column_combo.setEditable(False)

        row = QHBoxLayout()
        row.addWidget(QLabel("分析列:"))
        row.addWidget(self.column_combo, 1)

        group = QGroupBox("列选择")
        group.setLayout(row)
        parent_layout.addWidget(group)

    @staticmethod
    def _make_word_row(label_text, placeholder):
        """Return ``(row_layout, text_edit)`` for one multi-line word list."""
        editor = QTextEdit()
        editor.setPlaceholderText(placeholder)
        editor.setMaximumHeight(80)
        row = QHBoxLayout()
        row.addWidget(QLabel(label_text))
        row.addWidget(editor, 1)
        return row, editor

    def createSettingsArea(self, parent_layout):
        """Add the word-list editors and the rendering check boxes."""
        user_dict_layout, self.user_dict_edit = self._make_word_row(
            "专有名词:", "每行一个专有名词(如:人工智能、大数据)"
        )
        stop_words_layout, self.stop_words_edit = self._make_word_row(
            "停用词:", "每行一个停用词(如:然后、因此)"
        )
        custom_words_layout, self.custom_words_edit = self._make_word_row(
            "强调词:", "每行一个需要强调的词(会增大权重)"
        )

        options_layout = QHBoxLayout()
        self.transparent_bg_check = QCheckBox("透明背景")
        self.horizontal_only_check = QCheckBox("仅水平排列")
        options_layout.addWidget(self.transparent_bg_check)
        options_layout.addWidget(self.horizontal_only_check)
        options_layout.addStretch(1)

        layout = QVBoxLayout()
        for row in (user_dict_layout, stop_words_layout,
                    custom_words_layout, options_layout):
            layout.addLayout(row)

        group = QGroupBox("词云设置")
        group.setLayout(layout)
        parent_layout.addWidget(group)

    def createButtonArea(self, parent_layout):
        """Add the generate / save / export buttons."""
        self.generate_btn = QPushButton("生成词云")
        self.generate_btn.clicked.connect(self.generateWordcloud)

        self.save_btn = QPushButton("保存词云")
        self.save_btn.clicked.connect(self.saveWordcloud)
        self.save_btn.setEnabled(False)  # nothing to save until generated

        self.export_words_btn = QPushButton("导出词频")
        self.export_words_btn.clicked.connect(self.exportWordFreq)
        self.export_words_btn.setEnabled(False)

        layout = QHBoxLayout()
        for button in (self.generate_btn, self.save_btn, self.export_words_btn):
            layout.addWidget(button)
        layout.addStretch(1)
        parent_layout.addLayout(layout)

    def createDisplayArea(self, parent_layout):
        """Add the preview label that shows the rendered word cloud."""
        self.display_label = QLabel("请选择Excel文件并生成词云")
        self.display_label.setAlignment(Qt.AlignCenter)
        self.display_label.setMinimumHeight(450)
        self.display_label.setStyleSheet(
            "border: 1px solid #ccc; background-color: white;"
        )

        self.display_layout = QVBoxLayout()
        self.display_layout.addWidget(self.display_label)

        self.display_group = QGroupBox("词云预览")
        self.display_group.setLayout(self.display_layout)
        parent_layout.addWidget(self.display_group, 1)

    def browseFile(self):
        """Ask for an input file and load its column names."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择Excel文件", "", "Excel files (*.xlsx *.xls *.csv)"
        )
        if not file_path:
            return
        self.file_path = file_path
        self.file_path_edit.setText(file_path)
        # Only announce success when the columns were actually read.
        if self.loadColumns():
            self.statusBar.showMessage(
                f"已加载文件: {os.path.basename(file_path)}"
            )

    def _read_table(self, nrows=None):
        """Read ``self.file_path`` into a DataFrame.

        Args:
            nrows: Optional row limit passed to pandas (``0`` reads only the
                header).

        Raises:
            Whatever pandas raises for an unreadable file.
        """
        if self.file_path.lower().endswith('.csv'):
            try:
                return pd.read_csv(self.file_path, nrows=nrows)
            except UnicodeDecodeError:
                # Not UTF-8: retry with a GBK-family codec, which CSV files
                # saved on Chinese-locale systems often use.
                return pd.read_csv(
                    self.file_path, nrows=nrows, encoding='gb18030'
                )
        return pd.read_excel(self.file_path, nrows=nrows)

    def loadColumns(self):
        """Fill the column combo box from the selected file's header.

        Returns:
            True if the columns were loaded, False if reading failed (an
            error dialog has been shown in that case).
        """
        # Clear first so a failed load never leaves another file's columns.
        self.columns = []
        self.column_combo.clear()
        try:
            header = self._read_table(nrows=0)
            # The combo box only accepts strings; headers may be numbers.
            self.columns = [str(col) for col in header.columns]
            self.column_combo.addItems(self.columns)
        except Exception as e:
            QMessageBox.critical(self, "错误", f"加载文件失败: {e}")
            return False
        return True

    def readExcel(self, column_name):
        """Return the chosen column's non-null cells joined by spaces.

        Args:
            column_name: Column label as shown in the combo box.

        Returns:
            The joined text, or ``""`` when the column is missing, empty or
            the file cannot be read (an error dialog is shown for the latter).
        """
        if not column_name:
            return ""
        try:
            df = self._read_table()
            for col in df.columns:
                # Compare as text because the combo box stores labels as str.
                if str(col) == column_name:
                    return ' '.join(df[col].dropna().astype(str))
            return ""
        except Exception as e:
            QMessageBox.critical(self, "错误", f"读取失败: {e}")
            return ""

    @staticmethod
    def _parse_word_list(raw_text):
        """Split editor text into a set of stripped, non-empty lines."""
        return {line.strip() for line in raw_text.splitlines() if line.strip()}

    def generateWordcloud(self):
        """Collect the settings and start the background generation."""
        if not self.file_path:
            QMessageBox.warning(self, "警告", "请先选择Excel文件")
            return

        self.user_dict = self._parse_word_list(self.user_dict_edit.toPlainText())
        self.stop_words = self._parse_word_list(self.stop_words_edit.toPlainText())
        self.custom_words = self._parse_word_list(
            self.custom_words_edit.toPlainText()
        )

        text = self.readExcel(self.column_combo.currentText())
        if not text:
            QMessageBox.warning(self, "警告", "无有效文本数据")
            return

        # Lock the buttons while the worker is running.
        for button in (self.generate_btn, self.save_btn, self.export_words_btn):
            button.setEnabled(False)
        self.statusBar.showMessage("正在生成词云...")

        # Keeping the worker on ``self`` prevents it from being garbage
        # collected while it runs. NOTE: the attribute name shadows
        # QObject.thread(); it is kept unchanged for compatibility.
        self.thread = WordCloudThread(
            text=text,
            user_dict=self.user_dict,
            stop_words=self.stop_words,
            custom_words=self.custom_words,
        )
        self.thread.finished.connect(self.onWordcloudGenerated)
        self.thread.error.connect(self.onGenerationError)
        self.thread.progress.connect(self.statusBar.showMessage)
        self.thread.start()

    @staticmethod
    def _to_pixmap(wc):
        """Render *wc* and return it as a QPixmap (via in-memory PNG)."""
        from io import BytesIO
        buffer = BytesIO()
        wc.to_image().save(buffer, format='PNG')
        pixmap = QPixmap()
        pixmap.loadFromData(buffer.getvalue())
        return pixmap

    def onWordcloudGenerated(self, wc):
        """Apply the display options, show the cloud and unlock the buttons."""
        try:
            transparent = self.transparent_bg_check.isChecked()
            # A transparent result needs an alpha channel as well as an unset
            # background colour; with mode 'RGB' there is no transparency.
            wc.mode = 'RGBA' if transparent else 'RGB'
            wc.background_color = None if transparent else 'white'
            # NOTE(review): the layout was already computed by the worker, so
            # this presumably only matters if the cloud is regenerated from
            # this object - confirm whether a re-layout was intended.
            wc.prefer_horizontal = float(self.horizontal_only_check.isChecked())
            pixmap = self._to_pixmap(wc)
        except Exception as e:
            # Route rendering failures through the normal error path so the
            # buttons are re-enabled instead of staying locked.
            self.onGenerationError(str(e))
            return

        self.current_wc = wc
        self.current_pixmap = pixmap
        self.showWordcloud(pixmap)

        for button in (self.generate_btn, self.save_btn, self.export_words_btn):
            button.setEnabled(True)
        self.statusBar.showMessage("词云生成成功")

    def onGenerationError(self, error_msg):
        """Report a failure and restore the buttons."""
        QMessageBox.critical(self, "错误", f"生成失败: {error_msg}")
        self.statusBar.showMessage("生成失败")
        self.generate_btn.setEnabled(True)
        # A cloud from an earlier successful run can still be saved/exported.
        has_previous = self.current_wc is not None
        self.save_btn.setEnabled(has_previous)
        self.export_words_btn.setEnabled(has_previous)

    def showWordcloud(self, pixmap):
        """Scale *pixmap* to the preview label (keeping aspect ratio)."""
        if pixmap is None or pixmap.isNull():
            return
        # Leave a 20px margin, but never ask for a non-positive size.
        target_w = max(1, self.display_label.width() - 20)
        target_h = max(1, self.display_label.height() - 20)
        # Setting a pixmap replaces the label's previous content, so the
        # placeholder text does not need to be cleared separately.
        self.display_label.setPixmap(
            pixmap.scaled(target_w, target_h,
                          Qt.KeepAspectRatio, Qt.SmoothTransformation)
        )

    @staticmethod
    def _ensure_extension(path, selected_filter, default_ext):
        """Append an extension to *path* if the user typed none.

        The extension is taken from the chosen dialog filter (e.g. ``*.png``)
        and falls back to *default_ext*.
        """
        if os.path.splitext(path)[1]:
            return path
        found = re.search(r'\*(\.\w+)', selected_filter or '')
        return path + (found.group(1) if found else default_ext)

    def _default_stem(self):
        """Return the input file's base name without its extension."""
        return os.path.splitext(os.path.basename(self.file_path))[0]

    def _save_opaque(self, save_path):
        """Save the cloud on a white RGB canvas (for formats without alpha)."""
        wc = self.current_wc
        previous = (wc.mode, wc.background_color)
        wc.mode, wc.background_color = 'RGB', 'white'
        try:
            wc.to_file(save_path)
        finally:
            # Restore the display settings whatever happens.
            wc.mode, wc.background_color = previous

    def saveWordcloud(self):
        """Save the current cloud as PNG, JPEG or PDF."""
        if self.current_wc is None:
            QMessageBox.warning(self, "警告", "请先生成词云")
            return

        default_filename = f"{self._default_stem()}_词云.png"
        save_path, selected_filter = QFileDialog.getSaveFileName(
            self, "保存词云", default_filename,
            "PNG files (*.png);;JPEG files (*.jpg);;PDF files (*.pdf)"
        )
        if not save_path:
            return

        save_path = self._ensure_extension(save_path, selected_filter, '.png')
        extension = os.path.splitext(save_path)[1].lower()
        try:
            if extension == '.png':
                self.current_wc.to_file(save_path)
            elif extension in ('.jpg', '.jpeg'):
                # JPEG cannot store an alpha channel.
                self._save_opaque(save_path)
            elif extension == '.pdf':
                import matplotlib.pyplot as plt
                figure = plt.figure(figsize=(10, 8))
                try:
                    plt.imshow(self.current_wc, interpolation='bilinear')
                    plt.axis("off")
                    plt.savefig(save_path, format='pdf', bbox_inches='tight')
                finally:
                    plt.close(figure)
            else:
                # Previously nothing was written yet success was reported.
                raise ValueError(f"不支持的文件格式: {extension}")
        except Exception as e:
            QMessageBox.critical(self, "错误", f"保存失败: {e}")
            return

        self.statusBar.showMessage(f"已保存至: {save_path}")
        QMessageBox.information(self, "成功", f"保存成功:\n{save_path}")

    def exportWordFreq(self):
        """Export the cloud's word frequencies to an Excel or CSV file."""
        if self.current_wc is None:
            QMessageBox.warning(self, "警告", "请先生成词云")
            return

        # ``words_`` is the word -> frequency mapping kept by WordCloud.
        word_freq = self.current_wc.words_
        if not word_freq:
            QMessageBox.warning(self, "警告", "无词频数据")
            return

        df = pd.DataFrame.from_dict(word_freq, orient='index', columns=['频率'])
        df.index.name = '词语'
        df = df.sort_values('频率', ascending=False)

        default_filename = f"{self._default_stem()}_词频.xlsx"
        save_path, selected_filter = QFileDialog.getSaveFileName(
            self, "导出词频", default_filename,
            "Excel files (*.xlsx);;CSV files (*.csv)"
        )
        if not save_path:
            return

        save_path = self._ensure_extension(save_path, selected_filter, '.xlsx')
        try:
            if save_path.lower().endswith('.xlsx'):
                df.to_excel(save_path)
            else:
                # The BOM lets spreadsheet programs detect UTF-8 so the
                # Chinese text is not shown garbled.
                df.to_csv(save_path, encoding='utf-8-sig')
        except Exception as e:
            QMessageBox.critical(self, "错误", f"导出失败: {e}")
            return

        self.statusBar.showMessage(f"词频已导出至: {save_path}")
        QMessageBox.information(self, "成功", f"词频导出成功:\n{save_path}")

    def resizeEvent(self, event):
        """Rescale the preview whenever the window is resized."""
        super().resizeEvent(event)
        if self.current_pixmap is not None:
            self.showWordcloud(self.current_pixmap)
# Script entry point: create the Qt application, show the main window and
# hand control to the event loop.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    app.setFont(QFont("Microsoft YaHei", 10))  # nicer default UI font
    window = ExcelWordCloudGenerator()
    window.show()
    sys.exit(app.exec_())

# --- End of script. The text below is forum page content, not code. ---
# Forum notice that was fused onto the last code line: "[Announcement]
# Tutorial on inserting code and adding links in posts (with perks)".
https://www.52pojie.cn/thread-713042-1-1.html
(出处: 吾爱破解论坛)
可以给个py文件吗
感谢分享,刚好学习学习
厉害,学习了
能生成别的形状吗?圆形,心形什么的
歆风 发表于 2025-7-25 18:04
能生成别的形状吗?圆形,心形什么的
你把我那段代码复制粘贴给豆包让它给你做一个 :lol
wordcloud开源库还在更新,良心库
LoveIsOver 发表于 2025-7-25 21:36
你把我那段代码复制粘贴给豆包让它给你做一个
好主意,但是不会打包exe。。
歆风 发表于 2025-7-25 23:32
好主意,但是不会打包exe。。
问豆包怎么打包就行了,你能想出来的问题全部拿去问人工智能就有答案了
页:
[1]
2