import json
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from PyQt6.QtCore import Qt
from PyQt6.QtGui import QAction
from PyQt6.QtWidgets import (
    QApplication, QMainWindow, QLabel, QStatusBar,
    QToolBar, QTableWidget, QTableWidgetItem, QMenu, QFileDialog,
    QInputDialog, QMessageBox,
    QComboBox, QDialog, QDialogButtonBox, QGroupBox, QHBoxLayout,
    QLineEdit, QPushButton, QScrollArea, QVBoxLayout, QWidget,
)
from sklearn import linear_model, preprocessing
class DataAnalysisPlatform(QMainWindow):
    """Main window of a spreadsheet-like data analysis tool.

    The central widget is a ``QTableWidget`` whose cells always hold text.
    Menu, toolbar and context-menu actions load/save the table (CSV, Excel,
    JSON), edit rows and columns, sort, filter, clean, plot and summarise it.
    Every operation reports its outcome through the status bar.
    """

    # Save-dialog filter label -> extension appended when the chosen file
    # name carries no recognised extension of its own.
    _SAVE_FILTERS = {
        "CSV文件 (*.csv)": ".csv",
        "Excel文件 (*.xlsx)": ".xlsx",
        "JSON文件 (*.json)": ".json",
    }

    # Comparison operators offered by the filter dialog -> Series method name.
    _COMPARISONS = {
        "=": "eq",
        ">": "gt",
        "<": "lt",
        "<=": "le",
        ">=": "ge",
        "!=": "ne",
    }

    def __init__(self):
        """Create the window, set its title/geometry and build the UI."""
        super().__init__()
        self.setWindowTitle("Python数据分析平台")
        self.setGeometry(100, 100, 1200, 800)
        self._init_ui()

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def _init_ui(self):
        """Build the table, status bar, menus, toolbar and context menu hook."""
        self.table_widget = QTableWidget()
        self.setCentralWidget(self.table_widget)

        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        self.status_bar.showMessage("就绪")

        self._create_menus()
        self._create_toolbar()

        # The table does not show Qt's default context menu; we build our own.
        self.table_widget.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
        self.table_widget.customContextMenuRequested.connect(self._show_context_menu)

    def _make_action(self, text, slot):
        """Return a ``QAction`` owned by this window that triggers *slot*."""
        action = QAction(text, self)
        action.triggered.connect(slot)
        return action

    def _create_menus(self):
        """Populate the menu bar with the file, edit and help menus."""
        menu_bar = self.menuBar()

        file_menu = menu_bar.addMenu("文件")
        file_menu.addAction(self._make_action("打开", self._open_file))
        file_menu.addAction(self._make_action("保存", self._save_file))

        edit_menu = menu_bar.addMenu("编辑")
        edit_menu.addAction(self._make_action("排序", self._sort_data))
        edit_menu.addAction(self._make_action("筛选", self._filter_data))

        help_menu = menu_bar.addMenu("帮助")
        help_menu.addAction(self._make_action("关于", self._show_about))

    def _create_toolbar(self):
        """Create the main toolbar (open/save, sort/filter, analyse/plot)."""
        self.toolbar = QToolBar("主工具栏")
        self.addToolBar(self.toolbar)

        self.toolbar.addAction(self._make_action("打开", self._open_file))
        self.toolbar.addAction(self._make_action("保存", self._save_file))
        self.toolbar.addSeparator()
        self.toolbar.addAction(self._make_action("排序", self._sort_data))
        self.toolbar.addAction(self._make_action("筛选", self._filter_data))
        self.toolbar.addSeparator()
        self.toolbar.addAction(self._make_action("分析", self._data_analysis))
        self.toolbar.addAction(self._make_action("可视化", self._visualize_data))

    # ------------------------------------------------------------------
    # Shared table helpers
    # ------------------------------------------------------------------
    def _is_table_empty(self):
        """Return True when the table has no rows or no columns."""
        return self.table_widget.rowCount() == 0 or self.table_widget.columnCount() == 0

    def _column_names(self):
        """Return one display name per column.

        Columns without a header item (e.g. columns created while pasting)
        get the fallback name ``Column<n>`` instead of raising.
        """
        names = []
        for col in range(self.table_widget.columnCount()):
            header = self.table_widget.horizontalHeaderItem(col)
            names.append(header.text() if header else f"Column{col+1}")
        return names

    def _row_texts(self, row):
        """Return the cell texts of *row*; cells without an item yield ``""``."""
        texts = []
        for col in range(self.table_widget.columnCount()):
            item = self.table_widget.item(row, col)
            texts.append(item.text() if item else "")
        return texts

    def _table_rows(self):
        """Return the whole table as a list of rows of cell texts."""
        return [self._row_texts(row) for row in range(self.table_widget.rowCount())]

    def _table_to_dataframe(self):
        """Return the table as a DataFrame of strings labelled by column name."""
        return pd.DataFrame(self._table_rows(), columns=self._column_names())

    def _replace_rows_from_dataframe(self, df):
        """Overwrite the table rows with *df*, leaving the headers untouched."""
        self.table_widget.setRowCount(len(df))
        for row_idx, values in enumerate(df.itertuples(index=False, name=None)):
            for col_idx, value in enumerate(values):
                self.table_widget.setItem(row_idx, col_idx, QTableWidgetItem(str(value)))

    def _remove_rows(self, rows):
        """Remove the given row indices from the table."""
        # Bottom-up, so earlier removals do not shift the remaining indices.
        for row in sorted(rows, reverse=True):
            self.table_widget.removeRow(row)

    def _numeric_cells(self, col):
        """Return ``(row, value)`` for every cell of *col* that parses as float."""
        cells = []
        for row in range(self.table_widget.rowCount()):
            item = self.table_widget.item(row, col)
            if item is None or not item.text().strip():
                continue
            try:
                cells.append((row, float(item.text())))
            except ValueError:
                # Non-numeric text is simply not part of the numeric view.
                continue
        return cells

    # ------------------------------------------------------------------
    # Clipboard
    # ------------------------------------------------------------------
    def _copy_data(self):
        """Copy the selected cells to the clipboard as tab/newline separated text."""
        # Use selected *indexes* rather than selectedItems(): the latter skips
        # cells that have no item, which would misalign the copied columns.
        indexes = self.table_widget.selectionModel().selectedIndexes()
        if not indexes:
            self.status_bar.showMessage("没有选中要复制的数据")
            return

        # Qt does not promise any ordering of the selection, so sort it into
        # row-major order before grouping the cells by row.
        cells_by_row = {}
        for index in sorted(indexes, key=lambda idx: (idx.row(), idx.column())):
            item = self.table_widget.item(index.row(), index.column())
            cells_by_row.setdefault(index.row(), []).append(item.text() if item else "")

        text = "\n".join("\t".join(cells) for cells in cells_by_row.values())
        QApplication.clipboard().setText(text)
        self.status_bar.showMessage(f"已复制 {len(indexes)} 个单元格数据")

    def _paste_data(self):
        """Paste tab/newline separated clipboard text at the selection's top-left cell.

        Rows and columns are appended when the pasted block does not fit.
        Without a selection the block is pasted at cell (0, 0).
        """
        text = QApplication.clipboard().text()
        if not text:
            self.status_bar.showMessage("剪贴板中没有数据")
            return

        try:
            # Normalise CRLF / CR line endings so no stray "\r" ends up in cells.
            lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
            data = [line.split("\t") for line in lines if line]

            indexes = self.table_widget.selectionModel().selectedIndexes()
            start_row = min((idx.row() for idx in indexes), default=0)
            start_col = min((idx.column() for idx in indexes), default=0)

            for row_idx, row_data in enumerate(data):
                for col_idx, cell_data in enumerate(row_data):
                    target_row = start_row + row_idx
                    target_col = start_col + col_idx
                    if target_row >= self.table_widget.rowCount():
                        self.table_widget.insertRow(target_row)
                    if target_col >= self.table_widget.columnCount():
                        self.table_widget.insertColumn(target_col)
                    self.table_widget.setItem(target_row, target_col, QTableWidgetItem(cell_data))

            self.status_bar.showMessage(f"已粘贴 {len(data)} 行数据")
        except Exception as e:
            self.status_bar.showMessage(f"粘贴失败: {str(e)}")

    # ------------------------------------------------------------------
    # Context menu and row/column editing
    # ------------------------------------------------------------------
    def _show_context_menu(self, position):
        """Show the table's context menu at *position* (viewport coordinates)."""
        menu = QMenu(self)
        # rowAt/columnAt return -1 when the click is outside any cell.
        row = self.table_widget.rowAt(position.y())
        col = self.table_widget.columnAt(position.x())

        menu.addAction("复制").triggered.connect(self._copy_data)
        menu.addAction("粘贴").triggered.connect(self._paste_data)
        menu.addSeparator()

        menu.addAction("删除行").triggered.connect(lambda: self._delete_row(row))
        menu.addAction("插入行").triggered.connect(lambda: self._insert_row(row))
        menu.addSeparator()

        menu.addAction("添加列").triggered.connect(self._add_column)
        menu.addAction("删除列").triggered.connect(self._remove_column)

        # Renaming only makes sense when the click landed on a real column.
        if col >= 0:
            menu.addAction("编辑列名").triggered.connect(lambda: self._edit_column_name(col))

        menu.exec(self.table_widget.viewport().mapToGlobal(position))

    def _edit_column_name(self, col):
        """Ask for a new header text for column *col* and apply it."""
        header = self.table_widget.horizontalHeaderItem(col)
        # Columns created by insertColumn() may have no header item yet.
        current_name = header.text() if header else f"Column{col+1}"

        new_name, ok = QInputDialog.getText(
            self, "编辑列名", "请输入新的列名:", text=current_name
        )
        if ok and new_name:
            self.table_widget.setHorizontalHeaderItem(col, QTableWidgetItem(new_name))
            self.status_bar.showMessage(f"已更新列名: {current_name} -> {new_name}")

    def _add_column(self):
        """Append a column with a generated header name."""
        col = self.table_widget.columnCount()
        self.table_widget.insertColumn(col)
        self.table_widget.setHorizontalHeaderItem(col, QTableWidgetItem(f"列{col+1}"))
        self.status_bar.showMessage(f"已添加第{col+1}列")

    def _remove_column(self):
        """Remove the table's current column, if there is one."""
        col = self.table_widget.currentColumn()
        if col >= 0:
            self.table_widget.removeColumn(col)
            self.status_bar.showMessage(f"已删除第{col+1}列")
        else:
            self.status_bar.showMessage("请先选择要删除的列")

    def _insert_row(self, row):
        """Insert an empty row at index *row*.

        An out-of-range index (such as the -1 produced by a context-menu
        click below the last row) appends the row at the end.
        """
        row_count = self.table_widget.rowCount()
        if not 0 <= row <= row_count:
            row = row_count
        self.table_widget.insertRow(row)
        for col in range(self.table_widget.columnCount()):
            self.table_widget.setItem(row, col, QTableWidgetItem(""))

    def _delete_row(self, row):
        """Remove row *row*; negative indices are ignored."""
        if row >= 0:
            self.table_widget.removeRow(row)

    # ------------------------------------------------------------------
    # File I/O
    # ------------------------------------------------------------------
    @staticmethod
    def _read_data_file(file_path):
        """Load *file_path* into a DataFrame based on its extension.

        Raises:
            ValueError: if the extension is not .csv, .xlsx or .json.
        """
        suffix = Path(file_path).suffix.lower()
        if suffix == ".csv":
            return pd.read_csv(file_path)
        if suffix == ".xlsx":
            return pd.read_excel(file_path)
        if suffix == ".json":
            with open(file_path, 'r', encoding='utf-8') as f:
                return pd.DataFrame(json.load(f))
        raise ValueError("不支持的文件格式")

    def _load_dataframe(self, data):
        """Replace the table's contents and headers with the DataFrame *data*."""
        self.table_widget.clear()
        self.table_widget.setRowCount(data.shape[0])
        self.table_widget.setColumnCount(data.shape[1])
        # Header labels must be strings; column labels may be ints etc.
        self.table_widget.setHorizontalHeaderLabels([str(name) for name in data.columns])

        for row_idx, values in enumerate(data.itertuples(index=False, name=None)):
            for col_idx, value in enumerate(values):
                # pd.isna() returns an array for list-like cells (possible with
                # JSON input), so only ask it about scalar values.
                is_missing = pd.api.types.is_scalar(value) and pd.isna(value)
                text = "" if is_missing else str(value)
                self.table_widget.setItem(row_idx, col_idx, QTableWidgetItem(text))

    def _open_file(self):
        """Let the user pick a CSV/Excel/JSON file and load it into the table."""
        file_filter = "数据文件 (*.csv *.xlsx *.json);;CSV文件 (*.csv);;Excel文件 (*.xlsx);;JSON文件 (*.json)"
        file_path, _ = QFileDialog.getOpenFileName(self, "打开数据文件", "", file_filter)
        if not file_path:
            return

        try:
            self._load_dataframe(self._read_data_file(file_path))
            self.status_bar.showMessage(f"成功加载文件: {file_path}")
        except Exception as e:
            self.status_bar.showMessage(f"加载文件失败: {str(e)}")

    def _save_file(self):
        """Let the user pick a target file and write the table to it.

        The output format follows the file extension the user typed. When the
        name has no recognised extension, the format of the selected dialog
        filter is used and its extension is appended to the file name.
        """
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可保存")
            return

        file_filter = "CSV文件 (*.csv);;Excel文件 (*.xlsx);;JSON文件 (*.json)"
        file_path, selected_filter = QFileDialog.getSaveFileName(
            self, "保存数据文件", "", file_filter
        )
        if not file_path:
            return

        try:
            extension = Path(file_path).suffix.lower()
            if extension not in self._SAVE_FILTERS.values():
                extension = self._SAVE_FILTERS.get(selected_filter)
                if extension is None:
                    raise ValueError("不支持的文件格式")
                file_path += extension

            df = self._table_to_dataframe()
            if extension == ".csv":
                df.to_csv(file_path, index=False, encoding='utf-8-sig')
            elif extension == ".xlsx":
                df.to_excel(file_path, index=False)
            else:
                df.to_json(file_path, orient='records', force_ascii=False, indent=4)

            self.status_bar.showMessage(f"数据已成功保存到: {file_path}")
        except Exception as e:
            self.status_bar.showMessage(f"保存文件失败: {str(e)}")

    # ------------------------------------------------------------------
    # Sorting
    # ------------------------------------------------------------------
    @staticmethod
    def _numeric_sort_key(series):
        """Return *series* as numbers when every non-blank cell is numeric.

        Table cells are text, so without this "10" would sort before "9".
        Columns containing any non-numeric text keep their string ordering.
        """
        numeric = pd.to_numeric(series, errors="coerce")
        has_text = series.str.strip() != ""
        if has_text.any() and numeric[has_text].notna().all():
            return numeric
        return series

    def _sort_data(self):
        """Show the multi-column sort dialog and sort the table rows.

        Up to three sort conditions can be given; the second and third may be
        left blank. A column chosen more than once is only used the first time.
        """
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可排序")
            return

        column_names = self._column_names()

        dialog = QDialog(self)
        dialog.setWindowTitle("多列排序")
        layout = QVBoxLayout()

        selectors = []
        for i in range(3):
            row_layout = QHBoxLayout()
            row_layout.addWidget(QLabel(f"排序条件 {i+1}:"))

            col_combo = QComboBox()
            col_combo.setObjectName(f"col_combo_{i}")
            if i > 0:
                # Blank entry: this condition is optional.
                col_combo.addItem("", None)
            # The item data is the column position, so duplicate or empty
            # header names cannot confuse the lookup later on.
            for col_index, name in enumerate(column_names):
                col_combo.addItem(name, col_index)
            row_layout.addWidget(col_combo)

            order_combo = QComboBox()
            order_combo.setObjectName(f"order_combo_{i}")
            order_combo.addItems(["升序", "降序"])
            row_layout.addWidget(order_combo)

            layout.addLayout(row_layout)
            selectors.append((col_combo, order_combo))

        buttons = QDialogButtonBox(
            QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel
        )
        buttons.accepted.connect(dialog.accept)
        buttons.rejected.connect(dialog.reject)
        layout.addWidget(buttons)
        dialog.setLayout(layout)

        if dialog.exec() != QDialog.DialogCode.Accepted:
            self.status_bar.showMessage("排序已取消")
            return

        self.status_bar.showMessage("正在排序数据...")
        try:
            sort_conditions = []
            used_columns = set()
            for col_combo, order_combo in selectors:
                col_index = col_combo.currentData()
                if col_index is None or col_index in used_columns:
                    continue
                used_columns.add(col_index)
                sort_conditions.append((col_index, order_combo.currentText() == "升序"))

            if sort_conditions:
                # Positional column labels (0..n-1) keep sort_values working
                # even when two headers share the same text.
                df = pd.DataFrame(self._table_rows())
                df = df.sort_values(
                    by=[col for col, _ in sort_conditions],
                    ascending=[asc for _, asc in sort_conditions],
                    key=self._numeric_sort_key,
                    kind="mergesort",
                )
                self._replace_rows_from_dataframe(df)

            self.status_bar.showMessage(f"已按{len(sort_conditions)}列排序完成")
        except Exception as e:
            self.status_bar.showMessage(f"排序失败: {str(e)}")

    # ------------------------------------------------------------------
    # Cleaning (whole-table options)
    # ------------------------------------------------------------------
    def _clean_data(self):
        """Run one of four cleaning operations chosen from a dialog.

        Options: drop fully blank rows, fill blanks in one column, drop
        duplicate rows, or normalise one column to a target type.
        NOTE(review): no menu or toolbar action in this class is connected to
        this method.
        """
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可清洗")
            return

        options = ["删除空行", "填充空值", "删除重复行", "数据类型转换"]
        option, ok = QInputDialog.getItem(
            self, "选择清洗选项", "请选择要执行的清洗操作:", options, 0, False
        )
        if not ok:
            return

        table = self.table_widget
        try:
            if option == "删除空行":
                rows_to_remove = [
                    row for row in range(table.rowCount())
                    if not any(text.strip() for text in self._row_texts(row))
                ]
                self._remove_rows(rows_to_remove)
                self.status_bar.showMessage(f"已删除 {len(rows_to_remove)} 个空行")

            elif option == "填充空值":
                col, ok = QInputDialog.getInt(
                    self, "选择列", "请输入要填充空值的列号(从1开始):",
                    1, 1, table.columnCount(), 1
                )
                if not ok:
                    return
                value, ok = QInputDialog.getText(self, "输入填充值", "请输入要填充的值:")
                if not ok:
                    return

                col_index = col - 1  # the dialog is 1-based
                filled_count = 0
                for row in range(table.rowCount()):
                    item = table.item(row, col_index)
                    if not item or not item.text().strip():
                        table.setItem(row, col_index, QTableWidgetItem(value))
                        filled_count += 1
                self.status_bar.showMessage(f"已填充 {filled_count} 个空值")

            elif option == "删除重复行":
                rows_to_remove = []
                seen_rows = set()
                for row in range(table.rowCount()):
                    row_tuple = tuple(self._row_texts(row))
                    if row_tuple in seen_rows:
                        rows_to_remove.append(row)
                    else:
                        seen_rows.add(row_tuple)
                self._remove_rows(rows_to_remove)
                self.status_bar.showMessage(f"已删除 {len(rows_to_remove)} 个重复行")

            elif option == "数据类型转换":
                col, ok = QInputDialog.getInt(
                    self, "选择列", "请输入要转换数据类型的列号(从1开始):",
                    1, 1, table.columnCount(), 1
                )
                if not ok:
                    return
                types = ["整数", "浮点数", "字符串", "布尔值"]
                target_type, ok = QInputDialog.getItem(
                    self, "选择目标类型", "请选择要转换的数据类型:", types, 0, False
                )
                if not ok:
                    return

                col_index = col - 1  # the dialog is 1-based
                converted_count = 0
                for row in range(table.rowCount()):
                    item = table.item(row, col_index)
                    if not item or not item.text().strip():
                        continue
                    text = item.text()
                    try:
                        if target_type == "整数":
                            value = int(text)
                        elif target_type == "浮点数":
                            value = float(text)
                        elif target_type == "布尔值":
                            value = text.strip().lower() in ("true", "1", "yes")
                        else:
                            value = str(text)
                    except ValueError:
                        # Cells that cannot be converted are left unchanged.
                        continue
                    table.setItem(row, col_index, QTableWidgetItem(str(value)))
                    converted_count += 1
                self.status_bar.showMessage(f"已转换 {converted_count} 个值为 {target_type}")
        except Exception as e:
            self.status_bar.showMessage(f"数据清洗失败: {str(e)}")

    # ------------------------------------------------------------------
    # Filtering
    # ------------------------------------------------------------------
    @classmethod
    def _build_filter_mask(cls, series, operator, value):
        """Return a boolean Series marking the cells of *series* that match.

        Args:
            series: column of cell texts.
            operator: one of the operator labels offered by the filter dialog.
            value: the text typed by the user.

        Raises:
            ValueError: if *operator* is not a known operator label.
        """
        if operator in cls._COMPARISONS:
            method = cls._COMPARISONS[operator]
            try:
                number = float(value)
            except ValueError:
                number = None
            if number is not None and not np.isnan(number):
                # Compare as numbers when the user typed one; cells that are
                # not numeric become NaN and therefore only match "!=".
                numeric = pd.to_numeric(series, errors="coerce")
                return getattr(numeric, method)(number)
            return getattr(series, method)(value)

        # regex=False: the user's text is matched literally, so characters
        # such as "(" or "*" cannot raise a regular-expression error.
        if operator == "包含":
            return series.str.contains(value, na=False, regex=False)
        if operator == "不包含":
            return ~series.str.contains(value, na=False, regex=False)
        if operator == "开头为":
            return series.str.startswith(value, na=False)
        if operator == "结尾为":
            return series.str.endswith(value, na=False)
        if operator == "为空":
            return series.isna() | (series == "")
        if operator == "不为空":
            return ~series.isna() & (series != "")
        raise ValueError(operator)

    def _filter_data(self):
        """Show the filter dialog and keep only the rows matching its conditions.

        Conditions are combined with AND or OR. Rows that do not match are
        removed from the table.
        """
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可筛选")
            return

        column_names = self._column_names()

        dialog = QDialog(self)
        dialog.setWindowTitle("高级筛选")
        dialog.resize(500, 400)
        layout = QVBoxLayout()

        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll_content = QWidget()
        scroll_layout = QVBoxLayout(scroll_content)

        condition_group = QGroupBox("筛选条件")
        condition_group_layout = QVBoxLayout()
        # Conditions live in their own layout so that newly added ones stay
        # above the "add condition" button.
        conditions_layout = QVBoxLayout()
        condition_group_layout.addLayout(conditions_layout)
        condition_widgets = []

        def remove_condition(widget):
            conditions_layout.removeWidget(widget)
            widget.deleteLater()
            condition_widgets[:] = [cw for cw in condition_widgets if cw["widget"] != widget]

        def add_condition():
            condition_widget = QWidget()
            condition_layout = QHBoxLayout()

            col_combo = QComboBox()
            # Item data is the column position (robust to duplicate names).
            for col_index, name in enumerate(column_names):
                col_combo.addItem(name, col_index)
            condition_layout.addWidget(col_combo)

            operator_combo = QComboBox()
            operator_combo.addItems([
                "=", ">", "<", "<=", ">=", "!=",
                "包含", "不包含", "开头为", "结尾为", "为空", "不为空",
            ])
            condition_layout.addWidget(operator_combo)

            value_edit = QLineEdit()
            condition_layout.addWidget(value_edit)

            delete_btn = QPushButton("删除")
            delete_btn.clicked.connect(lambda: remove_condition(condition_widget))
            condition_layout.addWidget(delete_btn)

            condition_widget.setLayout(condition_layout)
            conditions_layout.addWidget(condition_widget)
            condition_widgets.append({
                "widget": condition_widget,
                "col_combo": col_combo,
                "operator_combo": operator_combo,
                "value_edit": value_edit,
            })

        add_condition()

        add_btn = QPushButton("添加条件")
        add_btn.clicked.connect(add_condition)
        condition_group_layout.addWidget(add_btn)
        condition_group.setLayout(condition_group_layout)
        scroll_layout.addWidget(condition_group)

        logic_group = QGroupBox("逻辑组合")
        logic_layout = QVBoxLayout()
        logic_combo = QComboBox()
        logic_combo.addItems(["AND", "OR"])
        logic_layout.addWidget(logic_combo)
        logic_group.setLayout(logic_layout)
        scroll_layout.addWidget(logic_group)

        scroll.setWidget(scroll_content)
        layout.addWidget(scroll)

        buttons = QDialogButtonBox(
            QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel
        )
        buttons.accepted.connect(dialog.accept)
        buttons.rejected.connect(dialog.reject)
        layout.addWidget(buttons)
        dialog.setLayout(layout)

        if dialog.exec() != QDialog.DialogCode.Accepted:
            self.status_bar.showMessage("筛选已取消")
            return

        self.status_bar.showMessage("正在筛选数据...")
        try:
            # Positional column labels (0..n-1), matching the combo item data.
            df = pd.DataFrame(self._table_rows())
            logic = logic_combo.currentText()

            masks = [
                self._build_filter_mask(
                    df[condition["col_combo"].currentData()],
                    condition["operator_combo"].currentText(),
                    condition["value_edit"].text(),
                )
                for condition in condition_widgets
            ]

            if masks:
                combined_mask = masks[0]
                for mask in masks[1:]:
                    if logic == "AND":
                        combined_mask = combined_mask & mask
                    else:
                        combined_mask = combined_mask | mask
                filtered_df = df[combined_mask]
            else:
                # Every condition was deleted: nothing to filter on.
                filtered_df = df.copy()

            self._replace_rows_from_dataframe(filtered_df)
            self.status_bar.showMessage(
                f"已筛选出{len(filtered_df)}条记录 (共{len(df)}条)"
                + f" | 使用{len(condition_widgets)}个条件{logic}组合"
            )
        except Exception as e:
            self.status_bar.showMessage(f"筛选失败: {str(e)}")

    # ------------------------------------------------------------------
    # Cleaning (single-column options)
    # ------------------------------------------------------------------
    def _data_cleaning(self):
        """Clean the table based on one column chosen by the user.

        Methods: drop rows whose cell in that column is blank, fill blank
        cells with a default value, or keep only the first row for each
        distinct value of the column.
        NOTE(review): no menu or toolbar action in this class is connected to
        this method.
        """
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可清洗")
            return

        columns = self._column_names()
        column, ok = QInputDialog.getItem(
            self, "选择清洗列", "请选择要清洗的列:", columns, 0, False
        )
        if not ok:
            return
        col_index = columns.index(column)

        methods = ["删除空值行", "填充默认值", "删除重复行"]
        method, ok = QInputDialog.getItem(
            self, "选择清洗方式", "请选择清洗方式:", methods, 0, False
        )
        if not ok:
            return

        table = self.table_widget
        try:
            if method == "删除空值行":
                original_count = table.rowCount()
                rows_to_keep = []
                for row in range(original_count):
                    item = table.item(row, col_index)
                    if item and item.text().strip():
                        rows_to_keep.append(row)

                self._update_table_with_data([self._row_texts(row) for row in rows_to_keep])
                # Count against the row total taken *before* the table was
                # rebuilt; afterwards rowCount() equals len(rows_to_keep).
                self.status_bar.showMessage(f"已删除{original_count - len(rows_to_keep)}条空值行")

            elif method == "填充默认值":
                default_value, ok = QInputDialog.getText(
                    self, "输入默认值", f"请输入{column}列的默认值:"
                )
                if ok:
                    for row in range(table.rowCount()):
                        item = table.item(row, col_index)
                        if not item or not item.text().strip():
                            table.setItem(row, col_index, QTableWidgetItem(default_value))
                    self.status_bar.showMessage(f"已将{column}列的空值填充为: {default_value}")

            elif method == "删除重复行":
                original_count = table.rowCount()
                unique_values = set()
                rows_to_keep = []
                for row in range(original_count):
                    item = table.item(row, col_index)
                    value = item.text() if item else ""
                    if value not in unique_values:
                        unique_values.add(value)
                        rows_to_keep.append(row)

                self._update_table_with_data([self._row_texts(row) for row in rows_to_keep])
                self.status_bar.showMessage(f"已删除{original_count - len(rows_to_keep)}条重复行")
        except Exception as e:
            self.status_bar.showMessage(f"数据清洗失败: {str(e)}")

    def _update_table_with_data(self, data):
        """Replace the table's cell contents with *data* (a list of text rows).

        Headers and the column count are left as they are.
        """
        self.table_widget.clearContents()
        self.table_widget.setRowCount(len(data))
        for row, row_data in enumerate(data):
            for col, text in enumerate(row_data):
                self.table_widget.setItem(row, col, QTableWidgetItem(text))

    # ------------------------------------------------------------------
    # Visualisation and statistics
    # ------------------------------------------------------------------
    def _visualize_data(self):
        """Plot the numeric cells of a user-chosen column with matplotlib/seaborn."""
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可可视化")
            return

        columns = self._column_names()
        column, ok = QInputDialog.getItem(
            self, "选择可视化列", "请选择要可视化的列:", columns, 0, False
        )
        if not ok:
            return
        col_index = columns.index(column)

        chart_types = ["柱状图", "折线图", "饼图", "箱线图", "散点图"]
        chart_type, ok = QInputDialog.getItem(
            self, "选择图表类型", "请选择图表类型:", chart_types, 0, False
        )
        if not ok:
            return

        cells = self._numeric_cells(col_index)
        numeric_data = [value for _, value in cells]
        labels = [str(row + 1) for row, _ in cells]  # 1-based row numbers

        if not numeric_data:
            self.status_bar.showMessage(f"{column}列没有可可视化的数值数据")
            return

        figure = None
        try:
            figure = plt.figure(figsize=(8, 6))

            if chart_type == "柱状图":
                plt.bar(labels, numeric_data)
                plt.title(f"{column}列柱状图")
                plt.xlabel("行号")
                plt.ylabel("数值")
            elif chart_type == "折线图":
                plt.plot(labels, numeric_data, marker='o')
                plt.title(f"{column}列折线图")
                plt.xlabel("行号")
                plt.ylabel("数值")
            elif chart_type == "饼图":
                plt.pie(numeric_data, labels=labels, autopct='%1.1f%%')
                plt.title(f"{column}列饼图")
            elif chart_type == "箱线图":
                sns.boxplot(data=numeric_data)
                plt.title(f"{column}列箱线图")
                plt.ylabel("数值")
            elif chart_type == "散点图":
                plt.scatter(range(len(numeric_data)), numeric_data)
                plt.title(f"{column}列散点图")
                plt.xlabel("索引")
                plt.ylabel("数值")

            plt.tight_layout()
            plt.show()
            self.status_bar.showMessage(f"已生成{column}列的{chart_type}")
        except Exception as e:
            # Do not leave a half-built figure registered with pyplot.
            if figure is not None:
                plt.close(figure)
            self.status_bar.showMessage(f"数据可视化失败: {str(e)}")

    def _data_analysis(self):
        """Show descriptive statistics for every column that has numeric cells."""
        if self._is_table_empty():
            self.status_bar.showMessage("表格中没有数据可分析")
            return

        try:
            column_names = self._column_names()
            stats = []
            for col, col_name in enumerate(column_names):
                data = [value for _, value in self._numeric_cells(col)]
                if not data:
                    continue
                stats.extend([
                    f"{col_name}列统计结果:",
                    f"-----------------",
                    f"数据个数: {len(data)}",
                    f"平均值: {np.mean(data):.2f}",
                    f"标准差: {np.std(data):.2f}",
                    f"最小值: {min(data):.2f}",
                    f"25%分位数: {np.percentile(data, 25):.2f}",
                    f"中位数: {np.median(data):.2f}",
                    f"75%分位数: {np.percentile(data, 75):.2f}",
                    f"最大值: {max(data):.2f}",
                    "",
                ])

            if stats:
                QMessageBox.information(self, "基础统计结果", "\n".join(stats))
                self.status_bar.showMessage("已完成基础统计分析")
            else:
                self.status_bar.showMessage("没有找到可分析的数值数据")
        except Exception as e:
            self.status_bar.showMessage(f"数据分析失败: {str(e)}")

    def _show_about(self):
        """Placeholder for the about dialog; only updates the status bar."""
        self.status_bar.showMessage("关于功能待实现")
if __name__ == "__main__":
    # Script entry point: start Qt, show the main window and hand control to
    # the event loop; its return code becomes the process exit status.
    application = QApplication(sys.argv)
    main_window = DataAnalysisPlatform()
    main_window.show()
    exit_code = application.exec()
    sys.exit(exit_code)