# [Python] (forum "view as plain text / copy code" header, kept as a comment so the file parses)
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie, Line, Scatter, Grid, Page
from pyecharts.globals import ThemeType
import pandas as pd
# Load the three input CSV files, in order: the table that carries the
# per-user 'gender' column, the trade history, and the table that carries an
# 'age' column for the scatter plot.
mum_baby_info, trade_history, df_baby1 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/Users/yanxue/Desktop/baby1.csv',
        '/Users/yanxue/Desktop/baby.csv',
        '/Users/yanxue/Desktop/baby_with_age.csv',
    )
)
# Analyse purchase preference by gender.
# Left-join every trade onto the user table; trades whose user has no match
# get NaN in 'gender' and are dropped on the next line.
gender_preference = trade_history.merge(mum_baby_info, on='user_id', how='left')
gender_preference = gender_preference.dropna(subset=['gender'])
# Turn the numeric gender code into a string label ("0", "1", ...) so it can
# be used as a category-axis label.
gender_preference['gender'] = gender_preference['gender'].apply(lambda x: str(int(x)))
# One row per gender, one column per cat1 value, cells = number of trades.
# fill_value=0 keeps gender/category pairs with no trades as an integer 0
# instead of NaN.
grouped = (
    gender_preference.groupby(['gender', 'cat1'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)
# Everything after the leading 'gender' column is a cat1 value.
categories = grouped.columns.tolist()[1:]
# Bar chart: gender on the x axis, one series per cat1 value.
bar1 = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    .add_xaxis(grouped['gender'].tolist())
)
for cat in categories:
    # Series names are stringified, matching how the line chart names cat1.
    bar1.add_yaxis(str(cat), grouped[cat].tolist())
bar1.set_global_opts(
    title_opts=opts.TitleOpts(title="购买婴儿产品的性别偏好分析", pos_bottom="30px"),
    xaxis_opts=opts.AxisOpts(name="性别"),
    yaxis_opts=opts.AxisOpts(name="购买数量"),
    legend_opts=opts.LegendOpts(is_show=True),
)
# Count how many users fall under each gender code (value_counts skips NaN).
gender_counts = mum_baby_info['gender'].value_counts()
# Build [name, value] pairs for the pie. The code is normalised with
# str(int(...)), the same way the gender bar chart labels it, so both charts
# show identical labels even if the column was read as float.
data_pair = [
    [str(int(code)), count]
    for code, count in zip(gender_counts.index.tolist(), gender_counts.values.tolist())
]
# Pie chart of the gender distribution; labels show name, count and percent.
pie = (
    Pie()
    .add("", data_pair)
    .set_global_opts(title_opts=opts.TitleOpts(title="性别比例分布"))
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c} ({d}%)"))
)
# Convert the trade date column to datetime.
trade_history['day'] = pd.to_datetime(trade_history['day'], format='%Y%m%d')
# Extract the calendar month (all years are pooled together).
trade_history['month'] = trade_history['day'].dt.month
# Total quantity bought per (month, cat1).
df_monthly_cat1 = trade_history.groupby(['month', 'cat1'])['buy_mount'].sum().reset_index()
# Total quantity bought per month across all categories.
df_monthly = trade_history.groupby('month')['buy_mount'].sum().reset_index()
# Custom colour list for the per-category lines.
custom_palette = ['blue', 'orange', 'green', 'red', 'purple', 'brown', 'black']
# Months that appear on the x axis, in ascending order (groupby sorts keys).
months = df_monthly['month'].tolist()
# Month x cat1 table aligned to the x axis. A category with no sales in some
# month gets 0 there, so every series has exactly one value per x label
# instead of a shorter list that would shift against the axis.
monthly_by_cat1 = (
    df_monthly_cat1.pivot(index='month', columns='cat1', values='buy_mount')
    .reindex(months)
    .fillna(0)
)
# Draw the line chart.
line = Line()
# Category-axis labels are passed as strings: pyecharts pairs each x value
# with its y value, and a numeric x on a category axis is presumably read as
# a position index rather than a label -- see the pyecharts Line docs.
line.add_xaxis([str(month) for month in months])
for position, cat1 in enumerate(df_monthly_cat1['cat1'].unique()):
    # Cycle through the palette so no category is dropped when there are
    # more categories than colours.
    color = custom_palette[position % len(custom_palette)]
    line.add_yaxis(str(cat1), monthly_by_cat1[cat1].tolist(), color=color)
# Add the overall total as an extra line.
line.add_yaxis("总消费", df_monthly['buy_mount'].tolist(), color='black')
line.set_global_opts(
    xaxis_opts=opts.AxisOpts(name="月份"),
    yaxis_opts=opts.AxisOpts(name="消费量"),
    title_opts=opts.TitleOpts(title="按月份和cat1属性划分的婴儿产品消费趋势", pos_bottom="0px"),
)
# Scatter plot of df_baby1['buy_mount'] against df_baby1['age'], built as a
# single fluent chain.
scatter = (
    Scatter()
    .add_xaxis(df_baby1['age'].tolist())
    .add_yaxis(
        "购买次数",
        df_baby1['buy_mount'].tolist(),
        # Hide the per-point data labels.
        label_opts=opts.LabelOpts(is_show=False),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="用户年龄与购买金额关系"),
        # Numeric ('value') x axis, clamped to the range -4 .. 12.
        xaxis_opts=opts.AxisOpts(name="年龄", type_="value", min_=-4, max_=12),
        yaxis_opts=opts.AxisOpts(name="购买次数"),
    )
)
# The ten cat_id values that occur most often in the trade history
# (value_counts returns them in descending order of row count).
df_top = trade_history['cat_id'].value_counts().head(10)
# Bar chart of those ten counts, built as a single fluent chain.
bar2 = (
    Bar()
    .add_xaxis(df_top.index.tolist())
    .add_yaxis("销售量", df_top.values.tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="销量排名前10的商品"),
        xaxis_opts=opts.AxisOpts(name="商品"),
        yaxis_opts=opts.AxisOpts(name="销售量"),
    )
)
# Convert 'day' to datetime and extract the year. The conversion is repeated
# here so this section does not depend on an earlier one having run; it
# should be harmless if the column is already datetime.
trade_history['day'] = pd.to_datetime(trade_history['day'], format='%Y%m%d')
trade_history['year'] = trade_history['day'].dt.year
# Number of trades per year (rows) and cat1 value (columns). fill_value=0
# stores an integer 0, not NaN, for categories with no trades in a year.
df_product_counts = trade_history.groupby(['year', 'cat1']).size().unstack(fill_value=0)
# Stacked bar chart: one bar per year, one stacked segment per cat1 value.
bar3 = Bar()
# The x axis is the same for every series, so it is set once, outside the loop.
bar3.add_xaxis(df_product_counts.index.tolist())
for cat1 in df_product_counts.columns:
    # Series names are stringified, matching how the line chart names cat1.
    bar3.add_yaxis(str(cat1), df_product_counts[cat1].tolist(), stack="stack1")
# Global chart options.
bar3.set_global_opts(
    title_opts=opts.TitleOpts(title="不同年份各个产品的购买量", pos_bottom="0px"),
    xaxis_opts=opts.AxisOpts(name="年份"),
    yaxis_opts=opts.AxisOpts(name="购买量"),
)
# Arrange two charts side by side in one Grid, with adjustable spacing.
def create_grid(chart1, chart2, pos_top="5%", height="45%", *,
                page_width="100%", page_height="1200px"):
    """Return a Grid holding ``chart1`` on the left and ``chart2`` on the right.

    Args:
        chart1: Chart placed in the left cell (left 10%, right 60%).
        chart2: Chart placed in the right cell (left 60%, right 10%).
        pos_top: Top offset applied to both cells.
        height: Height applied to both cells.
        page_width: Width of the Grid canvas (keyword-only).
        page_height: Height of the Grid canvas (keyword-only). The default of
            1200px is taller than a single row needs, leaving room to stack
            further rows at different ``pos_top`` values.

    Returns:
        The populated ``Grid`` instance.
    """
    grid = Grid(init_opts=opts.InitOpts(width=page_width, height=page_height))
    # (chart, left offset, right offset) for the two cells of the row.
    cells = (
        (chart1, "10%", "60%"),
        (chart2, "60%", "10%"),
    )
    for chart, left, right in cells:
        grid.add(
            chart,
            grid_opts=opts.GridOpts(
                pos_left=left, pos_right=right, pos_top=pos_top, height=height
            ),
        )
    return grid
# Lay the charts out two per row: each Grid is one row (its own canvas) and
# the Page stacks the rows vertically.
# Row 1: gender preference bars + gender share pie.
# NOTE(review): a Pie is not drawn on a cartesian grid, so GridOpts presumably
# does not move it; its position is normally set with `center=` in Pie.add.
# Confirm the rendered layout.
grid1 = Grid(init_opts=opts.InitOpts(width="100%"))
grid1.add(bar1, grid_opts=opts.GridOpts(pos_left="5%", pos_right="55%", pos_top="5%"))
grid1.add(pie, grid_opts=opts.GridOpts(pos_left="55%", pos_right="5%", pos_top="5%"))
# Row 2: monthly trend lines + age scatter.
# pos_top is relative to this row's own canvas, so a large value such as 60%
# would squeeze both plots into the bottom strip; 15% leaves room for the
# legend and title only.
grid2 = Grid(init_opts=opts.InitOpts(width="100%"))
grid2.add(line, grid_opts=opts.GridOpts(pos_left="5%", pos_right="55%", pos_top="15%"))
grid2.add(scatter, grid_opts=opts.GridOpts(pos_left="55%", pos_right="5%", pos_top="15%"))
# Row 3: top-10 bar chart + per-year stacked bars. Both charts are built
# earlier in the script and would otherwise never be rendered.
grid3 = Grid(init_opts=opts.InitOpts(width="100%"))
grid3.add(bar2, grid_opts=opts.GridOpts(pos_left="5%", pos_right="55%", pos_top="15%"))
grid3.add(bar3, grid_opts=opts.GridOpts(pos_left="55%", pos_right="5%", pos_top="15%"))
# Create the page and add every row; further rows go here as well.
page = Page(layout=Page.SimplePageLayout)
page.add(grid1, grid2, grid3)
# Write the combined charts to an HTML file.
page.render("/Users/yanxue/Desktop/combined_charts_grid_layout.html")