import json
from pyecharts import options as opts
from pyecharts.charts import Pie, Bar, Funnel, Page, PictorialBar, WordCloud,Line
from pyecharts.globals import ThemeType, SymbolType
from pyspark import SparkConf, SparkContext
import os
# Tell PySpark which Python interpreter to use.
os.environ["PYSPARK_PYTHON"] = r"C:\Users\h\AppData\Local\Programs\Python\Python38\python.exe"

# Configure and start Spark with the "local[*]" master.
conf = SparkConf().setMaster("local[*]").setAppName("GameLevelDistribution")
sc = SparkContext(conf=conf)

# Load the game records from the JSON file.
with open("game_data.json", encoding="utf-8") as f:
    data = json.load(f)

# Turn the loaded records into an RDD.
rdd = sc.parallelize(data)

# One level label per record; records without a 'level' key get a placeholder.
level_rdd = rdd.map(lambda record: record.get('level', '未知等级'))

# Mapping of level label -> number of occurrences.
level_counts = level_rdd.countByValue()

# Same counts as a list of (level, count) pairs, the shape the pie chart takes.
level_data = list(level_counts.items())
def _level_sort_key(item):
    """Sort key for ``(level_label, count)`` pairs: numeric levels first, ascending.

    A label that is not an integer once '级' is removed (for example the
    '未知等级' placeholder used when a record has no level) cannot be passed
    to ``int``; such labels are ordered after every numeric level instead of
    raising ``ValueError``.
    """
    label = str(item[0]).replace('级', '')
    try:
        return (0, int(label), "")
    except ValueError:
        return (1, 0, label)


# Pie chart of the level distribution.
def drawPie():
    """Build the pie chart of game counts per level.

    Reads the module-level ``level_data`` list of ``(level, count)`` pairs
    and orders the slices by level number.

    Returns:
        Pie: the configured chart object.
    """
    sorted_level_data = sorted(level_data, key=_level_sort_key)
    c = (
        Pie(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        .add("", sorted_level_data, center=["50%", "63%"])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="游戏等级分布",
                title_textstyle_opts=opts.TextStyleOpts(color="white"),
            ),
            legend_opts=opts.LegendOpts(
                pos_left="30%",
                textstyle_opts=opts.TextStyleOpts(color="white"),
            ),
            toolbox_opts=opts.ToolboxOpts(is_show=False),  # hide the toolbox
            datazoom_opts=None,  # no data zoom
        )
        # NOTE: the original also passed init_opts=InitOpts(renderer='svg')
        # here. init_opts is a chart-constructor argument, not a series
        # option, so it has been dropped from this call.
        .set_series_opts(
            label_opts=opts.LabelOpts(formatter="{b}: {c}", color="white"),
            itemstyle_opts=opts.ItemStyleOpts(border_width=0),
        )
    )
    return c
# Count how many games carry each type tag.
def _funnel_type_tags(game):
    """Return the non-empty, stripped tags of a record's "/"-separated "type"."""
    tags = []
    for raw in game["type"].split("/"):
        tag = raw.strip()
        if tag:
            tags.append(tag)
    return tags


word_counts = (
    rdd.flatMap(_funnel_type_tags)
    .map(lambda tag: (tag, 1))
    .reduceByKey(lambda left, right: left + right)
    .collect()
)

# (tag, count) pairs in the form the funnel chart takes.
funnel_data = list(word_counts)
# Funnel chart of the type distribution.
def drawFunnel():
    """Build the funnel chart of game counts per type tag.

    Reads the module-level ``funnel_data`` list of ``(tag, count)`` pairs.

    Returns:
        Funnel: the configured chart object.
    """
    title = opts.TitleOpts(
        title="游戏类型分布漏斗图",
        title_textstyle_opts=opts.TextStyleOpts(color="white"),
    )
    legend = opts.LegendOpts(
        pos_top="bottom",
        pos_left="center",
        textstyle_opts=opts.TextStyleOpts(color="white"),
    )
    inside_label = opts.LabelOpts(is_show=True, position="inside", color="white")
    borderless = opts.ItemStyleOpts(border_color="#fff", border_width=0)

    funnel = Funnel(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    funnel.add(
        series_name="游戏类型分布",
        data_pair=funnel_data,
        gap=2,
        tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{a} <br/>{b} : {c}"),
        label_opts=inside_label,
        itemstyle_opts=borderless,
    )
    funnel.set_global_opts(
        title_opts=title,
        legend_opts=legend,
        toolbox_opts=opts.ToolboxOpts(is_show=False),  # hide the toolbox
        datazoom_opts=None,  # no data zoom
    )
    return funnel
# Per-type game counts and average scores.
def split_types(game):
    """Return ``(type_tag, 1)`` pairs for one game record.

    The record's "type" string is split on "/", each tag is stripped and
    empty tags are dropped. This matches how the funnel and word-cloud
    counts in this file tokenise the same field; previously this function
    split on the literal " / ", so "A/B" stayed one tag and an empty type
    produced a '' category.
    """
    stripped = (part.strip() for part in game["type"].split("/"))
    return [(tag, 1) for tag in stripped if tag]


type_counts = rdd.flatMap(split_types).reduceByKey(lambda a, b: a + b).collectAsMap()


def split_types_with_score(game):
    """Return ``(type_tag, score)`` pairs for one game record.

    Tags come from ``split_types`` so that the keys of ``type_scores`` are
    always the same as the keys of ``type_counts``.

    Raises:
        ValueError: if the record's "score" cannot be converted to float.
    """
    score = float(game["score"])
    return [(tag, score) for tag, _ in split_types(game)]


# Average score per type tag, rounded to two decimals.
type_scores = (
    rdd.flatMap(split_types_with_score)
    .groupByKey()
    .mapValues(lambda vals: round(sum(vals) / len(vals), 2))
    .collectAsMap()
)
# Parallel lists for the line chart: type names, their game counts, and
# their average scores formatted as two-decimal strings.
types = list(type_counts)
counts = list(type_counts.values())
scores = [f"{type_scores[name]:.2f}" for name in types]
# Line chart of game count and average score per type.
def drawLine():
    """Build the line chart with one series for counts and one for scores.

    Reads the module-level ``types`` (x axis), ``counts`` and ``scores``
    lists.

    Returns:
        Line: the configured chart object.
    """
    count_points = opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_="max", name="最大值"),
            opts.MarkPointItem(type_="min", name="最小值"),
        ]
    )
    count_average = opts.MarkLineOpts(
        data=[opts.MarkLineItem(type_="average", name="平均值")]
    )
    score_points = opts.MarkPointOpts(
        data=[opts.MarkPointItem(type_="max", name="最高评分")]
    )
    score_average = opts.MarkLineOpts(
        data=[opts.MarkLineItem(type_="average", name="平均评分")]
    )

    chart = Line(init_opts=opts.InitOpts(theme="light"))
    chart.add_xaxis(xaxis_data=types)
    chart.add_yaxis(
        series_name="游戏数量",
        y_axis=counts,
        markpoint_opts=count_points,
        markline_opts=count_average,
    )
    chart.add_yaxis(
        series_name="平均评分",
        y_axis=scores,
        markpoint_opts=score_points,
        markline_opts=score_average,
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(
            title="游戏类型分布",
            subtitle="纯属虚构",
            title_textstyle_opts=opts.TextStyleOpts(color="white"),
        ),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
        yaxis_opts=opts.AxisOpts(
            name="数量/评分",
            axislabel_opts=opts.LabelOpts(formatter="{value}", color="white"),
        ),
        legend_opts=opts.LegendOpts(textstyle_opts=opts.TextStyleOpts(color="white")),
        datazoom_opts=None,
    )
    return chart
# Number of games per 'year' value.
year_counts = (
    rdd.map(lambda game: (game['year'], 1))
    .reduceByKey(lambda left, right: left + right)
    .collect()
)

# Same result as a dict for lookups below.
year_counts_dict = dict(year_counts)

# Separate years labelled '公元前' (BC) from all other years.
bc_years = {yr: n for yr, n in year_counts_dict.items() if '公元前' in yr}
ad_years = {yr: n for yr, n in year_counts_dict.items() if '公元前' not in yr}

# BC years: largest number first.
sorted_bc_years = sorted(
    bc_years,
    key=lambda label: int(label.replace('公元前', '')),
    reverse=True,
)

# Other years: ascending numeric order.
sorted_ad_years = sorted(ad_years, key=int)

# BC years followed by the remaining years.
sorted_years = sorted_bc_years + sorted_ad_years

# Bar chart inputs: year labels and the matching game counts.
x_axis = sorted_years
y_axis = [year_counts_dict[yr] for yr in x_axis]
# Bar chart of the number of games per year.
def drawYearBar():
    """Build the bar chart of game counts per year.

    Reads the module-level ``x_axis`` (year labels) and ``y_axis`` (counts).

    Returns:
        Bar: the configured chart object.
    """
    year_axis = opts.AxisOpts(
        axislabel_opts=opts.LabelOpts(rotate=-15, color="white")
    )
    count_axis = opts.AxisOpts(axislabel_opts=opts.LabelOpts(color="white"))
    # A slider zoom plus an "inside" zoom.
    zooms = [opts.DataZoomOpts(), opts.DataZoomOpts(type_="inside")]

    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(x_axis)
    chart.add_yaxis("游戏数量", y_axis)
    chart.set_global_opts(
        title_opts=opts.TitleOpts(
            title="每年游戏数量统计",
            title_textstyle_opts=opts.TextStyleOpts(color="white"),
        ),
        datazoom_opts=zooms,
        xaxis_opts=year_axis,
        yaxis_opts=count_axis,
        legend_opts=opts.LegendOpts(textstyle_opts=opts.TextStyleOpts(color="white")),
        toolbox_opts=opts.ToolboxOpts(is_show=False),  # hide the toolbox
    )
    return chart
# 1. Word cloud of board-game types: (tag, count) pairs kept as an RDD.
wordCount = (
    rdd.flatMap(
        # Strip every "/"-separated tag and discard the empty ones.
        lambda game: list(filter(None, map(str.strip, game["type"].split("/"))))
    )
    .map(lambda tag: (tag, 1))
    .reduceByKey(lambda left, right: left + right)
)
def draw_wordcloud() -> WordCloud:
    """Build the word cloud of game type tags.

    Collects the module-level ``wordCount`` RDD of ``(tag, count)`` pairs
    each time it is called.

    Returns:
        WordCloud: the configured chart object.
    """
    tag_counts = wordCount.collect()
    centered_title = opts.TitleOpts(
        title="桌游类型统计",
        title_textstyle_opts=opts.TextStyleOpts(font_size=20, color="white"),
        pos_left="center",
    )

    cloud = WordCloud()
    cloud.add(series_name="", data_pair=tag_counts, word_size_range=[10, 60])
    cloud.set_global_opts(
        title_opts=centered_title,
        tooltip_opts=opts.TooltipOpts(is_show=True),
        toolbox_opts=opts.ToolboxOpts(is_show=False),  # hide the toolbox
        datazoom_opts=None,
    )
    return cloud
# 2. Pictorial bar of play time: records at list positions 10..19
#    (the 11th through 20th entries of the loaded data).
selected_data = data[10:20]

# Game titles for the category axis.
titles = [entry["title"] for entry in selected_data]

# Minutes per person as integers, with the "分/人" unit suffix removed.
times = [
    int(entry["time_per_person"].replace("分/人", ""))
    for entry in selected_data
]
def draw_pictorialbar() -> PictorialBar:
    """Build the pictorial bar chart of per-person play time.

    Reads the module-level ``titles`` and ``times`` lists and swaps the
    axes with ``reversal_axis``.

    Returns:
        PictorialBar: the configured chart object.
    """
    # The axis line is made fully transparent and its ticks are hidden.
    title_axis = opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_show=False),
        axisline_opts=opts.AxisLineOpts(
            linestyle_opts=opts.LineStyleOpts(opacity=0)
        ),
        axislabel_opts=opts.LabelOpts(color="white"),
    )
    minutes_axis = opts.AxisOpts(
        name="时长(分钟)",
        axislabel_opts=opts.LabelOpts(font_size=12, color="white"),
        splitline_opts=opts.SplitLineOpts(is_show=True),
    )

    chart = PictorialBar()
    chart.add_xaxis(titles)
    chart.add_yaxis(
        "人均游戏时长(分钟)",
        times,
        label_opts=opts.LabelOpts(is_show=True, color="white"),
        symbol_size=22,
        symbol_repeat="fixed",
        symbol_offset=[0, 0],
        is_symbol_clip=True,
        symbol=SymbolType.ROUND_RECT,
        color="#37A2DA",
    )
    chart.reversal_axis()
    chart.set_global_opts(
        title_opts=opts.TitleOpts(
            title="11-20名桌游人均耗时象形图",
            title_textstyle_opts=opts.TextStyleOpts(color="white"),
        ),
        legend_opts=opts.LegendOpts(pos_top="5%"),
        yaxis_opts=title_axis,
        xaxis_opts=minutes_axis,
        tooltip_opts=opts.TooltipOpts(is_show=True),
        toolbox_opts=opts.ToolboxOpts(is_show=False),  # hide the toolbox
        datazoom_opts=None,
    )
    return chart


# Create the Page object
# NOTE: the charts could be dragged and resized with the mouse because the
# page was created with Page.DraggablePageLayout, which exists precisely to
# make every chart draggable/resizable in the browser. The final positions
# are set below with inline CSS, so the static Page.SimplePageLayout is used
# and the charts stay where that CSS puts them.
page = Page(layout=Page.SimplePageLayout)

# Add the six charts; the order fixes which positioning style each one gets.
page.add(
    drawPie(),
    drawFunnel(),
    drawLine(),
    drawYearBar(),
    draw_wordcloud(),
    draw_pictorialbar()
)

# Wrap the embedded charts in a minimal HTML document. html_content was
# previously appended to without ever being initialised (NameError).
html_content = '<!DOCTYPE html><html><head><meta charset="UTF-8"></head><body>'
html_content += page.render_embed()
html_content += "</body></html>"

# Extra step: adjust the layout of the chart containers.
from bs4 import BeautifulSoup

# Parse the generated markup directly; it is written to disk once, at the
# end, instead of being written, re-opened in "r+" mode and rewritten.
html_bf = BeautifulSoup(html_content, 'lxml')

# All div elements with the class 'chart-container' (one per chart).
divs = html_bf.select('.chart-container')

# Absolute-positioning style for each chart, in page.add() order.
chart_styles = [
    "width:20%;height:30%;position:absolute;bottom:0;left:5%;",  # bottom left
    "width:25%;height:26%;position:absolute;top:20%;right:0;",  # top right
    "width:40%;height:30%;position:absolute;bottom:0;left:30%;",  # bottom centre
    "width:25%;height:25%;position:absolute;top:50%;right:0;",  # middle right
    "width:20%;height:10%;position:absolute;bottom:5%;right:3%;",  # bottom right
    "width:35%;height:40%;position:absolute;top:20%;left:1%;",  # top left
]

# Only restyle when all six containers are present.
if len(divs) < 6:
    print("HTML 文件中 .chart-container 元素不足 6 个")
else:
    for div, style in zip(divs, chart_styles):
        div["style"] = style

# Page background.
body = html_bf.find("body")
body["style"] = "background-image: url('img/backGround.png'); background-size: cover; background-repeat: no-repeat; background-attachment: fixed; background-color: #464646;"

# Write the final document.
html_new = str(html_bf)
with open("jishi2.html", "w", encoding="utf-8") as f:
    f.write(html_new)