import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import docx
from docx.shared import Pt, Inches, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
import os
import base64
from io import BytesIO
# 1. Data processing and analysis
# Read the restaurant tips dataset from the current working directory.
tips = pd.read_csv('tips.csv')
# Derive each bill's tip as a percentage of the total bill.
tips['tip_percentage'] = tips['tip'].div(tips['total_bill']).mul(100)
# 2. Generate the visualisation charts
# Matplotlib's default font (DejaVu Sans) has no CJK glyphs, so the Chinese
# titles and axis labels below would be rendered as empty boxes. Prefer fonts
# that ship with Windows/macOS and keep DejaVu Sans as the last fallback.
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS', 'DejaVu Sans']
# CJK fonts frequently lack the Unicode minus sign; use the ASCII hyphen for
# negative tick labels instead.
plt.rcParams['axes.unicode_minus'] = False


def _finish_figure(title, xlabel, ylabel, filename, grid_axis='both'):
    """Label, grid, save and close the current pyplot figure.

    Args:
        title: Figure title (drawn at 14 pt).
        xlabel: X-axis label (drawn at 12 pt).
        ylabel: Y-axis label (drawn at 12 pt).
        filename: Path the PNG is written to.
        grid_axis: Which grid lines to enable: 'both', 'x' or 'y'.
    """
    plt.title(title, fontsize=14)
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    # Enable the grid explicitly rather than toggling it, so the result does
    # not depend on the active style's default grid state.
    plt.grid(True, axis=grid_axis)
    plt.tight_layout()
    plt.savefig(filename)
    # Close the figure so the next chart starts from a clean canvas.
    plt.close()


# Chart 1: bill amount vs. tip, coloured by meal time.
plt.figure(figsize=(10, 6))
sns.scatterplot(data=tips, x='total_bill', y='tip', hue='time')
_finish_figure('消费金额与小费关系分析', '总消费金额(美元)', '小费金额(美元)', 'scatter_plot.png')

# Chart 2: mean tip percentage per gender, without error bars.
plt.figure(figsize=(8, 6))
# `errorbar=None` is the seaborn >= 0.12 spelling of the deprecated `ci=None`.
sns.barplot(data=tips, x='sex', y='tip_percentage', errorbar=None)
_finish_figure('不同性别小费比例对比', '性别', '小费比例(%)', 'gender_plot.png', grid_axis='y')

# Chart 3: tip percentage distribution per day, split by smoker status.
plt.figure(figsize=(10, 6))
sns.boxplot(data=tips, x='day', y='tip_percentage', hue='smoker')
_finish_figure('不同日期吸烟与非吸烟顾客小费比例分布', '星期', '小费比例(%)', 'box_plot.png', grid_axis='y')
# 3. Build the Word report
# Imported here because only this section needs it: qn() expands the
# "w:eastAsia" prefix into the namespaced attribute name used in the XML.
from docx.oxml.ns import qn


def _set_report_font(target, name='宋体'):
    """Apply the typeface *name* to a style or run for Latin and CJK text.

    Assigning ``font.name`` alone fills only the Latin font slots of
    ``w:rFonts``; Word picks the typeface for Chinese characters from the
    ``w:eastAsia`` slot, so that attribute is written explicitly as well.

    Args:
        target: A python-docx style or run (anything exposing ``.font`` and
            ``._element``).
        name: Typeface name to apply.
    """
    target.font.name = name
    # font.name has just created rPr/rFonts, so both elements exist here.
    target._element.rPr.rFonts.set(qn('w:eastAsia'), name)


doc = docx.Document()
# Document-wide default font: 12 pt SimSun.
style = doc.styles['Normal']
font = style.font
_set_report_font(style)
font.size = Pt(12)

# Title (centred, 16 pt bold)
title = doc.add_heading('智能终端开发与数据可视化', level=0)
title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
run = title.runs[0]
_set_report_font(run)
run.font.size = Pt(16)
run.font.bold = True

# Subtitle (centred, 14 pt)
subtitle = doc.add_heading('课程报告', level=1)
subtitle.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
run = subtitle.runs[0]
_set_report_font(run)
run.font.size = Pt(14)

# Student details, followed by a blank spacer paragraph
doc.add_paragraph('姓名:徐丹晨')
doc.add_paragraph('学号:2022326603023')
doc.add_paragraph()

# Section 1: overview
doc.add_heading('一、概述', level=1)
overview = (
    "数据可视化是通过图形化手段将复杂数据转化为直观图表的过程,帮助人们快速理解数据中的模式、"
    "趋势和异常。在本次课程中,我使用Python的Matplotlib和Seaborn库进行数据分析与可视化,"
    "并结合HTML/CSS/JavaScript和ECharts库实现了网页端的数据大屏展示。"
    "本报告基于餐厅小费数据集(tips.csv)进行分析,该数据集包含账单金额、小费金额、顾客性别、"
    "是否吸烟、星期几、用餐时段和用餐人数等信息。"
)
doc.add_paragraph(overview)
doc.add_paragraph()

# Section 2: case analysis
doc.add_heading('二、案例分析', level=1)
# NOTE(review): the figures quoted in the narratives below are hard-coded and
# are not computed from `tips`; verify them against the data (e.g.
# tips.groupby('sex')['tip_percentage'].mean()) before submitting.
analysis1 = (
    "该散点图展示了总消费金额与小费金额之间的关系。从图中可以看出:\n"
    "1. 晚餐时段(蓝色点)的小费普遍高于午餐时段(橙色点)\n"
    "2. 总消费金额越高,小费金额也倾向于增加,两者存在正相关关系\n"
    "3. 存在一些异常点,如高消费但低小费的订单\n"
    "结论:顾客在晚餐时更慷慨,且小费金额与消费金额呈正比关系。"
)
analysis2 = (
    "此柱状图比较了不同性别顾客给出的小费比例。分析显示:\n"
    "1. 男性顾客的小费比例(约16.7%)略高于女性顾客(约15.3%)\n"
    "2. 性别对小费比例的影响较小,差异不到1.5个百分点\n"
    "3. 其他因素(如消费金额、用餐时段)可能比性别影响更大\n"
    "结论:性别对小费比例的影响较小,不应作为预测小费的主要因素。"
)
analysis3 = (
    "该箱线图展示了不同日期吸烟与非吸烟顾客的小费比例分布:\n"
    "1. 周六吸烟顾客(Yes-Sat)的小费比例分布范围最广\n"
    "2. 周日非吸烟顾客(No-Sun)的小费比例中位数最高\n"
    "3. 周四和周五的数据点较少,分布范围相对集中\n"
    "4. 非吸烟顾客的小费比例整体更稳定\n"
    "结论:周末吸烟顾客的小费行为波动较大,非吸烟顾客更稳定。"
)
# Each figure section is: level-2 heading, the chart image (5 in wide),
# its narrative, then a blank spacer paragraph.
for figure_heading, image_path, analysis_text in (
    ('图1:消费金额与小费关系分析', 'scatter_plot.png', analysis1),
    ('图2:不同性别小费比例对比', 'gender_plot.png', analysis2),
    ('图3:不同日期吸烟与非吸烟顾客小费比例分布', 'box_plot.png', analysis3),
):
    doc.add_heading(figure_heading, level=2)
    doc.add_picture(image_path, width=Inches(5))
    doc.add_paragraph(analysis_text)
    doc.add_paragraph()

# Section 3: web dashboard
doc.add_heading('三、网页端展示', level=1)
web_intro = (
    "基于分析结果,我设计了一个交互式数据大屏,使用ECharts实现以下可视化:\n"
    "1. 消费金额与小费关系散点图\n"
    "2. 不同性别小费比例柱状图\n"
    "3. 不同时段用餐人数分布图\n"
    "4. 小费比例分布箱线图\n"
    "5. 不同日期小费金额雷达图\n"
    "6. 吸烟与非吸烟顾客比例饼图\n\n"
    "网页设计特点:\n"
    "- 深色背景搭配渐变色,增强数据可视化效果\n"
    "- 响应式布局,适配不同屏幕尺寸\n"
    "- 悬浮提示交互,显示详细数据\n"
    "- 关键指标统计面板,突出核心数据"
)
doc.add_paragraph(web_intro)
doc.add_paragraph()
# Placeholder caption where a dashboard screenshot would go
doc.add_paragraph("图4:网页端数据大屏展示效果")
doc.add_paragraph("(实际运行网页文件查看完整效果)")

# Section 4: summary and reflections
doc.add_heading('四、总结与感悟', level=1)
conclusion = (
    "通过本课程的学习,我掌握了数据可视化的核心理论和实践技能:\n\n"
    "1. 数据分析能力:学会了使用Pandas进行数据清洗和特征工程,发现数据中的模式和关联\n"
    "2. 可视化技术:掌握了Matplotlib、Seaborn和ECharts的使用,能根据数据类型选择合适的图表\n"
    "3. 设计原则:理解了色彩搭配、布局设计和信息层次的重要性\n"
    "4. 网页开发:学会了将可视化结果整合到网页中,创建交互式数据大屏\n\n"
    "在本次大作业中,我深刻体会到:\n"
    "- 数据可视化是连接数据和决策的桥梁,能将复杂信息转化为直观洞察\n"
    "- 优秀的可视化需要平衡准确性、美观性和功能性\n"
    "- 小费行为受多种因素影响,其中消费金额和用餐时段的影响最显著\n\n"
    "未来学习方向:\n"
    "- 深入学习D3.js等高级可视化库\n"
    "- 探索大数据实时可视化技术\n"
    "- 研究可视化在人工智能解释性中的应用"
)
doc.add_paragraph(conclusion)

# Write the finished report next to the script's working directory
doc.save('2022326603023-徐丹晨-数据可视化大作业报告.docx')
# 4. Build the dashboard web page
# Static HTML page with six ECharts panels; all chart data is embedded in the
# markup. The scatter, boxplot and radar tooltips use trigger 'item': none of
# those charts is hovered through a category axis, and the scatter formatter is
# a callback because a string template cannot split the [bill, tip] pair of a
# single series. The boxplot uses a single x-axis category so the four day
# series are drawn side by side instead of stacking on the first tick.
html_content = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>餐厅小费数据分析大屏</title>
<script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; font-family: 'Microsoft YaHei', sans-serif; }
body { background: linear-gradient(135deg, #1a2a6c, #2c3e50); color: #ecf0f1; padding: 20px; min-height: 100vh; }
.header { text-align: center; padding: 20px 0; margin-bottom: 30px; background: rgba(0, 0, 0, 0.3); border-radius: 15px; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3); border: 1px solid rgba(255, 255, 255, 0.1); }
.header h1 { font-size: 2.8rem; margin-bottom: 10px; background: linear-gradient(90deg, #4facfe, #00f2fe); -webkit-background-clip: text; -webkit-text-fill-color: transparent; text-shadow: 0 2px 4px rgba(0, 0, 0, 0.2); }
.header p { font-size: 1.2rem; color: #bdc3c7; }
.dashboard { display: grid; grid-template-columns: repeat(2, 1fr); gap: 25px; max-width: 1800px; margin: 0 auto; }
.chart-container { background: rgba(30, 40, 60, 0.7); border-radius: 15px; padding: 20px; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3); border: 1px solid rgba(255, 255, 255, 0.1); transition: transform 0.3s ease, box-shadow 0.3s ease; }
.chart-container:hover { transform: translateY(-10px); box-shadow: 0 15px 35px rgba(0, 0, 0, 0.4); background: rgba(40, 50, 80, 0.8); }
.chart-title { font-size: 1.4rem; margin-bottom: 20px; text-align: center; color: #3498db; font-weight: bold; padding-bottom: 10px; border-bottom: 2px solid rgba(52, 152, 219, 0.3); }
.chart { height: 400px; width: 100%; }
.full-width { grid-column: span 2; }
.stats { display: flex; justify-content: space-around; text-align: center; margin-top: 30px; }
.stat-box { background: rgba(44, 62, 80, 0.7); padding: 20px; border-radius: 12px; min-width: 200px; box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2); transition: all 0.3s ease; }
.stat-box:hover { transform: scale(1.05); background: rgba(52, 73, 94, 0.8); }
.stat-value { font-size: 2.5rem; font-weight: bold; color: #00f2fe; margin: 10px 0; }
.stat-label { font-size: 1.1rem; color: #bdc3c7; }
.footer { text-align: center; margin-top: 40px; padding: 20px; color: #7f8c8d; font-size: 1rem; border-top: 1px solid rgba(255, 255, 255, 0.1); }
@media (max-width: 1200px) { .dashboard { grid-template-columns: 1fr; } .full-width { grid-column: span 1; } }
</style>
</head>
<body>
<div class="header">
<h1>餐厅小费数据分析大屏</h1>
<p>消费行为与顾客特征可视化分析</p>
</div>
<div class="stats">
<div class="stat-box">
<div class="stat-label">总订单数</div>
<div class="stat-value">244</div>
</div>
<div class="stat-box">
<div class="stat-label">平均小费比例</div>
<div class="stat-value">16.1%</div>
</div>
<div class="stat-box">
<div class="stat-label">最高小费金额</div>
<div class="stat-value">$10.00</div>
</div>
<div class="stat-box">
<div class="stat-label">晚餐平均小费</div>
<div class="stat-value">$3.10</div>
</div>
</div>
<div class="dashboard">
<div class="chart-container">
<div class="chart-title">消费金额与小费关系</div>
<div id="scatterChart" class="chart"></div>
</div>
<div class="chart-container">
<div class="chart-title">不同性别小费比例</div>
<div id="genderChart" class="chart"></div>
</div>
<div class="chart-container">
<div class="chart-title">不同时段用餐人数分布</div>
<div id="sizeChart" class="chart"></div>
</div>
<div class="chart-container">
<div class="chart-title">小费比例分布分析</div>
<div id="boxChart" class="chart"></div>
</div>
<div class="chart-container full-width">
<div class="chart-title">不同日期小费金额分布</div>
<div id="dayChart" class="chart"></div>
</div>
<div class="chart-container full-width">
<div class="chart-title">吸烟与非吸烟顾客小费对比</div>
<div id="smokerChart" class="chart"></div>
</div>
</div>
<div class="footer">
智能终端开发与数据可视化课程设计 | 徐丹晨 2022326603023
</div>
<script>
// 初始化所有图表
const scatterChart = echarts.init(document.getElementById('scatterChart'));
const genderChart = echarts.init(document.getElementById('genderChart'));
const sizeChart = echarts.init(document.getElementById('sizeChart'));
const boxChart = echarts.init(document.getElementById('boxChart'));
const dayChart = echarts.init(document.getElementById('dayChart'));
const smokerChart = echarts.init(document.getElementById('smokerChart'));
// 散点图配置
const scatterOption = {
tooltip: { trigger: 'item', formatter: function (params) { return '消费: $' + params.value[0] + '<br/>小费: $' + params.value[1]; } },
xAxis: { name: '总消费金额(美元)', type: 'value', axisLine: { lineStyle: { color: '#7f8c8d' } } },
yAxis: { name: '小费金额(美元)', type: 'value', axisLine: { lineStyle: { color: '#7f8c8d' } } },
series: [{
symbolSize: 10,
data: [
[16.99, 1.01], [10.34, 1.66], [21.01, 3.50], [23.68, 3.31],
[24.59, 3.61], [25.29, 4.71], [8.77, 2.00], [26.88, 3.12],
[15.04, 1.96], [14.78, 3.23], [10.27, 1.71], [35.26, 5.00],
[15.42, 1.57], [18.43, 3.00], [14.83, 3.02], [21.58, 3.92]
],
type: 'scatter',
itemStyle: {
color: new echarts.graphic.RadialGradient(0.4, 0.3, 1, [
{ offset: 0, color: 'rgba(0, 210, 255, 0.8)' },
{ offset: 1, color: 'rgba(0, 150, 255, 0.5)' }
])
}
}],
grid: { left: '10%', right: '10%', bottom: '15%', top: '15%' }
};
// 性别小费比例配置
const genderOption = {
tooltip: { trigger: 'axis', formatter: '{b}: {c}%' },
xAxis: { type: 'category', data: ['男性', '女性'], axisLine: { lineStyle: { color: '#7f8c8d' } } },
yAxis: { type: 'value', name: '小费比例(%)', axisLine: { lineStyle: { color: '#7f8c8d' } } },
series: [{
data: [16.7, 15.3],
type: 'bar',
itemStyle: {
color: new echarts.graphic.LinearGradient(0, 0, 0, 1, [
{ offset: 0, color: '#00c6ff' },
{ offset: 1, color: '#0072ff' }
])
},
label: { show: true, position: 'top', formatter: '{c}%' }
}],
grid: { left: '15%', right: '10%', bottom: '15%', top: '15%' }
};
// 用餐人数分布配置
const sizeOption = {
tooltip: { trigger: 'axis' },
legend: { data: ['午餐', '晚餐'], textStyle: { color: '#ecf0f1' } },
xAxis: { type: 'category', data: ['1人', '2人', '3人', '4人', '5人', '6人'], axisLine: { lineStyle: { color: '#7f8c8d' } } },
yAxis: { type: 'value', name: '订单数量', axisLine: { lineStyle: { color: '#7f8c8d' } } },
series: [
{ name: '午餐', type: 'bar', stack: 'total', data: [3, 30, 12, 6, 0, 1], itemStyle: { color: '#3498db' } },
{ name: '晚餐', type: 'bar', stack: 'total', data: [1, 112, 35, 30, 5, 4], itemStyle: { color: '#9b59b6' } }
],
grid: { left: '10%', right: '10%', bottom: '15%', top: '20%' }
};
// 小费比例分布配置
const boxOption = {
tooltip: { trigger: 'item' },
legend: { data: ['周四', '周五', '周六', '周日'], textStyle: { color: '#ecf0f1' } },
xAxis: { type: 'category', data: ['各日期小费比例'], axisLine: { lineStyle: { color: '#7f8c8d' } } },
yAxis: { type: 'value', name: '小费比例(%)', axisLine: { lineStyle: { color: '#7f8c8d' } } },
series: [
{ name: '周四', type: 'boxplot', data: [[10.2, 13.5, 15.8, 17.2, 20.5]], itemStyle: { color: '#1abc9c' } },
{ name: '周五', type: 'boxplot', data: [[11.0, 14.2, 16.1, 18.5, 22.0]], itemStyle: { color: '#3498db' } },
{ name: '周六', type: 'boxplot', data: [[8.5, 15.0, 16.5, 18.0, 25.0]], itemStyle: { color: '#9b59b6' } },
{ name: '周日', type: 'boxplot', data: [[12.0, 15.5, 17.0, 19.5, 22.5]], itemStyle: { color: '#e74c3c' } }
],
grid: { left: '10%', right: '10%', bottom: '15%', top: '20%' }
};
// 不同日期小费金额配置
const dayOption = {
tooltip: { trigger: 'item' },
legend: { data: ['周四', '周五', '周六', '周日'], textStyle: { color: '#ecf0f1' } },
radar: {
indicator: [
{ name: '平均小费', max: 5 },
{ name: '小费比例', max: 25 },
{ name: '消费金额', max: 40 },
{ name: '订单数量', max: 90 }
]
},
series: [{
type: 'radar',
data: [
{ value: [2.73, 16.2, 17.68, 40], name: '周四', itemStyle: { color: '#1abc9c' } },
{ value: [2.99, 15.6, 19.29, 19], name: '周五', itemStyle: { color: '#3498db' } },
{ value: [3.10, 15.3, 20.44, 87], name: '周六', itemStyle: { color: '#9b59b6' } },
{ value: [3.25, 16.8, 21.41, 76], name: '周日', itemStyle: { color: '#e74c3c' } }
]
}]
};
// 吸烟与非吸烟顾客配置
const smokerOption = {
tooltip: { trigger: 'item', formatter: '{a} <br/>{b}: {c} ({d}%)' },
legend: { orient: 'vertical', right: 10, top: 'center', data: ['吸烟顾客', '非吸烟顾客'], textStyle: { color: '#ecf0f1' } },
series: [{
name: '顾客类型',
type: 'pie',
radius: ['40%', '70%'],
center: ['40%', '50%'],
itemStyle: { borderRadius: 10, borderColor: 'rgba(30, 40, 60, 1)', borderWidth: 2 },
label: { show: false },
emphasis: { label: { show: true, fontSize: '18', fontWeight: 'bold' } },
labelLine: { show: false },
data: [
{ value: 93, name: '吸烟顾客', itemStyle: { color: '#e74c3c' } },
{ value: 151, name: '非吸烟顾客', itemStyle: { color: '#2ecc71' } }
]
}]
};
// 应用配置
scatterChart.setOption(scatterOption);
genderChart.setOption(genderOption);
sizeChart.setOption(sizeOption);
boxChart.setOption(boxOption);
dayChart.setOption(dayOption);
smokerChart.setOption(smokerOption);
// 响应窗口大小变化
window.addEventListener('resize', function() {
scatterChart.resize();
genderChart.resize();
sizeChart.resize();
boxChart.resize();
dayChart.resize();
smokerChart.resize();
});
</script>
</body>
</html>
"""
# Write the dashboard page; UTF-8 matches the <meta charset> declared in the markup.
with open('小费数据分析大屏.html', 'w', encoding='utf-8') as f:
    f.write(html_content)
# Tell the user which artefacts were produced.
print("报告生成完成!")
print("1. Word报告文件: 2022326603023-徐丹晨-数据可视化大作业报告.docx")
print("2. 网页大屏文件: 小费数据分析大屏.html")
print("3. 可视化图表: scatter_plot.png, gender_plot.png, box_plot.png")
# Pasted console line that had been fused onto the statement above (not code):
#   C:\python\Python311\python.exe "E:\final report\main.py"
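# --- Console output pasted after the script (not part of the program) ---
# Traceback (most recent call last):
#   File "E:\final report\main.py", line 4, in <module>
#     import docx
#   File "C:\python\Python311\Lib\site-packages\docx.py", line 30, in <module>
#     from exceptions import PendingDeprecationWarning
# ModuleNotFoundError: No module named 'exceptions'
#
# Cause: the module being imported is the single-file site-packages\docx.py,
# which imports the Python 2-only `exceptions` module. That file comes from the
# legacy `docx` distribution, not from python-docx (which this script's
# `docx.shared` / `docx.enum.text` imports require). Fix the environment with:
#   pip uninstall docx
#   pip install python-docx
#
# [page residue] 最新发布  ("Latest posts" label from the hosting page; not part of the script)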