import numpy as np
import pandas as pd
import os
import xlrd
from pandas import to_datetime
import matplotlib.pyplot as plt
import xlwt
import datetime, time
# 设置中文显示
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# === 问题一:数据加载与预处理 ===
inputdir = r'D:\\主桌面\\E题\\水表数据'
df_empty = pd.DataFrame(columns=['水表名', '水表号', '采集时间', '上次读数', '当前读数', '用量'])
# 遍历文件夹读取所有Excel文件
for parents, dirnames, filenames in os.walk(inputdir):
for filename in filenames:
df = pd.read_excel(os.path.join(parents, filename))
df_empty = df_empty.append(df, ignore_index=True)
# 加载水表层级附件并清理缺失值
data_index = pd.read_excel('D:\\主桌面\\E题\\附件_水表层级(1).xlsx')
data_index.dropna(axis=0, thresh=7, inplace=True)
data = df_empty.copy()
# 计算表显数据差值(问题一关键步骤)
data_all['表显差值'] = data_all['当前读数'] - data_all['上次读数']
data_all['差值'] = data_all['表显差值'] - data_all['用量']
data_all['差值'].abs().sum()
data_all = data_all[data_all['差值'].abs() == 0.01] # 过滤异常差值
# === 问题一:功能区用水量统计 ===
# 各水表总用水量
sbm_sl = data_all['用量'].groupby(by=data_all['序号']).sum()
# 12个功能区总用水量
gnq_sl = data_all['用量'].groupby(by=data_all['功能分类']).sum()
# 时间格式转换
data_all['采集时间'] = to_datetime(data_all.采集时间, format='%Y/%m/%d %H:%M:%S')
gnq_sl.to_excel('D:\\主桌面\\E题\\功能区总水量.xls') # 输出结果
# 功能区按天统计用水量
gnq_day = pd.DataFrame(columns=['总表', '宿舍', '教学楼', '办公楼', '食堂', '科研楼', '运动场所', '后勤保卫', '校医院', '澡堂', '酒店', '图书馆', '其他'])
for i in np.arange(13):
temp = data_all[data_all['功能分类'] == i]
temp.set_index('采集时间', inplace=True)
gnq_day.iloc[:, i-1] = temp['用量'].resample('D').sum()
gnq_day.to_excel('D:\\主桌面\\E题\\功能区每天水量.xls') # 输出日报表
# 功能区按月统计用水量
gnq_month = pd.DataFrame(columns=['总表', '宿舍', '教学楼', '办公楼', '食堂', '科研楼', '运动场所', '后勤保卫', '校医院', '澡堂', '酒店', '图书馆', '其他'])
for i in np.arange(13):
temp = data_all[data_all['功能分类'] == i]
temp.set_index('采集时间', inplace=True)
gnq_month.iloc[:, i-1] = temp['用量'].resample('M').sum()
gnq_month.to_excel('D:\\主桌面\\E题\\功能区每月水量.xls') # 输出月报表
# 功能区按小时统计用水量
gnq_hour = pd.DataFrame(columns=['总表', '宿舍', '教学楼', '办公楼', '食堂', '科研楼', '运动场所', '后勤保卫', '校医院', '澡堂', '酒店', '图书馆', '其他'])
for i in np.arange(13):
temp = data_all[data_all['功能分类'] == i]
temp.set_index('采集时间', inplace=True)
gnq_hour.iloc[:, i-1] = temp['用量'].resample('H').sum() # 按小时聚合
# 注:此处省略输出代码,实际需添加类似 gnq_hour.to_excel(...)
# === 问题二:水表层级数据处理(误差分析基础) ===
# 一级水表数据处理
sbm_1 = data_all[data_all['一级表计编码'].notnull()]
sbm_1_yl = pd.DataFrame(columns=sbm_1['一级表计编码'].unique())
sbm_1_yl15 = pd.DataFrame(columns=sbm_1['一级表计编码'].unique())
for i in sbm_1['一级表计编码'].unique():
temp = sbm_1[sbm_1['一级表计编码'] == i]
temp.set_index('采集时间', inplace=True)
sbm_1_yl[i] = temp['用量'].resample('D').sum() # 按天汇总
sbm_1_yl15[i] = temp['用量'].resample('15min').sum() # 按15分钟汇总
# 一级水表总用水量计算(含异常值处理)
for i in sbm_1['一级表计编码'].unique():
temp = sbm_1_yl[i]
temp[temp > 5 * temp.mean()] = temp.mean() # 异常值替换为均值
sbm_1_yl = sbm_1_yl.fillna(0)
zysl_yj = (sbm_1_yl.sum(axis=0)).sum() # 计算一级水表总用水量
# 二级水表数据处理(类似一级)
sbm_2 = data_all[data_all['二级表计编码'].notnull()]
sbm_2_yl = pd.DataFrame(columns=sbm_2['二级表计编码'].unique())
sbm_2_yl15 = pd.DataFrame(columns=sbm_2['二级表计编码'].unique())
for i in sbm_2['二级表计编码'].unique():
temp = sbm_2[sbm_2['二级表计编码'] == i]
temp.set_index('采集时间', inplace=True)
sbm_2_yl[i] = temp['用量'].resample('D').sum()
sbm_2_yl15[i] = temp['用量'].resample('15min').sum()
# 二级水表异常值处理与总用水量
for i in sbm_2['二级表计编码'].unique():
temp = sbm_2_yl[i]
temp[temp > 5 * temp.mean()] = temp.mean()
sbm_2_yl = sbm_2_yl.fillna(0)
zysl_ej = (sbm_2_yl.sum(axis=0)).sum() # 二级水表总用水量
# 合并各级水表数据(用于后续误差分析)
sbm_yl = pd.concat([sbm_1_yl, sbm_2_yl], axis=1)
sbm_yl.to_excel('D:\\主桌面\\E题\\各级用量.xls') # 输出层级用水量
代码如上
报错内容如下
E:\Anaconda\python.exe "C:\Users\cheny\Desktop\PythonProject2\图3.一、二级水表表显用水量(年).py"
Traceback (most recent call last):
File "C:\Users\cheny\Desktop\PythonProject2\图3.一、二级水表表显用水量(年).py", line 7, in <module>
import xlwt
File "E:\Anaconda\Lib\site-packages\xlwt\__init__.py", line 5, in <module>
from .Row import Row
File "E:\Anaconda\Lib\site-packages\xlwt\Row.py", line 9, in <module>
from . import ExcelFormula
File "E:\Anaconda\Lib\site-packages\xlwt\ExcelFormula.py", line 8, in <module>
from . import ExcelFormulaParser, ExcelFormulaLexer
File "E:\Anaconda\Lib\site-packages\xlwt\ExcelFormulaLexer.py", line 53, in <module>
_re = recompile(
^^^^^^^^^^
File "E:\Anaconda\Lib\re\__init__.py", line 228, in compile
return _compile(pattern, flags)
^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\Anaconda\Lib\re\__init__.py", line 307, in _compile
p = _compiler.compile(pattern, flags)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\Anaconda\Lib\re\_compiler.py", line 745, in compile
p = _parser.parse(p, flags)
^^^^^^^^^^^^^^^^^^^^^^^
File "E:\Anaconda\Lib\re\_parser.py", line 980, in parse
p.state.flags = fix_flags(str, p.state.flags)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "E:\Anaconda\Lib\re\_parser.py", line 957, in fix_flags
raise ValueError("cannot use LOCALE flag with a str pattern")
ValueError: cannot use LOCALE flag with a str pattern
进程已结束,退出代码为 1
最新发布