智能杯竞赛,是一道数据分析处理的题目。题目后续更新。先上传一部分我做的工作。
task1.1
import pandas as pd
data = pd.read_csv('E:/data/附件1.csv', encoding="gbk")
# 在data中地点这列,选取含有字符串所在的所有行
dataA = data[data['地点'].isin(["A"])]
dataB = data[data['地点'].isin(["B"])]
dataC = data[data['地点'].isin(["C"])]
dataD = data[data['地点'].isin(["D"])]
dataE = data[data['地点'].isin(["E"])]
# 将dataframe赋值给变量
x1 = pd.DataFrame(dataA)
x2 = pd.DataFrame(dataB)
x3 = pd.DataFrame(dataC)
x4 = pd.DataFrame(dataD)
x5 = pd.DataFrame(dataE)
# 分别写入到CSV文件中
x1.to_csv('task1-1A.csv', encoding="gbk")
x2.to_csv('task1-1B.csv', encoding="gbk")
x3.to_csv('task1-1C.csv', encoding="gbk")
x4.to_csv('task1-1D.csv', encoding="gbk")
x5.to_csv('task1-1E.csv', encoding="gbk")
print(x1)
task1.2
import pandas as pd
# 读取A、B、C、D、E售货机的数据
dataA = pd.read_csv('E:/data/task1-1A.csv', encoding="gbk")
dataB = pd.read_csv('E:/data/task1-1B.csv', encoding="gbk")
dataC = pd.read_csv('E:/data/task1-1C.csv', encoding="gbk")
dataD = pd.read_csv('E:/data/task1-1D.csv', encoding="gbk")
dataE = pd.read_csv('E:/data/task1-1E.csv', encoding="gbk")
# 删除A、B、C、D、E售货机实际金额为0的异常数据
dfA = dataA[~dataA['实际金额'].isin([0]) == True]
dfB = dataB[~dataB['实际金额'].isin([0]) == True]
dfC = dataC[~dataC['实际金额'].isin([0]) == True]
dfD = dataD[~dataD['实际金额'].isin([0]) == True]
dfE = dataE[~dataE['实际金额'].isin([0]) == True]
dtdA = pd.to_datetime(dfA['支付时间'], infer_datetime_format=True)
monthA = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-04', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-05', infer_datetime_format=True))]
dtdB = pd.to_datetime(dfB['支付时间'], infer_datetime_format=True)
monthB = dfB[(pd.to_datetime(dfB['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-04', infer_datetime_format=True))
& (pd.to_datetime(dfB['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-05', infer_datetime_format=True))]
dtdC = pd.to_datetime(dfC['支付时间'], infer_datetime_format=True, errors='coerce')
monthC = dfC[(pd.to_datetime(dfC['支付时间'], infer_datetime_format=True, errors='coerce') >= pd.to_datetime('2017-04', infer_datetime_format=True, errors='coerce'))
& (pd.to_datetime(dfC['支付时间'], infer_datetime_format=True, errors='coerce') <= pd.to_datetime('2017-05', infer_datetime_format=True, errors='coerce'))]
dtdD = pd.to_datetime(dfD['支付时间'], infer_datetime_format=True)
monthD = dfD[(pd.to_datetime(dfD['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-04', infer_datetime_format=True))
& (pd.to_datetime(dfD['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-05', infer_datetime_format=True))]
dtdE = pd.to_datetime(dfE['支付时间'], infer_datetime_format=True)
monthE = dfE[(pd.to_datetime(dfE['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-04', infer_datetime_format=True))
& (pd.to_datetime(dfE['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-05', infer_datetime_format=True))]
# 计算A、B、C、D、E五台售货机四月份交易额
A1 = monthA['实际金额'].sum()
B1 = monthB['实际金额'].sum()
C1 = monthC['实际金额'].sum()
D1 = monthD['实际金额'].sum()
E1 = monthE['实际金额'].sum()
print('A售货机4月份交易额为:', A1)
print('B售货机4月份交易额为:', B1)
print('C售货机4月份交易额为:', C1)
print('D售货机4月份交易额为:', D1)
print('E售货机4月份交易额为:', E1)
# 计算A、B、C、D、E五台售货机四月份订单量
A2 = len(monthA['实际金额'])
B2 = len(monthB['实际金额'])
C2 = len(monthC['实际金额'])
D2 = len(monthD['实际金额'])
E2 = len(monthE['实际金额'])
print('A售货机4月份订单量为:', A2)
print('B售货机4月份订单量为:', B2)
print('C售货机4月份订单量为:', C2)
print('D售货机4月份订单量为:', D2)
print('E售货机4月份订单量为:', E2)
# 计算五台售货机四月份总交易额
x = A1+B1+C1+D1+E1
print('五台售货机四月份总交易额为:', x)
# 计算五台售货机四月份总订单量
y = A2+B2+C2+D2+E2
print('五台售货机四月份总订单量为:', y)
task1.3
import pandas as pd
# 读取A、B、C、D、E售货机的数据
dataA = pd.read_csv('E:/data/task1-1A.csv', encoding="gbk")
dataB = pd.read_csv('E:/data/task1-1B.csv', encoding="gbk")
dataC = pd.read_csv('E:/data/task1-1C.csv', encoding="gbk")
dataD = pd.read_csv('E:/data/task1-1D.csv', encoding="gbk")
dataE = pd.read_csv('E:/data/task1-1E.csv', encoding="gbk")
# 删除A、B、C、D、E售货机实际金额为0的异常数据
dfA = dataA[~dataA['实际金额'].isin([0]) == True]
dfB = dataB[~dataB['实际金额'].isin([0]) == True]
dfC = dataC[~dataC['实际金额'].isin([0]) == True]
dfD = dataD[~dataD['实际金额'].isin([0]) == True]
dfE = dataE[~dataE['实际金额'].isin([0]) == True]
# 分割A售货机十二个月份的数据
dtdA = pd.to_datetime(dfA['支付时间'], infer_datetime_format=True)
monthA1 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-01', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-02', infer_datetime_format=True))]
monthA2 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-02', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-03', infer_datetime_format=True))]
monthA3 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-03', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-04', infer_datetime_format=True))]
monthA4 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-04', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-05', infer_datetime_format=True))]
monthA5 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-05', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-06', infer_datetime_format=True))]
monthA6 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-06', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-07', infer_datetime_format=True))]
monthA7 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-07', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-08', infer_datetime_format=True))]
monthA8 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-08', infer_datetime_format=True))
& (pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) <= pd.to_datetime('2017-09', infer_datetime_format=True))]
monthA9 = dfA[(pd.to_datetime(dfA['支付时间'], infer_datetime_format=True) >= pd.to_datetime('2017-09', infer_datetime_format=True))
& (pd.to_datetime(dfA