1.引入相关的包
from datetime import date, datetime, timedelta
import calendar
import pandas as pd
from workalendar.asia import China
from dateutil import rrule
2.节假日计算
1.由于节假日包中没有2018年之前的相关数据,所以进行了手动添加并转化为dataframe:
hol = {"2016-12-31":"New year","2017-01-01":"New year", "2017-01-02":"New year", "2017-01-27":"Chinese New Year's eve",
"2017-01-28":"Chinese New Year", "2017-01-29":"Spring Festival", "2017-01-30":"Spring Festival", "2017-01-31":"Spring Festival",
"2017-02-01":"Spring Festival","2017-02-02":"Spring Festival", "2017-04-02":"Ching Ming Festival",
"2017-04-03":"Ching Ming Festival", "2017-04-04":"Ching Ming Festival", "2017-04-29":"Labour Day Holiday",
"2017-04-30":"Labour Day Holiday", "2017-05-01":"Labour Day Holiday","2017-05-28":"Dragon Boat Festival",
"2017-05-29":"Dragon Boat Festival", "2017-05-30":"Dragon Boat Festival", "2017-10-01":"National Day", "2017-10-02":"National Day",
"2017-10-03":"National Day", "2017-10-04":"National Day", "2017-10-05":"National Day", "2017-10-06":"National Day",
"2017-10-07":"National Day", "2017-10-08":"National Day", "2017-12-30":"New year", "2017-12-31":"New year"}
hol=pd.DataFrame(hol,index=[0])
hol=pd.DataFrame(hol.T.reset_index())
hol.columns={'date','festival'}
hol['date']=pd.to_datetime(hol['date'],format='%Y-%m-%d')
2.运用workalendar.asia 中China中的holidays计算节假日数据
cal = China()
def cal_festival(year):
lis = []
if type(year) != list: # eval函数就是实现list、dict、tuple与str之间的转化
year = eval(year)
for ye in year:
for x, v in cal.holidays(ye):
lis.append([str(x).replace('-', ''), v])
lis.append(['20190504', 'Labour Day Holiday'])
df = pd.DataFrame(data=lis, columns=['date', 'festival'])
fe_2018=cal_festival([2018,2020])
fe_2018['date']=fe_2018['date'].apply(lambda x: datetime.strptime(x, '%Y%m%d'))
3.对数据进行合并,并计算节假日是否是周末(为后面计算工作日做准备)
festival=pd.concat([hol,fe_2018],axis=0)
festival.sort_values(by='date',inplace=True)
festival['weekday']=festival['date'].apply(lambda x:1 if((x.weekday()+1==6) or(x.weekday()+1==7))else 0)
4.按月汇总节假日天数与周末天数
festival['year_month']=festival['date'].astype('str').apply(lambda x:x[:7])
fest_influ=festival.groupby('year_month')['festival'].count()
week_influ=festival.groupby('year_month')['weekday'].sum()
fest_influ=fest_influ.reset_index()
week_influ=week_influ.reset_index()
fest_final=pd.merge(fest_influ,week_influ,on='year_month')
print(fest_final)
3.计算工作日
其中new_model_data为时间序列数据,包括[data_date,capacity]两个字段.
def workdays(x,holidays,weekday,days_off=None):
start_date = x
# start_date
start_datetime = pd.to_datetime(start_date, '%Y-%m-%d')
start_datetime_2 = datetime.date(start_datetime)
# 计算每月最后一天的date
days_in_month = start_datetime_2.replace(day=28) + timedelta(days=4)
# print( start_datetime_2.replace(day=28))
end_date = days_in_month - timedelta(days=days_in_month.day)
if days_off is None:
days_off = 5,6
workdays = [x for x in range(7) if x not in days_off]
days = rrule.rrule(rrule.DAILY, dtstart=start_datetime_2, until=end_date,byweekday=workdays)
return days.count() - holidays+ weekday
new_model_data['year_month']=new_model_data['data_date'].astype('str').apply(lambda x:x[:7])
new_model_da=pd.merge(new_model_data,fest_final,on='year_month',how='left')
new_model_da['month_day']= new_model_da['year_month'].apply(lambda x : calendar.monthrange(np.int(x[:4]),np.int(x[-2:]))[1])
new_model_da['festival']=new_model_da['festival'].fillna(0)
new_model_da['weekday']=new_model_da['weekday'].fillna(0)
for i in range(new_model_da.shape[0]):
new_model_da.ix[i,'workday']=workdays(new_model_da.ix[i,'data_date'],new_model_da.ix[i,'festival'],new_model_da.ix[i,'weekday'])
print(new_model_da)
由于是在大环境下的写的一部分,且部分代码命名不是很规范,希望与大家一起学习交流,部分内容借鉴网上相关代码,十分感谢!!!