Fixing the 'relativedelta only diffs datetime/date' error raised by import tensorflow

This article describes a rare TensorFlow installation error, 'relativedelta only diffs datetime/date', and how to resolve it. The error appeared while installing TensorFlow 1.4.0 on Ubuntu 16.04. The fix is to install the python-dateutil package inside the Anaconda environment.


How to fix the error: 'relativedelta only diffs datetime/date'

Environment:
* Ubuntu 16.04
* TensorFlow 1.4.0
* Anaconda3
* Python 3.5

While installing TensorFlow 1.4.0 (CPU-only) in an Anaconda3 environment, I ran into a rather rare error:

'relativedelta only diffs datetime/date'
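
For context, this message originates in the python-dateutil library: relativedelta raises a TypeError with exactly this text when it is asked to diff two values that are not datetime/date objects. The sketch below is only a minimal illustration of where the message comes from, not of the TensorFlow install path itself:

from datetime import date
from dateutil.relativedelta import relativedelta

# Diffing two real dates works as expected
print(relativedelta(date(2018, 1, 2), date(2018, 1, 1)))  # relativedelta(days=+1)

# Passing anything that is not a datetime/date (here, a plain string) raises:
# TypeError: relativedelta only diffs datetime/date
relativedelta("2018-01-02", date(2018, 1, 1))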

Solution:
Activate the newly created Anaconda TensorFlow environment and run:

$ pip3 install python-dateutil 

# For a Python 2 environment, use pip instead:
$ pip install python-dateutil 
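
Once python-dateutil is installed, a quick sanity check (a minimal sketch, assuming the Anaconda TensorFlow environment is still active) is to confirm that both packages now import cleanly:

# Run inside the activated TensorFlow environment
import dateutil
import tensorflow as tf

print(dateutil.__version__)  # python-dateutil is now available
print(tf.__version__)        # expected to print 1.4.0 in this setup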