How to Embed a TrueType Font (Reposted)

This post describes how to embed an uncommon font in an application. The font is first added to the solution and marked as an embedded resource; at runtime it is loaded via a stream and stored in a PrivateFontCollection object for later use. C# and VB code samples are provided, along with a check of the font's available styles so that requesting a missing style does not throw an exception.

Some applications, for reasons of aesthetics or a required visual style, embed certain uncommon fonts so that they are always available when needed, regardless of whether the font is actually installed on the destination system.

The technique is twofold. First, the font is placed in the resources by adding it to the solution and marking it as an embedded resource. Second, at runtime, the font is loaded via a stream and stored in a PrivateFontCollection object for later use.

This example uses a font that is unlikely to be installed on your system. Alpha Dance is a free TrueType font available from the Free Fonts Collection. It was embedded into the application by adding it to the solution and selecting the "Embedded Resource" build action in the file's properties.

Figure 1. The Alpha Dance font file embedded in the solution.
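
The manifest resource name is composed from the project's default namespace and the file name, and it must match exactly when the stream is requested later. If you are unsure of the exact string, one way to confirm it is to list every resource embedded in the assembly. A minimal sketch (the name used later in the article, "embedfont.Alphd___.ttf", is what you would expect to see):

    // List all manifest resource names so the exact string can be copied verbatim.
    foreach (string name in this.GetType().Assembly.GetManifestResourceNames())
    {
      System.Diagnostics.Debug.WriteLine(name);
    }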

Once the file has been successfully included in the resources, you need a PrivateFontCollection object in which to store it and a method by which it is loaded into the collection. The best place to do this is probably the form's Load override or event handler. The following listing shows the process. Note how the AddMemoryFont method is used: it requires a pointer to the memory in which the font is held as an array of bytes. In C# we can use the unsafe keyword for convenience, but VB must use the unmanaged memory handling capabilities of the Marshal class. The latter option is of course also open to C# programmers who simply don't like the unsafe keyword.

    PrivateFontCollection pfc = new PrivateFontCollection();

    private void Form1_Load(object sender, System.EventArgs e)
    {
      // Load the font file from the embedded resources.
      Stream fontStream = this.GetType().Assembly.GetManifestResourceStream("embedfont.Alphd___.ttf");

      // Read the font program into a byte array.
      byte[] fontdata = new byte[fontStream.Length];
      fontStream.Read(fontdata, 0, (int)fontStream.Length);
      fontStream.Close();

      // AddMemoryFont needs a pointer to the font data, so pin the array.
      unsafe
      {
        fixed (byte* pFontData = fontdata)
        {
          pfc.AddMemoryFont((System.IntPtr)pFontData, fontdata.Length);
        }
      }
    }

 

  Dim pfc As New PrivateFontCollection()

  Private Sub Form1_Load(ByVal sender As Object, ByVal e As System.EventArgs) Handles MyBase.Load
    'load the resource
    Dim fontStream As Stream = Me.GetType().Assembly.GetManifestResourceStream("embedfont.vb.Alphd___.ttf")
    'create an unmanaged memory block for the data
    Dim data As System.IntPtr = Marshal.AllocCoTaskMem(CInt(fontStream.Length))
    'create a buffer to read in to
    Dim fontdata() As Byte
    ReDim fontdata(CInt(fontStream.Length) - 1)
    'fetch the font program from the resource
    fontStream.Read(fontdata, 0, CInt(fontStream.Length))
    'copy the bytes to the unmanaged memory block
    Marshal.Copy(fontdata, 0, data, CInt(fontStream.Length))
    'pass the font to the font collection
    pfc.AddMemoryFont(data, CInt(fontStream.Length))
    'close the resource stream
    fontStream.Close()
    'free the unmanaged memory
    Marshal.FreeCoTaskMem(data)
  End Sub
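
Once the collection has been populated, the embedded font is used like any other by constructing a Font from one of the families in the PrivateFontCollection. A minimal sketch, assuming the form has a Label named label1 (the control name is only an example); note that the collection must be kept alive for as long as text is drawn with fonts created from it:

    // Use the first (and here only) family in the private collection.
    label1.Font = new Font(pfc.Families[0], 18, FontStyle.Regular);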

 

A font may provide only certain styles, and unfortunately selecting a style that doesn't exist will throw an exception. To avoid this, the font can be interrogated to see which styles are available, and only those it provides should be used. The following listing demonstrates how the Alpha Dance font is used by checking the available font styles and drawing each one that exists. Note that the underline and strikeout styles are pseudo-styles constructed by the font rendering engine rather than actual glyph variants, which is why the listing draws them without checking.

    private void Form1_Paint(object sender, System.Windows.Forms.PaintEventArgs e)
    {
      bool bold = false;
      bool regular = false;
      bool italic = false;

      e.Graphics.PageUnit = GraphicsUnit.Point;
      SolidBrush b = new SolidBrush(Color.Black);

      float y = 5;

      System.Drawing.Font fn;

      foreach (FontFamily ff in pfc.Families)
      {
        // Only create a Font for the styles the family actually provides.
        if (ff.IsStyleAvailable(FontStyle.Regular))
        {
          regular = true;
          fn = new Font(ff, 18, FontStyle.Regular);
          e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
          fn.Dispose();
          y += 20;
        }
        if (ff.IsStyleAvailable(FontStyle.Bold))
        {
          bold = true;
          fn = new Font(ff, 18, FontStyle.Bold);
          e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
          fn.Dispose();
          y += 20;
        }
        if (ff.IsStyleAvailable(FontStyle.Italic))
        {
          italic = true;
          fn = new Font(ff, 18, FontStyle.Italic);
          e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
          fn.Dispose();
          y += 20;
        }
        if (bold && italic)
        {
          fn = new Font(ff, 18, FontStyle.Bold | FontStyle.Italic);
          e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
          fn.Dispose();
          y += 20;
        }
        // Underline and strikeout are synthesized by the renderer, so no availability check is needed.
        fn = new Font(ff, 18, FontStyle.Underline);
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
        fn.Dispose();
        y += 20;
        fn = new Font(ff, 18, FontStyle.Strikeout);
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic);
        fn.Dispose();
      }

      b.Dispose();
    }

 

  Private Sub Form1_Paint(ByVal sender As Object, ByVal e As System.Windows.Forms.PaintEventArgs) Handles MyBase.Paint
    Dim bold As Boolean = False
    Dim regular As Boolean = False
    Dim italic As Boolean = False

    e.Graphics.PageUnit = GraphicsUnit.Point
    Dim b As New SolidBrush(Color.Black)

    Dim y = 5

    Dim fn As System.Drawing.Font
    Dim ff As FontFamily
    For Each ff In pfc.Families
      If ff.IsStyleAvailable(FontStyle.Regular) Then
        regular = True
        fn = New Font(ff, 32, FontStyle.Regular)
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
        fn.Dispose()
        y += 40
      End If
      If ff.IsStyleAvailable(FontStyle.Bold) Then
        bold = True
        fn = New Font(ff, 32, FontStyle.Bold)
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
        fn.Dispose()
        y += 40
      End If
      If ff.IsStyleAvailable(FontStyle.Italic) Then
        italic = True
        fn = New Font(ff, 32, FontStyle.Italic)
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
        fn.Dispose()
        y += 40
      End If
      If bold And italic Then
        fn = New Font(ff, 32, FontStyle.Bold Or FontStyle.Italic)
        e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
        fn.Dispose()
        y += 40
      End If
      fn = New Font(ff, 32, FontStyle.Underline)
      e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
      fn.Dispose()
      y += 40
      fn = New Font(ff, 32, FontStyle.Strikeout)
      e.Graphics.DrawString(fn.Name, fn, b, 5, y, StringFormat.GenericTypographic)
      fn.Dispose()
    Next

    b.Dispose()
  End Sub

 

Figure 2 shows the application in action.

Figure 2. The embedded Alpha Dance font.


Reposted from: https://www.cnblogs.com/cdo/archive/2005/06/17/176171.html
