# 2升4数据建模 离散化特征 (2G-to-4G upgrade modelling: discretized features)
# In[1]:
"""
导入数据
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
# Switch into the project folder so the relative file names used by this
# script resolve there.
os.chdir('K:\\项目\\2升4\\')
data = pd.read_excel('5月拍照持2G终端用户.xlsx')
# In[4]:
# Work on a copy of the loaded sheet and give its twelve columns short names.
df = data.copy()
df.columns = [
    'numbers', 'Network Time', 'Disbursements', 'start',
    'cost name', 'names', 'ID', 'age',
    'adress', 'nodeb', '3/4G net', '2 to 4',
]
# In[5]:
# Keep only rows with a usable age: drop the '未知' ("unknown") placeholder
# and rows whose age is 0.
age_is_known = df['age'] != '未知'
age_is_nonzero = df['age'] != 0
df = df[age_is_known & age_is_nonzero]
# In[6]:
# Categorical predictors to expand into indicator columns.  '2 to 4' is left
# out on purpose: it is the label the model is trained to predict, so encoding
# it into the feature matrix would leak the answer into the model inputs.
X = df[['cost name', '3/4G net']].values
# In[7]:
# Integer-code each column separately (the encoder is refitted per column),
# producing a purely numeric matrix for the one-hot step.
ZC_label = LabelEncoder()
X = np.column_stack([ZC_label.fit_transform(column) for column in X.T])
# The `categorical_features` argument no longer exists in current
# scikit-learn; the default behaviour already encodes every column.
one_hot = OneHotEncoder()
data = one_hot.fit_transform(X).toarray()
# Every indicator column is deliberately given the same name, 'cs id', so
# that selecting 'cs id' from a frame picks up the whole group at once.
data = pd.DataFrame(data, columns=['cs id'] * data.shape[1])
# In[8]:
# Numeric columns plus the label, taken from the cleaned frame.
df1 = df[['numbers', 'Network Time', 'Disbursements', 'age', '2 to 4']]
# In[9]:
# `data` was built with a fresh 0..n-1 index, whereas `df1` still carries the
# row labels that survived the age filter.  concat(axis=1) aligns on labels,
# so without re-labelling the rows would be paired wrongly and padded with
# NaN.  Both frames come from `df` in the same row order, so sharing df1's
# index pairs row i with row i.
data.index = df1.index
df2 = pd.concat([data, df1], axis=1)
# In[10]:
# 'cs id' names every one-hot indicator column, so it selects all of them.
features = ['numbers', 'Network Time', 'Disbursements', 'age', 'cs id']
# In[11]:
# `.values` (not `.value`) is the attribute that returns the label array.
target = df2['2 to 4'].values
# Hold out 30% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    df2[features], target, test_size=0.3
)
# In[12]:
# Snapshot of the held-out rows while they still include the 'numbers' column
# (presumably the subscriber identifier - confirm).
x_test.to_csv("x_test1.csv")
# In[13]:
# 'numbers' is not a model input; keep only the predictors.
model_columns = ['Network Time', 'Disbursements', 'age', 'cs id']
x_train = x_train[model_columns]
# In[14]:
x_test = x_test[model_columns]
x_test.to_csv("x_test2.csv")
# In[15]:
# Learn the scaling statistics from the training set only and reuse them for
# the test set.  Refitting on the test data would scale the two sets
# differently and let test-set information influence preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
# In[16]:
# Build the model: a multi-layer perceptron with hidden layers of 100 and 50
# units, allowed up to 500 training iterations.
mlp = MLPClassifier(max_iter=500, hidden_layer_sizes=(100, 50))
# In[17]:
# Train on the scaled training set.
mlp.fit(x_train, y_train)
# In[18]:
# Predict labels for the held-out rows.
predictions = mlp.predict(x_test)
# In[19]:
# Print the per-class report first, then the raw confusion matrix.
for summarize in (classification_report, confusion_matrix):
    print(summarize(y_test, predictions))
# In[20]:
# Store the matrix under its own name.  Rebinding `confusion_matrix` would
# replace the function with an array, so any later call to it (for example
# re-running the report above) would fail with "object is not callable".
cm = confusion_matrix(y_test, predictions)
# In[28]:
# Draw the matrix as an annotated heatmap with integer cell labels.
df_cm = pd.DataFrame(cm)
sns.heatmap(df_cm, annot=True, fmt='d', cmap='Blues', square=True)
plt.show()
# In[59]:
# Wrap the predicted labels in a single-column frame for export.
my_prediction = pd.DataFrame(predictions)
# In[61]:
my_prediction.to_csv("my_prediction.csv")
# In[62]:
# Save the trained model to disk.
import pickle
with open('mlp.pickle', 'wb') as f:
    pickle.dump(mlp, f)
# In[262]:
# Load the model back.
# NOTE: unpickling executes code embedded in the file, so only load pickle
# files that this script (or another trusted source) produced.
with open('mlp.pickle', 'rb') as f:
    mlp = pickle.load(f)
# Run the restored model on the held-out rows; the result is not stored.
mlp.predict(x_test)