代码
#读取数据
import pandas as pd
def retrieveData(training_path="training-data.csv", test_path="test-data.csv"):
    """Load the training and test sets from CSV files as NumPy arrays.

    Args:
        training_path: CSV file holding the training samples.
        test_path: CSV file holding the test samples.

    Returns:
        A ``(trainingdata, testdata)`` tuple of 2-D ``numpy.ndarray``
        objects, one row per CSV data row.

    NOTE(review): ``read_csv`` treats the first line of each file as a
    header row by default. If the CSV files carry no header line, the first
    sample of each file is silently consumed as column names and
    ``header=None`` should be passed -- confirm against the data files.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
    # supported way to obtain the underlying ndarray.
    trainingdata = pd.read_csv(training_path).to_numpy()
    testdata = pd.read_csv(test_path).to_numpy()
    return trainingdata, testdata
# Load both datasets, report their dimensions, then preview the first three
# training rows.
trainingdata, testdata = retrieveData()
for dataset in (trainingdata, testdata):
    print(dataset.shape)
print(trainingdata[:3])
输出:
(7493, 17)
(3497, 17)
[[ 0 89 27 100 42 75 29 45 15 15 37 0 69 2 100 6 2]
[ 0 57 31 68 72 90 100 100 76 75 50 51 28 25 16 0 1]
[ 0 100 7 92 5 68 19 45 86 34 100 45 74 23 67 0 4]]
#数据分割
def separateFeaturesAndCategories(trainingdata, testdata):
    """Split each dataset into a feature matrix and a label column.

    The last column of every row is taken as the class label; all preceding
    columns are the features.

    Args:
        trainingdata: 2-D array of training samples, label in the last column.
        testdata: 2-D array of test samples, label in the last column.

    Returns:
        A tuple ``(trainingFeatures, trainingCategories, testFeatures,
        testCategories)``. The category arrays keep two dimensions
        (shape ``(n_samples, 1)``) because they are sliced with ``-1:``
        rather than indexed with ``-1``.
    """
    trainingFeatures = trainingdata[:, :-1]
    trainingCategories = trainingdata[:, -1:]
    testFeatures = testdata[:, :-1]
    testCategories = testdata[:, -1:]
    return trainingFeatures, trainingCategories, testFeatures, testCategories
# Split both datasets into features / labels and preview the first three
# training samples (labels are transposed so they print on a single row).
(
    trainingFeatures,
    trainingCategories,
    testFeatures,
    testCategories,
) = separateFeaturesAndCategories(trainingdata, testdata)
print(trainingFeatures[:3])
print(trainingCategories[:3].T)
输出:
[[ 0 89 27 100 42 75 29 45 15 15 37 0 69 2 100 6]
[ 0 57 31 68 72 90 100 100 76 75 50 51 28 25 16 0]
[ 0 100 7 92 5 68 19 45 86 34 100 45 74 23 67 0]]
[[2 1 4]]
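# Feature standardization (StandardScaler: zero mean and unit variance per feature, not min-max normalization)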
from sklearn.preprocessing import StandardScaler
def scaleData(trainingFeatures, testFeatures):
    """Standardize the feature matrices with a scaler fitted on training data.

    Args:
        trainingFeatures: 2-D array of training features; used to fit the
            scaler.
        testFeatures: 2-D array of test features with the same number of
            columns.

    Returns:
        A ``(scaledtrainingFeatures, scaledtestFeatures)`` tuple of the
        transformed arrays.
    """
    scaler = StandardScaler()
    # The scaler is fitted on the training set only and then reused for the
    # test set, so no statistics of the test data influence the transform.
    scaledtrainingFeatures = scaler.fit_transform(trainingFeatures)
    scaledtestFeatures = scaler.transform(testFeatures)
    return scaledtrainingFeatures, scaledtestFeatures
# Standardize both feature matrices and preview the first scaled training row.
scaled_pair = scaleData(trainingFeatures, testFeatures)
scaledtrainingFeatures, scaledtestFeatures = scaled_pair
print(scaledtrainingFeatures[:1])
输出:
[[-1.12187758 0.25657834 -0.49541212 0.87125674 -0.25417164 0.36353124
-0.73260178 0.01286466 -1.25134391 -0.6944943 -0.6576261 -1.30131455
0.63319745 -1.01733507 1.28059647 -0.63002209]]
#对于多分类问题,将标签进行one-hot编码,采用to_categorical函数.
# One-hot encode the integer class labels for the 10-way classifier.
try:
    # Public location of the helper in current Keras releases.
    from keras.utils import to_categorical
except ImportError:
    # Older Keras 2.x installs may only expose it through np_utils, a module
    # path that no longer exists in newer releases.
    from keras.utils.np_utils import to_categorical

trainingCategories = to_categorical(trainingCategories, num_classes=10)
testCategories = to_categorical(testCategories, num_classes=10)
输出:
[[8]
[8]
[9]]
[[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
#搭建BP网络
from keras.models import Sequential
from keras.layers import Activation,Dense
# Fully connected (BP) network: 16 inputs -> Dense(16) -> Dense(16) -> Dense(10)
# with a softmax output. A Dropout layer could be inserted after the first
# activation as a regularization experiment.
model = Sequential([
    Dense(16, input_shape=(16,)),
    Activation('relu'),
    Dense(16, activation='relu'),
    Dense(10),
    Activation('softmax'),
])

# categorical_crossentropy is the loss that pairs with a softmax output for
# multi-class classification.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
model.fit(
    scaledtrainingFeatures,
    trainingCategories,
    epochs=20,
    batch_size=20,
)

# With the single metric compiled above, evaluate() yields [loss, accuracy].
accuracy = model.evaluate(scaledtestFeatures, testCategories, verbose=0)
test_loss, test_accuracy = accuracy
print('\nTest accuracy:', test_accuracy)
print('\nTest score:', test_loss)
Epoch 19/20
7493/7493 [==============================] - 6s 855us/step - loss: 1.1042 - acc: 0.6349
Epoch 20/20
7493/7493 [==============================] - 6s 845us/step - loss: 1.0671 - acc: 0.6434
Test accuracy: 0.8581641407431553
Test score: 0.4339949927876805
附:如何调参?