读取数据函数:
import csv
from numpy import *
import pandas as pd
def loaddata(d):
    """Load a labelled, integer-valued CSV file.

    Every cell of every row is parsed with ``int``. The first value of a row
    is taken as its label and the remaining values as its features. Blank
    rows are skipped.

    Args:
        d: Path of the CSV file to read.

    Returns:
        A ``(data, label)`` tuple: ``data`` is a list of feature lists and
        ``label`` is the list of the corresponding first-column values.

    Raises:
        ValueError: If a cell is not an integer literal (for example a
            header row).
    """
    data = []
    label = []
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; a binary-mode ('rb') file makes csv.reader raise csv.Error.
    with open(d, newline='') as my_file:
        for line in csv.reader(my_file):
            if not line:
                # csv.reader yields [] for a blank line; there is no label
                # to take from it.
                continue
            values = [int(cell) for cell in line]
            label.append(values[0])
            data.append(values[1:])
    return data, label
def lodatestdata(dir):
    """Load an unlabelled, integer-valued CSV file.

    Every cell of every row is parsed with ``int``; each row becomes one
    list of integers.

    Args:
        dir: Path of the CSV file to read. (The name shadows the ``dir``
            builtin; it is kept so keyword callers continue to work.)

    Returns:
        A list with one list of integers per CSV row.

    Raises:
        ValueError: If a cell is not an integer literal (for example a
            header row).
    """
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; a binary-mode ('rb') file makes csv.reader raise csv.Error.
    with open(dir, newline='') as my_file:
        return [[int(cell) for cell in line] for line in csv.reader(my_file)]
def loaddf(di):
    """Load a CSV file with pandas as float32 features and optional labels.

    Args:
        di: Path of the CSV file; its first line is read as the header.

    Returns:
        If the file has a ``label`` column: a ``(x, y)`` tuple where ``x`` is
        a float32 array of every other column and ``y`` is the array of
        ``label`` values.
        If the file has no ``label`` column (an unlabelled set): only the
        float32 array of all columns.
    """
    frame = pd.read_csv(di)
    if 'label' not in frame.columns:
        # Unlabelled file: every column is a feature. Previously this case
        # raised KeyError and required editing the function by hand.
        return frame.values.astype('float32')
    x_train = frame.drop(['label'], axis=1).values.astype('float32')
    y_train = frame['label'].values
    return x_train, y_train
CNN训练
from pca_test import loaddf
from keras.utils import np_utils
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
import matplotlib.pyplot as plt
from keras import backend as K
# Train a small CNN on the CSV training set and save its weights.

# Channels-first layout: input shape is (channels, height, width).
# set_image_data_format('channels_first') is the Keras 2 spelling of the
# older set_image_dim_ordering('th'), which later Keras releases removed.
K.set_image_data_format('channels_first')

x_train, y_train = loaddf('/home/hbsage/workdata/mnist/train.csv')

image_hight = 28   # image height in pixels (original variable name kept)
image_weight = 28  # image width in pixels (original variable name kept)
n_train = x_train.shape[0]

# Each flat row becomes one single-channel image.
x_train = x_train.reshape(n_train, 1, image_hight, image_weight)
# Scale to [0, 1], assuming 8-bit pixel values (0-255).
x_train = x_train / 255
# To preview a sample: plt.imshow(x_train[4, 0, :, :], cmap='gray_r'); plt.pause(10)

# Labels are assumed to be integers 0..max, one class per value.
nb_classes = np.max(y_train) + 1
y_train = np_utils.to_categorical(y_train, nb_classes)

input_shape = (1, image_hight, image_weight)

model = Sequential()
model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
model.add(Conv2D(32, (5, 5), activation='relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

batch_size = 128
n_epochs = 15
# The last 10% of the samples are held out for validation.
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=n_epochs,
          verbose=2,
          validation_split=.1)

# Only the weights are saved; the same architecture must be rebuilt before
# calling load_weights() on this file.
model.save_weights('kaggle_mnist_2.h5')
预测
from pca_test import loaddf
from keras.utils import np_utils
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
# Rebuild the CNN, load trained weights and write class predictions to a CSV.
import pandas as pd  # used to read the unlabelled test set directly

# Channels-first layout: input shape is (channels, height, width).
# set_image_data_format('channels_first') is the Keras 2 spelling of the
# older set_image_dim_ordering('th'), which later Keras releases removed.
K.set_image_data_format('channels_first')

# The test set is read as a plain feature matrix.
# Assumes test.csv has a header row and only pixel columns -- TODO confirm.
x_test = pd.read_csv('/home/hbsage/workdata/mnist/test.csv').values.astype('float32')

image_hight = 28   # image height in pixels (original variable name kept)
image_weight = 28  # image width in pixels (original variable name kept)
n_test = x_test.shape[0]

# Each flat row becomes one single-channel image.
x_test = x_test.reshape(n_test, 1, image_hight, image_weight)
# Scale to [0, 1], assuming 8-bit pixel values (0-255).
x_test = x_test / 255

nb_classes = 10
input_shape = (1, image_hight, image_weight)

# NOTE(review): this two-conv-layer network must match the network that
# produced 'kaggle_mnist_1.h5'. The training script in this file builds three
# conv layers and saves 'kaggle_mnist_2.h5' -- confirm which pair is intended.
model = Sequential()
model.add(ZeroPadding2D((1, 1), input_shape=input_shape))
model.add(Conv2D(32, (5, 5), activation='relu'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))
model.load_weights('kaggle_mnist_1.h5')

# Inference needs no compile() step. The argmax over the softmax output is
# what Sequential.predict_classes() returned before it was removed from Keras.
probabilities = model.predict(x_test, batch_size=32, verbose=1)
yPred = np.argmax(probabilities, axis=-1)

# Two columns: a 1-based ImageId and the predicted Label.
np.savetxt('mnist_output_1.csv',
           np.c_[range(1, len(yPred) + 1), yPred],
           delimiter=',',
           header='ImageId,Label',
           comments='',
           fmt='%d')