import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# load dataset
dataframe = pandas.read_csv("/work/johnson_folder/biggamesData/housing.data", delim_whitespace=True, header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:13]
Y = dataset[:,13]
# define the base model
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(6, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset
estimators = []
# note: the scaler must NOT be pre-fit on X here; the pipeline fits it per
# cross-validation fold, which avoids leaking test data into the scaling
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold, scoring='neg_mean_squared_error')
print("Standardized: %.2f (%.2f) MSE" % (-results.mean(), results.std()))
keras-gridsearch
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
# create the model
def create_model():
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
#fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# load dataset
dataset = numpy.loadtxt("/work/johnson_folder/biggamesData/pima-indians-diabetes.data", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
#create model
model = KerasClassifier(build_fn=create_model,verbose=0)
#define the grid search parameters
batch_size = [10,20,30,40,80,100]
epochs = [10,50,100]
param_grid = dict(batch_size=batch_size,epochs=epochs)
grid = GridSearchCV(estimator=model,param_grid=param_grid,n_jobs=-1)
grid_result = grid.fit(X,Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
keras-multiclass classification
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
# load dataset
dataframe = pandas.read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:, 0:4].astype(float)
Y = dataset[:, 4]
#encode class values as int
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)
#convert int to dummy variable
dummy_y = np_utils.to_categorical(encoded_Y)
# define model structure
def baseline_model():
    model = Sequential()
    model.add(Dense(10, input_dim=4, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))
    # compile the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=baseline_model, epochs=40, batch_size=256)
# splitting data into training set and test set. If random_state is set to an integer, the split datasets are fixed.
X_train, X_test, Y_train, Y_test = train_test_split(X, dummy_y, test_size=0.3, random_state=0)
estimator.fit(X_train, Y_train)
# make predictions
pred = estimator.predict(X_test)
# map numeric predictions back to the original categorical labels
init_labels = encoder.inverse_transform(pred)
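A quick sketch (an addition, not in the original) to score the hold-out split; KerasClassifier.predict returns integer class indices when fit on one-hot targets, so the one-hot Y_test must be collapsed with argmax first:
from sklearn.metrics import accuracy_score
# compare predicted class indices against the argmax of the one-hot truth
print("hold-out accuracy: %.2f" % accuracy_score(Y_test.argmax(axis=1), pred))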
# k-fold cross-validate
seed = 42
numpy.random.seed(seed)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
keras-classification
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils
# downloads mnist to '~/.keras/datasets/' the first time it is called
# X_train shape (60000, 28, 28), X_test shape (10000, 28, 28)
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# data pre-processing
# reshape to (60000, 784) and scale pixel values to the 0-1 range
X_train = X_train.reshape(X_train.shape[0], -1) / 255.0  # flatten and normalize
X_test = X_test.reshape(X_test.shape[0], -1) / 255.0
y_train = np_utils.to_categorical(y_train, num_classes=10)
y_test = np_utils.to_categorical(y_test, num_classes=10)
# define the layers as a list
model = Sequential([
    Dense(32, input_dim=784),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])
# another way to define your optimizer
rmsprop = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# set the optimizer and loss function; metrics lists any extra quantities to
# compute and track during training (more than one can be given)
model.compile(optimizer=rmsprop,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
print('Training ------------')
# Another way to train the model
model.fit(X_train, y_train, epochs=2, batch_size=32)
print('\nTesting ------------')
# Evaluate the model with the metrics we defined earlier
loss, accuracy = model.evaluate(X_test, y_test)
print('test loss: ', loss)
print('test accuracy: ', accuracy)
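For inference, model.predict returns the ten class probabilities per sample; a minimal sketch on the first test image (an illustration, not in the original):
# take argmax of the predicted probabilities to recover the digit
probs = model.predict(X_test[:1])
print('predicted digit:', probs.argmax(axis=1)[0])
print('true digit:', y_test[0].argmax())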
keras-NLP classification
'''Trains and evaluates a simple MLP
on the Reuters newswire topic classification task.
'''
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import np_utils
from keras.preprocessing.text import Tokenizer
max_words = 1000  # vocabulary size
batch_size = 32   # mini-batch size
nb_epoch = 5      # number of training epochs
print('Loading data...')
(X_train, y_train), (X_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2)  # load the Reuters corpus
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')
# number of classes -- as I recall the original Reuters task had about 10, so this must be the larger corpus
nb_classes = np.max(y_train) + 1
print(nb_classes, 'classes')
print('Vectorizing sequence data...') # tokenize
# vectorize, keeping the max_words words with highest document frequency.
# A fun detail: X_train initially stores word indices, and those indices are
# assigned by word frequency (presumably, since out-of-range indices are simply
# dropped), so this step is quite efficient.
# The conversion is binary; tf-idf is not the default.
tokenizer = Tokenizer(num_words=max_words)
X_train = tokenizer.sequences_to_matrix(X_train, mode='binary')
X_test = tokenizer.sequences_to_matrix(X_test, mode='binary')
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
print('Convert class vector to binary class matrix (for use with categorical_crossentropy)')  # this part is simpler: just one-hot encoding
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
print('Y_train shape:', Y_train.shape)
print('Y_test shape:', Y_test.shape)
print('Building model...')
model = Sequential()
# first layer; Dense is a fully connected layer
model.add(Dense(512, input_shape=(max_words,)))  # input dim = max_words, output dim = 512
model.add(Activation('relu'))  # activation function
model.add(Dropout(0.5))  # dropout
# second layer
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
# loss function, optimizer, and evaluation metric
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# train with a held-out validation split
history = model.fit(X_train, Y_train,
                    epochs=nb_epoch, batch_size=batch_size,
                    verbose=1, validation_split=0.1)
score = model.evaluate(X_test, Y_test,
                       batch_size=batch_size, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])
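To sanity-check the data, the raw word-index sequences can be decoded back to text with reuters.get_word_index(); a sketch (an addition, not in the original), noting that indices 0-2 are reserved, so word indices are offset by 3:
# reload the raw sequences, since X_train above was already vectorized
(raw_train, _), (_, _) = reuters.load_data(num_words=max_words, test_split=0.2)
word_index = reuters.get_word_index()
index_word = dict((i + 3, w) for (w, i) in word_index.items())
print(' '.join(index_word.get(i, '?') for i in raw_train[0]))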
keras-regression
import numpy as np
from keras.models import Sequential
from keras import layers
import matplotlib.pyplot as plt
from keras.datasets import boston_housing
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
#preprocessing the data
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
test_data -= mean
test_data /= std
def build_model():
    model = Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    # track MAE, which is what the validation curve below plots
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
# k-fold cross-validation
k = 4
num_val_samples = len(train_data) // k
num_epochs = 500
all_mae_histories = []
for i in range(k):
    print("processing fold #", i)
    # prepare the validation data: partition i
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    # prepare the training data: data from all other partitions
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]],
        axis=0)
    # build the keras model
    model = build_model()
    # train the model (silent mode: verbose=0)
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=1, verbose=0)
    mae_history = history.history['val_mean_absolute_error']
    all_mae_histories.append(mae_history)
# average the per-epoch validation MAE across the k folds
average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()
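The raw per-epoch curve is noisy; a common follow-up (a sketch, not part of the original notes) replaces each point with an exponential moving average of the previous points and omits the first ten epochs, which sit on a different scale:
# smooth the curve with an exponential moving average
def smooth_curve(points, factor=0.9):
    smoothed = []
    for point in points:
        if smoothed:
            smoothed.append(smoothed[-1] * factor + point * (1 - factor))
        else:
            smoothed.append(point)
    return smoothed

smooth_mae_history = smooth_curve(average_mae_history[10:])
plt.plot(range(1, len(smooth_mae_history) + 1), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Smoothed validation MAE')
plt.show()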