这里写图片描述

## MNIST Digit Classification
MNIST是一个比较出名的数据集,包括60000张训练图片和10000张测试图片,每张图片是一个手写数字,由28×28=784个像素值组成,扫描的手写数字如下图所示:

Example in Python
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator

# Start H2O cluster with all available cores (default)
h2o.init()

# Load the data first: the frames must exist before they can be summarized.
train = h2o.import_file("./train.csv.gz")
test = h2o.import_file("./test.csv.gz")

# Get a brief summary of the data
train.describe()
test.describe()

# Specify the predictor columns (the first 784 columns) and the response column
x = train.names[0:784]
y = "C785"

# Encode the response column as categorical for multinomial classification
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Three hidden layers of 32 units each, with input dropout and L1
# regularization, evaluated with 5-fold cross-validation.
model = H2ODeepLearningEstimator(distribution='multinomial',
                                 activation='RectifierWithDropout',
                                 hidden=[32, 32, 32],
                                 input_dropout_ratio=0.2,
                                 sparse=True,
                                 l1=1e-5,
                                 epochs=10,
                                 nfolds=5)
model.train(x=x, y=y, training_frame=train, validation_frame=test)

# Single-argument print(...) calls run on both Python 2 and Python 3.
print(model.params)
print(model.model_performance(train=True))
# `test` was supplied as the validation frame above, so its metrics are
# requested with valid=True (model_performance has no `test` keyword).
print(model.model_performance(valid=True))
# NOTE(review): AUC is classically a binary-classification metric; confirm
# that the installed H2O version reports it for a multinomial model.
print(model.auc(train=True))
print(model.predict(test_data=test))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
Cartesian Grid Search
对 `hidden` 和 `l1` 两个超参数做网格搜索:
import h2o
from h2o.estimators import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch

h2o.init()

train = h2o.import_file("./train.csv.gz")
test = h2o.import_file("./test.csv.gz")

# Get a brief summary of the data
train.describe()
test.describe()

# Specify the response and predictor columns
x = train.names[0:784]
y = "C785"

# Encode the response column as categorical for multinomial classification
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

# Cartesian grid: every combination of the listed `hidden` layouts and `l1`
# values is trained (3 x 2 = 6 models).
hidden_opt = [[32, 32], [32, 16, 8], [100]]
l1_opt = [1e-4, 1e-3]
hyper_parameters = {"hidden": hidden_opt, "l1": l1_opt}

model_grid = H2OGridSearch(H2ODeepLearningEstimator,
                           hyper_params=hyper_parameters)
# epochs is set high; the stopping_* arguments configure early stopping on
# the misclassification metric.
model_grid.train(x=x, y=y,
                 distribution="multinomial",
                 epochs=1000,
                 training_frame=train,
                 validation_frame=test,
                 score_interval=2,
                 stopping_rounds=3,
                 stopping_tolerance=0.05,
                 stopping_metric="misclassification")

# Print model grid search results
for model in model_grid:
    print(model.model_id + " mse: " + str(model.mse()))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
搜索结果:

Random Grid Search
如果参数的搜索空间太大,可以使用随机网格搜索,只需要指定最大的模型个数和seed,使用 `search_criteria` 来设置:
# NOTE: this snippet reuses H2OGridSearch, H2ODeepLearningEstimator, x, y,
# train and test as defined in the Cartesian grid search example.

# Candidate values to sample from: five hidden-layer layouts and 1000 l1
# values ranging from 1e-6 to 1e-3.
hidden_opt = [[17, 32], [8, 19], [32, 16, 8], [100], [10, 10, 10, 10]]
l1_opt = [s / 1e6 for s in range(1, 1001)]
hyper_parameters = {"hidden": hidden_opt, "l1": l1_opt}

# Random discrete search over the candidates, limited by max_models and
# max_runtime_secs, with a fixed seed.
search_criteria = {"strategy": "RandomDiscrete",
                   "max_models": 10,
                   "max_runtime_secs": 100,
                   "seed": 123456}

model_grid = H2OGridSearch(H2ODeepLearningEstimator,
                           hyper_params=hyper_parameters,
                           search_criteria=search_criteria)
model_grid.train(x=x, y=y,
                 distribution="multinomial",
                 epochs=1000,
                 training_frame=train,
                 validation_frame=test,
                 score_interval=2,
                 stopping_rounds=3,
                 stopping_tolerance=0.05,
                 stopping_metric="misclassification")

# Print model grid search results
for model in model_grid:
    print(model.model_id + " mse: " + str(model.mse()))
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
搜索结果:
