sklearn 练手

涉及:交叉验证、GridSearchCV、SVM、PCA。




#from __future__ import print_function




from time import time
#import logging

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
#from sklearn.decomposition import RandomizedPCA
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC

try:
    # scikit-learn >= 0.18 exposes the model-selection helpers here.
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy module locations; they were removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split
    from sklearn.grid_search import GridSearchCV




#print(__doc__)




#logging.basicConfig(level=logging.INFO,format='%(asctime)s %(message)')




# Load the "Labeled Faces in the Wild" people dataset, restricted via
# min_faces_per_person=70 and rescaled via resize=0.4.
lfw_people = fetch_lfw_people(resize=0.4, min_faces_per_person=70)

# `images` is a 3-D stack; h and w are kept so that flat vectors can be
# reshaped back into pictures when plotting.
n_samples, h, w = lfw_people.images.shape

# Feature matrix (one row per sample) and the matching integer labels.
x = lfw_people.data
y = lfw_people.target
n_features = x.shape[1]

# Label index -> person name lookup; its length is the number of classes.
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

# Short summary of what was loaded.
print("total dataset size:")
print("n_samples:", n_samples)
print("n_features:", n_features)
print("n_classes:", n_classes)




# Hold out 25% of the samples for evaluation. No random_state is given, so
# the split differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

# Number of principal components ("eigenfaces") to keep.
n_components = 150

print(
    "extracting the top %d eigenfaces from %d faces"
    % (n_components, x_train.shape[0])
)

# PCA with the randomized SVD solver takes the place of the older
# RandomizedPCA class; it is fitted on the training samples only.
t0 = time()
pca = PCA(
    n_components=n_components,
    svd_solver='randomized',
    whiten=True,
)
pca = pca.fit(x_train)
print("done in %0.3fs" % (time() - t0))

# View each component as an (h, w) image so it can be plotted later.
eigenfaces = pca.components_.reshape((n_components, h, w))

print("projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
x_train_pca = pca.transform(x_train)
x_test_pca = pca.transform(x_test)
# NOTE(review): this dumps the raw (untransformed) test matrix; it looks
# like leftover debug output.
print('x_test', x_test)
print("done in %0.3fs" % (time() - t0))




print("fitting the classifier to the training set")
t0 = time()

# Hyper-parameter grid for the RBF-kernel SVM. A wider grid was used before:
#   {"C":[1e3,5e3,1e4,5e4,1e5],"gamma":[0.0001,0.0005,0.001,0.005,0.01,0.1]}
param_grid = {
    "C": [1e3, 5e3],
    "gamma": [0.0001, 0.0005],
}

# Cross-validated search over the grid, fitted on the PCA-projected
# training data.
clf = GridSearchCV(
    SVC(kernel="rbf", class_weight="balanced"),
    param_grid,
)
clf = clf.fit(x_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("best estimator found by grid search:", clf.best_estimator_)

print("predicting people's names on the test set......")
t0 = time()
y_pred = clf.predict(x_test_pca)
# NOTE(review): progress marker that looks like leftover debug output.
print('ooooooooooookkkkkkkkkkkkkkkkkkk')
print("done in %0.3fs" % (time() - t0))

# Per-class precision/recall/F1, followed by the confusion matrix with
# rows and columns ordered by label index.
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))








def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Draw a grid of grayscale images, each captioned with its title.

    A new matplotlib figure is created and sized proportionally to the grid.
    The figure is not shown here; the caller is expected to call
    ``plt.show()``.

    Args:
        images: Indexable collection of images; each entry is reshaped to
            ``(h, w)`` before being drawn.
        titles: Indexable collection of captions, parallel to ``images``.
        h: Image height in pixels.
        w: Image width in pixels.
        n_row: Number of grid rows.
        n_col: Number of grid columns.

    At most ``n_row * n_col`` cells are drawn. If fewer images or titles are
    supplied, only the available ones are plotted instead of raising
    ``IndexError``.
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.9, hspace=.35)

    # Never index past the supplied data: cap the cell count at the shorter
    # of the two input collections.
    n_plots = min(n_row * n_col, len(images), len(titles))
    for i in range(n_plots):
        plt.subplot(n_row, n_col, i + 1)  # subplot indices are 1-based
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Empty tick tuples hide the axis ticks around each image.
        plt.xticks(())
        plt.yticks(())




def title(y_pred, y_test, target_names, i):
    """Build the two-line caption for sample ``i``.

    Args:
        y_pred: Predicted label indices, indexable by sample position.
        y_test: True label indices, indexable by sample position.
        target_names: Lookup from label index to full name string.
        i: Position of the sample to caption.

    Returns:
        A string with the predicted name on the first line and the true
        name on the second. Only the text after the last space of each
        full name (typically the surname) is used.
    """
    def last_word(label):
        # Keep only what follows the final space of the full name.
        return target_names[label].rsplit(' ', 1)[-1]

    return "predicted:%s\ntrue:       %s" % (
        last_word(y_pred[i]),
        last_word(y_test[i]),
    )




# Caption every test sample with its own predicted/true name. The sample
# index `i` must be forwarded to title(); passing a constant index would
# give every image the caption of one single sample.
prediction_titles = [
    title(y_pred, y_test, target_names, i)
    for i in range(y_pred.shape[0])
]

# Gallery of test faces with their prediction captions.
plot_gallery(x_test, prediction_titles, h, w)

# Gallery of the principal components ("eigenfaces"), numbered in order.
eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)

# Display both figures.
plt.show()


GridSearchCV:一种几乎能自动搜索最优参数组合的优秀 sklearn 工具。







评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值