# Face classification on the Labeled Faces in the Wild (LFW) dataset
# Load the Labeled Faces in the Wild (LFW) dataset with scikit-learn's
# built-in fetcher.
from sklearn.datasets import fetch_lfw_people

# Keep only people who have at least 70 images; resize=0.4 scales every
# image down to 40% of its original size.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Print the shape of each part of the dataset, in order: the flattened
# feature matrix, the 2-D images, the integer labels and the label names.
# NOTE(review): an earlier note here claimed 3 people / 760 images. The
# scikit-learn eigenfaces example reports 1288 images of 7 people for these
# arguments, each image 50x37 (1850 features) -- confirm against the output.
for part in ("data", "images", "target", "target_names"):
    print(getattr(lfw_people, part).shape)
import matplotlib.pyplot as plt

# Preview the first 12 faces in a 3x4 grid. plt.subplots creates the figure
# and all sub-axes in one call: `fig` is the whole figure, `ax` is the array
# of Axes (one per subplot).
fig, ax = plt.subplots(3, 4)
for i, axi in enumerate(ax.flat):
    # Map the scalar pixel values through the 'bone' colormap.
    axi.imshow(lfw_people.images[i], cmap='bone')
    # Hide the tick marks and label each face with the person's name.
    axi.set(xticks=[], yticks=[],
            xlabel=lfw_people.target_names[lfw_people.target[i]])
# Each image is 50x37, i.e. 1850 pixels and therefore 1850 features, so the
# data is reduced in dimension before it is classified.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# Support vector machine with a Gaussian (RBF) kernel. class_weight='balanced'
# weights each class inversely to its frequency in the training labels.
svc = SVC(
    kernel='rbf',
    class_weight='balanced',
)

# Project the original 1850 dimensions down to 150 components. whiten=True
# rescales the components so that each one has the same (unit) variance.
pca = PCA(
    n_components=150,
    whiten=True,
    random_state=42,
)

# Workflow: reduce the dimensionality with PCA first, then train the SVM on
# the reduced features.
model = make_pipeline(pca, svc)
# Split the dataset into training and test sets. random_state is fixed so
# the split is reproducible; the split ratio is left at the library default.
from sklearn.model_selection import train_test_split

Xtrain, Xtest, ytrain, ytest = train_test_split(
    lfw_people.data, lfw_people.target, random_state=40
)
# A bare `Xtrain.shape, Xtest.shape` expression only displays a value inside
# a notebook cell; print it so the shapes also show when run as a script.
print(Xtrain.shape, Xtest.shape)
from sklearn.