This post continues the previous one and walks through the train_with_keras.py and face_predict_test.py files. Reference blog: https://www.cnblogs.com/neo-T/p/6477378.html . That blog covers the principles behind building the CNN, the operational steps, and the code in great detail, so I won't repeat the code line by line here; I only comment on a few issues that came up. Readers who get stuck can consult that blog directly.
Training with the Keras library
train_with_keras.py contains two classes, Dataset and Model. Dataset's main method, load, reads the data produced by load_dataset.py in the previous post, uses the relevant library functions to split it into training, validation, and test sets, prints the size of each set, then one-hot encodes the labels and normalizes the images for the model training that follows.
The Dataset class
class Dataset:
    def __init__(self, path_name):
        # training set
        self.train_images = None
        self.train_labels = None
        # validation set
        self.valid_images = None
        self.valid_labels = None
        # testing set
        self.test_images = None
        self.test_labels = None
        # directory to load data from
        self.path_name = path_name
        # input shape
        self.input_shape = None

    def load(self):
        # load dataset
        images, labels, nb_classes = load_data(self.path_name)
        images = np.array(images)  # transform list to array
        _, img_rows, img_cols, img_channels = images.shape
        train_images, valid_images, train_labels, valid_labels = train_test_split(
            images, labels, test_size=0.3, random_state=random.randint(0, 100))
        # note: as in the reference blog, the test set is drawn from the full data,
        # so it may overlap with the training set
        _, test_images, _, test_labels = train_test_split(images, labels, test_size=0.5,
                                                          random_state=random.randint(0, 100))
        # if the current dim ordering is 'th', the input picture data is ordered as
        # channels, rows, cols; otherwise as rows, cols, channels.
        # Rearrange the datasets into the dimension order required by the Keras backend.
        if K.image_dim_ordering() == 'th':
            train_images = train_images.reshape(train_images.shape[0], img_channels,
                                                img_rows, img_cols)
            valid_images = valid_images.reshape(valid_images.shape[0], img_channels,
                                                img_rows, img_cols)
            test_images = test_images.reshape(test_images.shape[0], img_channels,
                                              img_rows, img_cols)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            self.input_shape = (img_rows, img_cols, img_channels)
        # show the sizes of the training, validation, and testing sets
        print('train samples: ', train_images.shape[0])
        print('valid samples: ', valid_images.shape[0])
        print('test samples: ', test_images.shape[0])
        # categorical_crossentropy is used as the loss function, so the labels are
        # one-hot encoded according to the number of categories
        train_labels = np_utils.to_categorical(train_labels, nb_classes)
        valid_labels = np_utils.to_categorical(valid_labels, nb_classes)
        test_labels = np_utils.to_categorical(test_labels, nb_classes)
        # convert pixel data to float for normalization
        train_images = train_images.astype('float32')
        valid_images = valid_images.astype('float32')
        test_images = test_images.astype('float32')
        # normalization
        train_images /= 255
        valid_images /= 255
        test_images /= 255
        self.train_images = train_images
        self.train_labels = train_labels
        self.valid_images = valid_images
        self.valid_labels = valid_labels
        self.test_images = test_images
        self.test_labels = test_labels
In the original blog, train_test_split was imported from the old cross_validation module; it has since moved to the model_selection module, so the import needs updating.
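Concretely, the import changes from the old module to the new one:

# old import (scikit-learn < 0.18, as used in the reference blog):
# from sklearn.cross_validation import train_test_split
# current import:
from sklearn.model_selection import train_test_split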
images = np.array(images) # transform list to array
This line is added because the later processing steps (reshape, astype('float32'), and the element-wise /= 255 normalization) require a NumPy array, whereas the images obtained earlier from load_data are a plain Python list.
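A toy illustration of the difference (not code from the project):

import numpy as np

images = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(4)]  # load_data returns a plain list
# images.astype('float32')   # AttributeError: a list has no astype/reshape
images = np.array(images)    # ndarray of shape (4, 64, 64, 3)
print(images.shape)          # reshape, astype, and element-wise division now all work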
if K.image_dim_ordering() == 'th':
    train_images = train_images.reshape(train_images.shape[0], img_channels,
                                        img_rows, img_cols)
    valid_images = valid_images.reshape(valid_images.shape[0], img_channels,
                                        img_rows, img_cols)
    test_images = test_images.reshape(test_images.shape[0], img_channels,
                                      img_rows, img_cols)
    self.input_shape = (img_channels, img_rows, img_cols)
else:
    self.input_shape = (img_rows, img_cols, img_channels)
This branch exists because the images as originally loaded have shape [rows, cols, channels], which is exactly the layout TensorFlow expects (in fact, with the TensorFlow backend this whole block could be omitted, since the loaded image shape already conforms). Note also that each reshape must use its own set's sample count (valid_images.shape[0], test_images.shape[0]); using train_images.shape[0] for all three would fail, since the sets have different sizes.
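One caveat: K.image_dim_ordering() is the Keras 1 spelling. If you are on Keras 2, the same check looks like this (adjust to whichever version you have installed):

from keras import backend as K

# Keras 2 equivalent of K.image_dim_ordering() == 'th'
if K.image_data_format() == 'channels_first':
    input_shape = (img_channels, img_rows, img_cols)
else:  # 'channels_last', the TensorFlow default (was 'tf')
    input_shape = (img_rows, img_cols, img_channels)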
The Model class
The Model class builds the convolutional network and handles training, saving and loading, evaluation, and prediction. This code is essentially identical to the reference blog and follows the standard Keras training workflow; if any function, parameter, or detail is unclear, consult the reference blog and the Keras Chinese documentation listed in the previous post. Here is the code anyway:
class Model:
    def __init__(self):
        self.model = None

    def build_model(self, dataset, nb_classes):
        # create an empty sequential model
        self.model = Sequential()
        # network layers
        self.model.add(Convolution2D(32, 3, 3, border_mode='same',
                                     input_shape=dataset.input_shape))
        self.model.add(Activation('relu'))
        self.model.add(Convolution2D(32, 3, 3))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))
        self.model.add(Convolution2D(64, 3, 3, border_mode='same'))
        self.model.add(Activation('relu'))
        self.model.add(Convolution2D(64, 3, 3))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))
        self.model.add(Flatten())
        self.model.add(Dense(512))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(nb_classes))
        self.model.add(Activation('softmax'))
        self.model.summary()  # print a summary of the network
    def train(self, dataset, batch_size=40, nb_epoch=50, data_augmentation=True):
        sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)  # create the optimizer
        self.model.compile(loss='categorical_crossentropy', optimizer=sgd,
                           metrics=['accuracy'])  # configure the model for training
        # Without data augmentation, train directly on the data as provided.
        # Data augmentation creates new training samples from ours by rotating,
        # shifting, flipping, etc., artificially enlarging the training set.
        if not data_augmentation:
            self.model.fit(dataset.train_images,
                           dataset.train_labels,
                           batch_size=batch_size,
                           nb_epoch=nb_epoch,
                           validation_data=(dataset.valid_images, dataset.valid_labels),
                           shuffle=True)
        # with data augmentation (on-the-fly)
        else:
            # define a data generator
            datagen = ImageDataGenerator(
                featurewise_center=False,             # whether to center inputs over the dataset
                samplewise_center=False,              # whether to set each sample's mean to 0
                featurewise_std_normalization=False,  # whether to divide inputs by the dataset std
                samplewise_std_normalization=False,   # whether to divide each sample by its own std
                zca_whitening=False,                  # whether to apply ZCA whitening
                rotation_range=20,                    # random rotation range in degrees (0-180)
                width_shift_range=0.2,                # random horizontal shift, fraction of width (0-1)
                height_shift_range=0.2,               # random vertical shift, fraction of height (0-1)
                horizontal_flip=True,                 # whether to randomly flip horizontally
                vertical_flip=False)                  # whether to randomly flip vertically
            # compute statistics over the training data (only needed for
            # featurewise centering/normalization and ZCA whitening)
            datagen.fit(dataset.train_images)
            # train from the generator
            self.model.fit_generator(datagen.flow(dataset.train_images, dataset.train_labels,
                                                  batch_size=batch_size),
                                     samples_per_epoch=dataset.train_images.shape[0],
                                     nb_epoch=nb_epoch,
                                     validation_data=(dataset.valid_images, dataset.valid_labels))
    # save / restore the trained model at file_path
    def save_model(self, file_path):
        self.model.save(file_path)

    def load_model(self, file_path):
        self.model = load_model(file_path)

    def evaluate(self, dataset):
        score = self.model.evaluate(dataset.test_images, dataset.test_labels, verbose=1)
        print("%s: %.2f%%" % (self.model.metrics_names[1], score[1]*100))

    def face_predict(self, image):
        # convert to float and normalize, matching the training preprocessing
        image = image.astype('float32')
        image /= 255
        result = self.model.predict_proba(image)  # per-class probabilities
        print('result:', result)
        result = self.model.predict_classes(image)  # index of the most probable class
        return result[0]
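For completeness, the training entry point follows the usual Keras flow. A minimal sketch; the data directory, class count, and model path here are assumptions based on this series, so adjust them to your own layout:

if __name__ == '__main__':
    dataset = Dataset('.\\data')               # assumed data directory from the previous post
    dataset.load()
    model = Model()
    model.build_model(dataset, nb_classes=41)  # 41 classes (folders s1..s41); in practice,
                                               # pass the value returned by load_data
    model.train(dataset)
    model.save_model('.\\model\\face.model.h5')
    model.evaluate(dataset)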
Training accuracy again comes out around 99.5%.
Running the test
train_with_keras.py has already saved the trained model to .\model\face.model.h5. face_predict_test.py captures live frames with OpenCV, runs each detected face through the prediction function face_predict to compare it against the trained model, and draws the predicted label on the frame.
if __name__ == '__main__':
    # =============================================================================
    # choose a specific camera from the command line
    # if len(sys.argv) != 2:
    #     print("Usage:%s camera_id\r\n" % (sys.argv[0]))
    #     sys.exit()
    # =============================================================================
    model = Model()
    model.load_model(file_path='.\\model\\face.model.h5')
    cap = cv.VideoCapture(0)
    cascade_path = ".\\haarcascades\\haarcascade_frontalface_alt2.xml"
    # load the cascade classifier once, outside the loop
    cascade = cv.CascadeClassifier(cascade_path)
    # cyclic detection and recognition of faces
    while True:
        _, image = cap.read()
        image = cv.flip(image, 1)
        image_gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
        faceRects = cascade.detectMultiScale(image_gray, scaleFactor=1.2, minNeighbors=3,
                                             minSize=(32, 32))
        if len(faceRects) > 0:
            for faceRect in faceRects:
                # detectMultiScale returns (x, y, w, h); image rows are indexed by y, columns by x
                x, y, w, h = faceRect
                imageRect = image[y-10:y+h+10, x-10:x+w+10]  # crop the face box, expanded by 10 px
                # resize to the training image size, then add a batch dimension for prediction
                imageRect = resize_image(imageRect, 64, 64)
                imageRect = imageRect.reshape((1, 64, 64, 3))
                # imageRect = imageRect.reshape((1, imageRect.shape[0], imageRect.shape[1],
                #                                imageRect.shape[2]))
                faceID = model.face_predict(imageRect)
                cv.rectangle(image, (x-10, y-10), (x+w+10, y+h+10), color=(0, 255, 0), thickness=2)
                cv.putText(image, '%s' % (faceID), (x+30, y+30), cv.FONT_HERSHEY_COMPLEX, 1, (255, 0, 255), 2)
                print('%s' % (faceID))
        cv.imshow("face detect", image)
        if cv.waitKey(5) == 27:  # exit on Esc
            break
    cap.release()
    cv.destroyAllWindows()
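The resize_image helper used above comes from load_dataset.py in the previous post. As a reminder, here is a minimal sketch of what it does, assuming the usual pad-to-square-then-resize approach from the reference blog:

import cv2 as cv

def resize_image(image, height=64, width=64):
    # pad the shorter sides with black borders so the crop becomes square,
    # then scale it to the target size
    h, w = image.shape[:2]
    longest = max(h, w)
    top = (longest - h) // 2
    bottom = longest - h - top
    left = (longest - w) // 2
    right = longest - w - left
    squared = cv.copyMakeBorder(image, top, bottom, left, right,
                                cv.BORDER_CONSTANT, value=[0, 0, 0])
    return cv.resize(squared, (width, height))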
Here is one of my own test shots:
Summary
As you can see, my pictures live in folder s41, so the corresponding label should be 40, and it is indeed recognized correctly. However, when I move around, I am sometimes recognized as someone else, so the results still leave room for improvement. Given the small training set used here, the model is likely somewhat biased; going forward, adding more training data and tuning the CNN architecture are worth trying. I hope this helps.