import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from paddle.io import Dataset
Experiment Objectives
(1) Explore the application of the VGG network to flower recognition: use a VGG network to study its performance on a flower classification task and verify its suitability for small datasets.
(2) Improve flower classification accuracy: optimize the model using VGG's deep structure, hyperparameter tuning (number of epochs, batch size, etc.), and optionally a pretrained model.
(3) Master the PaddlePaddle framework: through a hands-on project, become familiar with using PaddlePaddle to build, train, and deploy deep learning models.
(4) Data augmentation and model optimization: learn to improve the model's generalization through data augmentation (e.g., random rotation and flipping) and model optimization (e.g., tuning the learning rate, using Dropout); a small illustration follows below.
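As a concrete illustration of objective (4), here is a minimal augmentation sketch using paddle.vision.transforms. This pipeline is an assumption for illustration only; it is not wired into the Reader defined later in this notebook, which performs just a resize and rescale.

import paddle.vision.transforms as T

# Hypothetical training-time augmentation pipeline (illustrative only):
# the Reader below performs just a resize to 224x224 and scaling to [0, 1].
train_transforms = T.Compose([
    T.Resize((224, 224)),          # resize to the network's input size
    T.RandomHorizontalFlip(0.5),   # random horizontal flip, as in objective (4)
    T.RandomRotation(15),          # random rotation within +/- 15 degrees
    T.ToTensor(),                  # HWC uint8 -> CHW float32 in [0, 1]
])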
Experiment Steps
Draft and fill in the steps yourself; a complete lab report is required.
# Configuration parameters
train_parameters = {
    "input_size": [3, 224, 224],                      # shape of the input images
    "class_dim": -1,                                  # number of classes (filled in later)
    "src_path": "/home/aistudio/data/data6504/flower7595.zip",   # path to the raw dataset archive
    "target_path": "/home/aistudio/data/",            # directory to unzip into
    "train_list_path": "/home/aistudio/data/train.txt",  # path to train.txt
    "eval_list_path": "/home/aistudio/data/eval.txt",    # path to eval.txt
    "readme_path": "/home/aistudio/data/readme.json",    # path to readme.json
    "label_dict": {},                                 # label dictionary
    "num_epochs": 1,                                  # number of training epochs
    "train_batch_size": 8,                            # batch size during training
    "skip_steps": 10,                                 # log every N steps
    "save_steps": 300,                                # save a checkpoint every N steps
    "learning_strategy": {                            # optimizer-related settings
        "lr": 0.0001                                  # learning rate
    },
    "checkpoints": "/home/aistudio/work/checkpoints"  # checkpoint directory
}
1. Data Preparation
(1) Unzip the raw dataset.
(2) Split it into training and validation sets by a fixed ratio.
(3) Shuffle the samples and generate the data list files.
(4) Define the data reader.
def unzip_data(src_path, target_path):
    # Unzip the raw dataset: extract the zip archive at src_path into target_path
    if not os.path.isdir(target_path + "flowers"):
        z = zipfile.ZipFile(src_path, "r")
        z.extractall(path=target_path)
        z.close()

# Generate the data list files
def get_data_list(target_path, train_list_path, eval_list_path):
    # Holds per-class metadata
    class_detail = []
    # Folder names, one per class
    data_list_path = target_path + "flowers/"
    class_dirs = os.listdir(data_list_path)
    # Total number of images
    all_class_images = 0
    # Current class label
    class_label = 0
    # Number of classes
    class_dim = 0
    # Lines to be written to eval.txt and train.txt
    trainer_list = []
    eval_list = []
    # Walk through each class folder
    for class_dir in class_dirs:
        if class_dir != ".DS_Store":
            class_dim += 1
            # Metadata for this class
            class_detail_list = {}
            eval_sum = 0
            trainer_sum = 0
            # Number of images in this class
            class_sum = 0
            # Path of this class's folder
            path = data_list_path + class_dir
            # All image files in the folder
            img_paths = os.listdir(path)
            for img_path in img_paths:  # iterate over every image in the folder
                if img_path.split(".")[-1] == "jpg":
                    name_path = path + '/' + img_path  # full path of this image
                    if class_sum % 8 == 0:  # every 8th image goes to the validation set
                        eval_sum += 1  # number of validation images
                        eval_list.append(name_path + "\t%d" % class_label + "\n")
                    else:
                        trainer_sum += 1
                        trainer_list.append(name_path + "\t%d" % class_label + "\n")
                    class_sum += 1         # images in this class
                    all_class_images += 1  # images across all classes
                else:
                    continue
            # class_detail entry for the readme.json file
            class_detail_list['class_name'] = class_dir              # class name
            class_detail_list['class_label'] = class_label           # class label
            class_detail_list['class_eval_images'] = eval_sum        # validation images of this class
            class_detail_list['class_trainer_images'] = trainer_sum  # training images of this class
            class_detail.append(class_detail_list)
            # Record the label-to-name mapping
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1
    # Record the number of classes
    train_parameters['class_dim'] = class_dim
    # Shuffle and write the validation list
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image)
    # Shuffle and write the training list
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image)
    # Summary information for the readme.json file
    readjson = {}
    readjson['all_class_name'] = data_list_path  # parent directory of the classes
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'], 'w') as f:
        f.write(jsons)
    print('Data list generation finished!')

# Parameter initialization
src_path = train_parameters['src_path']
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']

# Unzip the raw dataset to the target path
unzip_data(src_path, target_path)

# Split into training and validation sets, shuffle, and generate the data lists.
# Clear train.txt and eval.txt first, since get_data_list appends to them
# (opening in 'w' mode truncates the file).
with open(train_list_path, 'w') as f:
    pass
with open(eval_list_path, 'w') as f:
    pass

# Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)

class Reader(Dataset):
    def __init__(self, data_path, mode='train'):
        '''
        Data reader.
        :param data_path: directory containing the data list files
        :param mode: 'train' or 'eval'
        '''
        super().__init__()
        self.data_path = data_path
        self.img_paths = []
        self.labels = []
        if mode == 'train':
            with open(os.path.join(self.data_path, "train.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
        else:
            with open(os.path.join(self.data_path, "eval.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
        for img_info in self.info:
            img_path, label = img_info.strip().split('\t')
            self.img_paths.append(img_path)
            self.labels.append(int(label))

    def __getitem__(self, index):
        # Return one sample. :param index: sample index
        # Open the image file and fetch its label
        img_path = self.img_paths[index]
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = img.resize((224, 224), Image.BILINEAR)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1)) / 255  # HWC -> CHW, scaled to [0, 1]
        label = self.labels[index]
        label = np.array([label], dtype="int64")
        return img, label

    def print_sample(self, index: int = 0):
        print("file name:", self.img_paths[index], "\tlabel:", self.labels[index])

    def __len__(self):
        return len(self.img_paths)

# Load the training data
train_dataset = Reader('/home/aistudio/data', mode='train')
train_loader = paddle.io.DataLoader(train_dataset,
                                    batch_size=train_parameters['train_batch_size'],
                                    shuffle=True)
# Load the evaluation data
eval_dataset = Reader('/home/aistudio/data', mode='eval')
eval_loader = paddle.io.DataLoader(eval_dataset, batch_size=8, shuffle=False)

train_dataset.print_sample(200)
print(train_dataset.__len__())
eval_dataset.print_sample(0)
print(eval_dataset.__len__())
print(eval_dataset.__getitem__(10)[0].shape)
print(eval_dataset.__getitem__(10)[1].shape)

2. Model Configuration
The core of VGG is five groups of convolutions, with max-pooling between consecutive groups for spatial downsampling. Each group stacks several consecutive 3x3 convolutions; stacking two 3x3 convolutions covers the same receptive field as a single 5x5 convolution while using fewer parameters and adding an extra non-linearity. The number of kernels grows from 64 in the shallowest group to 512 in the deepest, and all convolutions within a group use the same number of kernels. The convolutions are followed by two fully connected layers and then the classification layer. Depending on the number of convolution layers per group, there are 11-, 13-, 16-, and 19-layer variants; the commonly illustrated structure is the 16-layer VGG-16.

class ConvPool(paddle.nn.Layer):
    # Convolution + pooling block
    def __init__(self, num_channels, num_filters, filter_size,
                 pool_size, pool_stride, groups,
                 conv_stride=1, conv_padding=1):
        super(ConvPool, self).__init__()
        for i in range(groups):
            self.add_sublayer(  # register a sub-layer instance
                'bb_%d' % i,
                paddle.nn.Conv2D(
                    in_channels=num_channels,  # input channels
                    out_channels=num_filters,  # number of kernels
                    kernel_size=filter_size,   # kernel size
                    stride=conv_stride,        # stride
                    padding=conv_padding       # padding
                )
            )
            self.add_sublayer(
                'relu%d' % i,
                paddle.nn.ReLU()
            )
            num_channels = num_filters
        self.add_sublayer(
            'Maxpool',
            paddle.nn.MaxPool2D(
                kernel_size=pool_size,  # pooling kernel size
                stride=pool_stride      # pooling stride
            )
        )

    def forward(self, inputs):
        x = inputs
        for prefix, sub_layer in self.named_children():
            x = sub_layer(x)
        return x
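A quick, hypothetical smoke test for ConvPool (not part of the original notebook): two 3x3 convolutions with padding 1 preserve the spatial size, and the trailing 2x2 max-pooling halves it.

# Sanity check: [1, 3, 224, 224] -> [1, 64, 112, 112]
block = ConvPool(3, 64, 3, 2, 2, groups=2)
x = paddle.randn([1, 3, 224, 224])
print(block(x).shape)  # expected: [1, 64, 112, 112]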
class VGGNet(paddle.nn.Layer):
    def __init__(self, class_dim=5):
        super(VGGNet, self).__init__()
        # Convolution + pooling stages
        self.convpool01 = ConvPool(3, 64, 3, 2, 2, groups=1)     # [N,3,224,224] -> [N,64,112,112]
        self.convpool02 = ConvPool(64, 128, 3, 2, 2, groups=1)   # [N,64,112,112] -> [N,128,56,56]
        self.convpool03 = ConvPool(128, 256, 3, 2, 2, groups=2)  # can be deepened further if needed
        self.convpool04 = ConvPool(256, 512, 3, 2, 2, groups=2)
        # Classification head
        self.fc1 = paddle.nn.Linear(512 * 14 * 14, 4096)
        self.fc2 = paddle.nn.Linear(4096, 4096)
        self.fc3 = paddle.nn.Linear(4096, class_dim)

    def forward(self, inputs, label=None):
        out = self.convpool01(inputs)
        out = self.convpool02(out)
        out = self.convpool03(out)
        out = self.convpool04(out)  # [N,512,14,14]
        out = paddle.flatten(out, start_axis=1)
        out = paddle.nn.functional.relu(self.fc1(out))
        out = paddle.nn.functional.relu(self.fc2(out))
        logits = self.fc3(out)  # [N, class_dim]
        if label is not None:
            acc = paddle.metric.accuracy(input=logits, label=label)
            return logits, acc
        else:
            return logits

3. Model Training
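The training cell itself does not appear above. Below is a minimal sketch of how training could proceed with the train_parameters config defined earlier; the Adam optimizer, cross-entropy loss, and logging/checkpoint cadence are assumptions for illustration, not the notebook's confirmed setup.

# Minimal training-loop sketch (illustrative; optimizer and cadence are assumed)
model = VGGNet(class_dim=train_parameters['class_dim'])
model.train()
opt = paddle.optimizer.Adam(learning_rate=train_parameters['learning_strategy']['lr'],
                            parameters=model.parameters())
loss_fn = paddle.nn.CrossEntropyLoss()
os.makedirs(train_parameters['checkpoints'], exist_ok=True)

steps = 0
for epoch in range(train_parameters['num_epochs']):
    for imgs, labels in train_loader:
        steps += 1
        logits, acc = model(imgs, labels)  # forward pass returns logits and batch accuracy
        loss = loss_fn(logits, labels)
        loss.backward()
        opt.step()
        opt.clear_grad()
        if steps % train_parameters['skip_steps'] == 0:
            print("epoch %d, step %d, loss %.4f, acc %.4f"
                  % (epoch, steps, float(loss), float(acc)))
        if steps % train_parameters['save_steps'] == 0:
            paddle.save(model.state_dict(),
                        train_parameters['checkpoints'] + "/save_dir_%d.pdparams" % steps)
paddle.save(model.state_dict(), train_parameters['checkpoints'] + "/save_dir_final.pdparams")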