实训总结。


在海云捷迅实习期间,我参与了多个实践练习项目,包括改变卷积神经网络的卷积层数、制作图片拼图小游戏、实现手写体识别和完成口罩识别。以下是我对这些实践练习的归纳总结:

一、改变卷积神经网络的卷积层数

在这个项目中,我通过调整卷积神经网络的卷积层数,使精确率达到了72.29%。增加或减少卷积层数会改变网络对图像特征的提取和学习能力;经过反复尝试和调整,我找到了效果最好的卷积层数配置,使精确率得到了显著提升。
在这里插入图片描述
在这里插入图片描述

# Network definition
class network(nn.Module):
    """Small CNN classifier: three conv stages followed by a three-layer MLP.

    Every stage is ``Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2, 2)``,
    so the spatial size is preserved by the convolution and halved by the
    pooling, while the channel count grows 3 -> 16 -> 32 -> 64.  The first
    fully connected layer is sized for ``64 * 4 * 4`` features, which
    corresponds to a 32x32 input (32 -> 16 -> 8 -> 4).

    Args:
        num_classes: Number of logits produced by the last linear layer.
    """

    def __init__(self, num_classes):
        # Initialise the nn.Module base class before registering sub-modules.
        super().__init__()

        # Stage 1: 3 -> 16 channels; 32x32 stays 32x32 after the conv, 16x16 after pooling.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: 16 -> 32 channels; 16x16 after the conv, 8x8 after pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 3: 32 -> 64 channels; 8x8 after the conv, 4x4 after pooling.
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # The feature map entering the classifier head is (64, 4, 4).
        self.fc1 = nn.Linear(64 * 4 * 4, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Run the forward pass and return the ``(batch, num_classes)`` logits."""
        for stage in (self.conv1, self.conv2, self.conv3):
            x = stage(x)

        # Collapse every non-batch dimension into one feature vector per sample.
        flat = x.view(x.size(0), -1)

        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

二、制作图片拼图小游戏

我通过学习图像处理技术,成功制作了一个我喜欢的图片拼图小游戏。在这个小游戏中,我将一张图片分割成多个小块,并通过交换这些小块的位置来完成拼图。通过学习图像处理和游戏开发技术,我深入理解了图像处理算法和游戏设计原理,并将其应用到实际项目中。

在这里插入图片描述

以下是代码模块:

import pygame, sys, random
from pygame.locals import *

# Game-wide constants
# NOTE(review): WINDOWWIDTH, WINDOWHEIGHT and BLUE are not referenced by the
# code visible in this file; the window is sized from the loaded image instead.
WINDOWWIDTH = 500
WINDOWHEIGHT = 500
BACKGROUNDCOLOR = (255, 255, 255)  # fill colour used to clear the window each frame
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)  # colour of the grid lines
FPS = 40  # value passed to the clock's tick() in the main loop

VHNUMS = 3  # number of tiles per row and per column
CELLNUMS = VHNUMS * VHNUMS  # total number of cells on the board
MAXRANDTIME = 100  # number of random moves applied when shuffling a new board


# Quit
def terminate():
    """Shut pygame down and end the program by raising ``SystemExit``."""
    pygame.quit()
    raise SystemExit


# Build a randomly shuffled game board
def newGameBoard():
    """Create a shuffled board and report where the blank cell ended up.

    The board starts in solved order with the last cell replaced by ``-1``
    (the blank).  It is then scrambled by applying ``MAXRANDTIME`` randomly
    chosen moves, so the result is always reachable from the solved state.

    Returns:
        tuple: ``(board, blackCell)`` - the list of tile ids and the index of
        the blank cell within it.
    """
    cells = list(range(CELLNUMS))
    blank = CELLNUMS - 1
    cells[blank] = -1

    # Index in this tuple == the random direction code drawn below.
    shifters = (moveLeft, moveRight, moveUp, moveDown)
    for _ in range(MAXRANDTIME):
        blank = shifters[random.randint(0, 3)](cells, blank)
    return cells, blank


# If the blank is not in the leftmost column, slide the tile on its left into the blank
def moveRight(board, blackCell):
    """Move the tile left of the blank one step to the right.

    Args:
        board: Flat list of tile ids; modified in place.
        blackCell: Current index of the blank cell.

    Returns:
        The index of the blank after the move (unchanged when the blank is
        already in the leftmost column).
    """
    if blackCell % VHNUMS != 0:
        neighbour = blackCell - 1
        board[neighbour], board[blackCell] = board[blackCell], board[neighbour]
        return neighbour
    return blackCell


# If the blank is not in the rightmost column, slide the tile on its right into the blank
def moveLeft(board, blackCell):
    """Move the tile right of the blank one step to the left.

    Args:
        board: Flat list of tile ids; modified in place.
        blackCell: Current index of the blank cell.

    Returns:
        The index of the blank after the move (unchanged when the blank is
        already in the rightmost column).
    """
    if blackCell % VHNUMS != VHNUMS - 1:
        neighbour = blackCell + 1
        board[neighbour], board[blackCell] = board[blackCell], board[neighbour]
        return neighbour
    return blackCell


# If the blank is not in the top row, slide the tile above it into the blank
def moveDown(board, blackCell):
    """Move the tile above the blank one step down.

    Args:
        board: Flat list of tile ids; modified in place.
        blackCell: Current index of the blank cell.

    Returns:
        The index of the blank after the move (unchanged when the blank is
        already in the top row).
    """
    if blackCell >= VHNUMS:
        neighbour = blackCell - VHNUMS
        board[neighbour], board[blackCell] = board[blackCell], board[neighbour]
        return neighbour
    return blackCell


# If the blank is not in the bottom row, slide the tile below it into the blank
def moveUp(board, blackCell):
    """Move the tile below the blank one step up.

    Args:
        board: Flat list of tile ids; modified in place.
        blackCell: Current index of the blank cell.

    Returns:
        The index of the blank after the move (unchanged when the blank is
        already in the bottom row).
    """
    if blackCell < CELLNUMS - VHNUMS:
        neighbour = blackCell + VHNUMS
        board[neighbour], board[blackCell] = board[blackCell], board[neighbour]
        return neighbour
    return blackCell


# Has the puzzle been solved?
def isFinished(board, blackCell):
    """Return True when every cell except the last holds its own index.

    Args:
        board: Flat list of tile ids.
        blackCell: Index of the blank cell; accepted for interface
            compatibility but not consulted.
    """
    return all(board[pos] == pos for pos in range(CELLNUMS - 1))


# Initialise pygame and the frame-rate clock
pygame.init()
mainClock = pygame.time.Clock()

# Load the picture that is cut into tiles
gameImage = pygame.image.load('D:/python/pythonProject/static/Charlie.png')
gameRect = gameImage.get_rect()

# Create the window; its width and height follow the picture's size
windowSurface = pygame.display.set_mode((gameRect.width, gameRect.height))
pygame.display.set_caption('查理苏痛吻围城拼图')

cellWidth = int(gameRect.width / VHNUMS)
cellHeight = int(gameRect.height / VHNUMS)

finish = False

gameBoard, blackCell = newGameBoard()

# Main game loop
while True:
    for event in pygame.event.get():
        if event.type == QUIT:
            terminate()
        # Once the puzzle is solved only the quit event is honoured.
        if finish:
            continue
        if event.type == KEYDOWN:
            if event.key == K_LEFT or event.key == ord('a'):
                blackCell = moveLeft(gameBoard, blackCell)
            if event.key == K_RIGHT or event.key == ord('d'):
                blackCell = moveRight(gameBoard, blackCell)
            if event.key == K_UP or event.key == ord('w'):
                blackCell = moveUp(gameBoard, blackCell)
            if event.key == K_DOWN or event.key == ord('s'):
                blackCell = moveDown(gameBoard, blackCell)
        if event.type == MOUSEBUTTONDOWN and event.button == 1:
            x, y = pygame.mouse.get_pos()
            col = x // cellWidth
            row = y // cellHeight
            # When the image size is not a multiple of VHNUMS a thin strip of
            # pixels lies outside the grid; clicks there map to col/row ==
            # VHNUMS and must be ignored rather than aliased onto another cell.
            if col < VHNUMS and row < VHNUMS:
                index = col + row * VHNUMS
                blankRow, blankCol = divmod(blackCell, VHNUMS)
                # Compare rows and columns, not flat indices: index +/- 1 alone
                # would also accept the tile at the end of the neighbouring row.
                if abs(row - blankRow) + abs(col - blankCol) == 1:
                    gameBoard[blackCell], gameBoard[index] = gameBoard[index], gameBoard[blackCell]
                    blackCell = index

    if not finish and isFinished(gameBoard, blackCell):
        # Reveal the missing tile so the complete picture is shown.
        gameBoard[blackCell] = CELLNUMS - 1
        finish = True

    windowSurface.fill(BACKGROUNDCOLOR)

    for i in range(CELLNUMS):
        # Where on screen cell i is drawn.
        rowDst, colDst = divmod(i, VHNUMS)
        rectDst = pygame.Rect(colDst * cellWidth, rowDst * cellHeight, cellWidth, cellHeight)

        # The blank cell is left showing the background colour.
        if gameBoard[i] == -1:
            continue

        # Which part of the source picture the tile in cell i shows.
        rowArea, colArea = divmod(gameBoard[i], VHNUMS)
        rectArea = pygame.Rect(colArea * cellWidth, rowArea * cellHeight, cellWidth, cellHeight)
        windowSurface.blit(gameImage, rectDst, rectArea)

    # Grid lines: vertical first, then horizontal.
    for i in range(VHNUMS + 1):
        pygame.draw.line(windowSurface, BLACK, (i * cellWidth, 0), (i * cellWidth, gameRect.height))
    for i in range(VHNUMS + 1):
        pygame.draw.line(windowSurface, BLACK, (0, i * cellHeight), (gameRect.width, i * cellHeight))

    pygame.display.update()

    mainClock.tick(FPS)


三、实现手写体识别

在这个项目中,我利用全连接算子实现了手写体识别。通过收集手写数字的数据集,并利用全连接算子进行特征提取和分类,我成功实现了对手写数字的准确识别。这个项目让我更深入地了解了全连接算子的原理和应用,并提高了我的机器学习和数据处理能力。

在这里插入图片描述

四、完成口罩识别

在这个项目中,我成功完成了口罩识别。通过收集和标记带有口罩和不带口罩的人脸图像,并利用深度学习算法进行训练和模型构建,在实际场景中实现了精确的口罩识别。这个项目提高了我在计算机视觉领域的知识和技能,并加深了我对深度学习和模型训练的理解。

from openvino.runtime import Core
import openvino.runtime as ov
import cv2 as cv
import numpy as np
import tensorflow as tf

OpenVINO 模型推理器(class)

class Predictor:
    """
    Thin wrapper around an OpenVINO model compiled for the "CPU" device.
    """

    def __init__(self, model_path):
        """Read the model found at ``model_path`` and compile it for "CPU"."""
        runtime = Core()
        loaded = runtime.read_model(model=model_path)
        self.compiled_model = runtime.compile_model(
            model=loaded,
            device_name="CPU",
        )

    def get_inputs_name(self, num):
        """Return ``compiled_model.input(num)`` unchanged.

        Despite the method name, the value is whatever the compiled model
        hands back for input index ``num``; it is not converted to a string.
        """
        input_port = self.compiled_model.input(num)
        return input_port

    def get_outputs_name(self, num):
        """Return ``compiled_model.output(num)`` unchanged (see get_inputs_name)."""
        output_port = self.compiled_model.output(num)
        return output_port

    def predict(self, input_data):
        """Run the compiled model on ``input_data`` (wrapped in a one-item list) and return its result."""
        batch = [input_data]
        return self.compiled_model(batch)

    def get_request(self):
        """Create and return a new infer request from the compiled model."""
        request = self.compiled_model.create_infer_request()
        return request

图像预处理

def process_image(input_image, size):
    """Pad, resize and normalise an image into the detector's input layout.

    The image is copied into the top-left corner of a black square canvas
    whose side equals the longer image edge, converted from BGR to RGB,
    resized to ``size`` x ``size``, reordered from HWC to CHW, scaled to the
    range [0, 1] and given a leading batch axis.

    Args:
        input_image (uint8): Input image matrix.  Assumed to be an H x W x 3
            BGR image (the canvas is hard-coded to 3 channels) - confirm
            against the caller if other layouts are possible.
        size (int): Side length of the square model input.

    Returns:
        float32: Array of shape (1, 3, size, size).
    """
    # Only the two spatial axes decide the canvas size; the channel axis must
    # not take part in the max().
    height, width = input_image.shape[:2]
    max_len = max(height, width)
    canvas = np.zeros([max_len, max_len, 3], np.uint8)
    canvas[0:height, 0:width] = input_image  # place the image on the square background
    img = cv.cvtColor(canvas, cv.COLOR_BGR2RGB)  # BGR -> RGB
    # The interpolation flag has to be passed by keyword: the third positional
    # parameter of cv.resize is ``dst``, so a positional flag is not used as
    # the interpolation mode.
    img = cv.resize(img, (size, size), interpolation=cv.INTER_NEAREST)
    img = np.transpose(img, [2, 0, 1])  # HWC -> CHW
    img = img / 255.0  # normalise to [0, 1]
    img = np.expand_dims(img, 0)  # add the batch axis
    return img.astype(np.float32)


图像后处理

def process_result(box_results, conf_results):
    """Filter raw detector outputs and run non-maximum suppression.

    The reshapes below are hard-coded, so the function assumes 8400 candidate
    boxes and 2 classes: ``box_results`` must hold 8400 * 4 values and
    ``conf_results`` must be a 3-D array whose last two axes are
    (2, 8400) before the transpose.

    Args:
        box_results (float32): Raw box predictions.
        conf_results (float32): Raw per-class confidence predictions.

    Returns:
        float: Kept boxes, one row of 4 values per detection.
        float: Score of each kept box.
        int: Class index of each kept box.
        All three are empty arrays when no candidate scores above 0.5.
    """
    # Swap the last two axes, then flatten to one row per candidate box.
    conf_table = np.transpose(conf_results, [0, 2, 1]).reshape(8400, 2)
    box_table = box_results.reshape(8400, 4)

    # Best class score per candidate; keep only confident candidates.
    best_score = conf_table.max(axis=1)
    keep = best_score > 0.5
    if not keep.any():
        return np.array([]), np.array([]), np.array([])

    scores = best_score[keep]
    boxes = box_table[keep]
    classes = conf_table.argmax(axis=1)[keep]

    # Non-maximum suppression removes duplicate detections of one object.
    # Positional arguments: max output count, then 0.25 and 0.35, which
    # tf.image.non_max_suppression takes as IoU and score thresholds.
    indexs = tf.image.non_max_suppression(boxes, scores, len(scores), 0.25, 0.35)

    result_box = [boxes[picked, :] for picked in indexs]
    result_score = [scores[picked] for picked in indexs]
    result_class = [classes[picked] for picked in indexs]
    return np.array(result_box), np.array(result_score), np.array(result_class)


画出预测框

def draw_box(image, boxes, scores, classes, labels):
    """Draw the detections onto ``image`` and return it.

    Box coordinates are multiplied by ``max(image.shape) / 640.0`` before
    drawing, i.e. they are assumed to be expressed on a 640-pixel square.
    The image is modified in place.

    Args:
        image (uint8): Original image.
        boxes (float32): Predicted boxes, one row of 4 values per detection.
        scores (float32): Score of each detection.
        classes (int): Class index of each detection; only indices 0 and 1
            have a colour assigned.
        labels (str): Label text, indexed by class.

    Returns:
        uint8: The annotated image (the same object as ``image``).
    """
    palette = [(0, 0, 255), (0, 255, 0)]
    scale = max(image.shape) / 640.0  # factor from model space back to image space

    for idx, cls_id in enumerate(classes):
        box = boxes[idx, :]
        x1, y1, x2, y2 = [int(box[k] * scale) for k in range(4)]
        label = labels[cls_id]
        score = scores[idx]
        colour = palette[cls_id]
        cv.rectangle(image, (x1, y1), (x2, y2), colour, 2, cv.LINE_8)
        cv.putText(image, label + ":" + str(score), (x1, y1 - 10), cv.FONT_HERSHEY_SIMPLEX, 0.55, colour, 2)

    return image


读取标签

def read_label(label_path):
    """Return every whitespace-separated token in the file at ``label_path``."""
    with open(label_path, 'r') as handle:
        return [token for line in handle for token in line.split()]

同步推理

label_path = "labels.txt"
yoloe_model_path = "ppyoloe_crn_s_80.xml"
predictor = Predictor(model_path=yoloe_model_path)
# Handles used below to pick the box and confidence outputs out of the result.
boxes_name = predictor.get_outputs_name(0)
conf_name = predictor.get_outputs_name(1)
labels = read_label(label_path=label_path)
cap = cv.VideoCapture(0)

# Create the window once, before the loop.  Flag 0 makes it resizable; the
# name must match the one passed to imshow.  Re-applying the size on every
# frame would undo any manual resize by the user.
cv.namedWindow("MaskDetection", 0)
cv.resizeWindow("MaskDetection", 640, 480)  # initial width and height

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged / stream ended).
            break
        frame = cv.flip(frame, 180)  # positive flip code: mirror horizontally
        input_frame = process_image(frame, 640)
        results = predictor.predict(input_data=input_frame)
        boxes, scores, classes = process_result(box_results=results[boxes_name], conf_results=results[conf_name])
        result_frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)
        cv.imshow('MaskDetection', result_frame)
        key = cv.waitKey(1)
        if key == 27:  # Esc quits
            break
finally:
    # Always give the camera and the window back, even after an error.
    cap.release()
    cv.destroyAllWindows()

异步推理

label_path = "labels.txt"
yoloe_model_path = "ppyoloe_crn_s_80.xml"
predictor = Predictor(model_path=yoloe_model_path)
input_layer = predictor.get_inputs_name(0)
labels = read_label(label_path=label_path)
cap = cv.VideoCapture(0)

# Two infer requests are used alternately: while the result of the current
# frame is being consumed, inference on the next frame is already running.
curr_request = predictor.get_request()
next_request = predictor.get_request()

ret, frame = cap.read()
if not ret:
    cap.release()
    raise RuntimeError("Unable to read the first frame from the camera")
frame = cv.flip(frame, 180)  # mirror the first frame like every later one
curr_frame = process_image(frame, 640)
curr_request.set_tensor(input_layer, ov.Tensor(curr_frame))
curr_request.start_async()

# Create the window once.  Flag 0 makes it resizable; the name must match the
# one passed to imshow.
cv.namedWindow("MaskDetection", 0)
cv.resizeWindow("MaskDetection", 640, 480)  # initial width and height

try:
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged / stream ended).
            break
        next_frame = cv.flip(next_frame, 180)  # positive flip code: mirror horizontally
        in_frame = process_image(next_frame, 640)
        next_request.set_tensor(input_layer, ov.Tensor(in_frame))
        next_request.start_async()

        # Block until the current request has finished.  A blocking wait() is
        # used instead of wait_for(-1): the request is reused as next_request
        # on the following iteration, so it must be complete before the swap.
        curr_request.wait()
        # NOTE: despite their names, these two variables hold the raw output
        # data of the finished request, not output names.
        boxes_name = curr_request.get_output_tensor(0).data
        conf_name = curr_request.get_output_tensor(1).data
        boxes, scores, classes = process_result(box_results=boxes_name, conf_results=conf_name)
        frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)
        cv.imshow('MaskDetection', frame)

        # The frame submitted above becomes the one whose result is read next.
        frame = next_frame
        curr_request, next_request = next_request, curr_request
        key = cv.waitKey(1)
        if key == 27:  # Esc quits
            break
finally:
    # Always give the camera and the window back, even after an error.
    cap.release()
    cv.destroyAllWindows()

五、总结

通过这些实践练习项目,我获得了宝贵的实际经验和技能提升。我学会了调整卷积神经网络的结构来提高精确率,掌握了图像处理的技术和游戏开发的原理,理解了全连接算子在手写体识别中的应用,以及在口罩识别项目中应用深度学习算法的过程。这些实践练习不仅增强了我的实际操作能力,还培养了我解决问题和团队合作的能力。

通过海云捷迅的实习,我将这些实践练习所学到的知识和技能应用到了实际项目中,并不断提升自己的专业能力和职业素养。我相信这些经历将对我的未来职业发展产生积极的影响,并为我的技术和职业道路打下坚实的基础。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值