PyTorch入门精简资料(七)手写数字识别

本文通过PyTorch介绍手写数字识别,包括MNIST数据集的图像处理,利用LeNet5模型进行识别,并提供了Datawhale的相关参考资料。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Datawhale_Task7    手写数字识别
用PyTorch完成手写数字识别

1.MNIST数据集转换为图像

import os
import struct
import numpy as np
import cv2

def create_txt(path_txt, str_data):
    """Append ``str_data`` to the text file at ``path_txt``.

    The file is created when it does not exist yet; existing content is kept.

    Args:
        path_txt: Path of the text file to append to.
        str_data: Text to write. It is written as-is; no newline is added.
    """
    # Append mode creates a missing file by itself, so no separate
    # "create it first" step (which also printed the file object) is needed.
    with open(path_txt, 'a') as f:
        f.write(str_data)

def load_mnist(images_path,labels_path):
    """Load an MNIST-style pair of uncompressed IDX image and label files.

    Args:
        images_path: Path to the ``idx3-ubyte`` image file.
        labels_path: Path to the matching ``idx1-ubyte`` label file.

    Returns:
        A tuple ``(images, labels)`` of ``uint8`` arrays. ``images`` has shape
        ``(num_images, rows * cols)`` (one flattened image per row) and
        ``labels`` has shape ``(num_images,)``.

    Raises:
        ValueError: If a file has the wrong magic number, the image and label
            counts differ, or the pixel data does not match the header.
    """
    # Example paths:
    # labels_path = "./data/raw/train-labels-idx1-ubyte"
    # images_path = "./data/raw/train-images-idx3-ubyte"
    with open(labels_path, 'rb') as lbpath:
        # Label header: two big-endian uint32 values (magic, item count).
        magic, n = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError(
                'unexpected magic number %d in label file %s'
                % (magic, labels_path))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    with open(images_path, 'rb') as imgpath:
        # Image header: four big-endian uint32 values
        # (magic, image count, rows, cols).
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError(
                'unexpected magic number %d in image file %s'
                % (magic, images_path))
        if num != len(labels):
            raise ValueError(
                'image count %d does not match label count %d'
                % (num, len(labels)))
        # Use the dimensions stored in the header instead of assuming 28x28.
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(num, rows * cols)
    return images, labels

def save_mnist(images_path, labels_path, kind):
    """Export one MNIST split as PNG images plus a text index.

    Images are written to ``./data/<kind>/00000.png``, ``00001.png``, ...
    and one line ``"<image path> <label>"`` per image is appended to
    ``./data/<kind>.txt``. Because the index is appended to, running the
    export twice for the same ``kind`` duplicates its entries.

    Args:
        images_path: Path to the IDX image file, passed to ``load_mnist``.
        labels_path: Path to the IDX label file, passed to ``load_mnist``.
        kind: Split name (e.g. ``'train'`` or ``'test'``); used as the output
            directory name and as the index file stem.
    """
    imgs, labels = load_mnist(images_path, labels_path)
    if imgs.shape[0] == 0:
        # Nothing to export: create neither the directory nor the index.
        return

    root = './data'
    # These paths do not depend on the image, so compute them once.
    imgpath = os.path.join(root, kind)
    path_txt = os.path.join(root, kind+'.txt')
    # makedirs also creates ./data itself when it is missing.
    os.makedirs(imgpath, exist_ok=True)

    index_lines = []
    for i in range(imgs.shape[0]):
        img_file = os.path.join(imgpath, '%05d' % i + '.png')
        # Each row is a flattened image; restore the 28x28 layout for saving.
        cv2.imwrite(img_file, imgs[i].reshape(28, 28))
        index_lines.append(img_file + ' ' + str(labels[i]) + '\n')

    # Write the index in one call instead of reopening the file per image.
    create_txt(path_txt, ''.join(index_lines))

if __name__=="__main__":
    # Export the training split first, then the 10k test split.
    for images_path, labels_path, kind in (
        ("./data/raw/train-images-idx3-ubyte",
         "./data/raw/train-labels-idx1-ubyte",
         'train'),
        ("./data/raw/t10k-images-idx3-ubyte",
         "./data/raw/t10k-labels-idx1-ubyte",
         'test'),
    ):
        save_mnist(images_path, labels_path, kind)

2.LeNet5实现手写数字识别

import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets

# Training hyperparameters.
batch_size = 128
learning_rate = 0.01
num_epoch = 10

# MNIST dataset objects rooted at ./data (download=True is passed to
# torchvision); every image is converted to a tensor by ToTensor().
train_data = datasets.MNIST('./data', train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.MNIST('./data', train=False, transform=transforms.ToTensor(), download=True)

# train_loader: iterable yielding shuffled mini-batches of batch_size samples.
train_loader = DataLoader(train_data, batch_size, shuffle=True)
# Batch the test set as well: DataLoader defaults to batch_size=1, which
# would run evaluation one image per forward pass.
test_loader = DataLoader(test_data, batch_size, shuffle=False)


class CNN(nn.Module):
    """LeNet-style classifier: two conv/batch-norm/ReLU/pool stages + 3 FC layers.

    ``fc1`` expects 400 flattened features. For 28x28 inputs the stages give
    28x28 -> pool -> 14x14 -> 5x5 conv -> 10x10 -> pool -> 5x5 with 16
    channels, i.e. 16 * 5 * 5 = 400.

    Args:
        in_dim: Number of input image channels.
        out_dim: Number of output classes (size of the returned logits).
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(in_dim, 6, kernel_size=3, stride=1, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(6)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.batch_norm2 = nn.BatchNorm2d(16)

        # Classifier head.
        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, out_dim)

    def forward(self, x):
        """Return unnormalized class scores of shape ``(batch, out_dim)``."""
        # Stage 1: conv -> batch norm -> ReLU -> 2x2 max pool.
        stage1 = self.batch_norm1(self.conv1(x))
        stage1 = self.pool(F.relu(stage1))

        # Stage 2: conv -> batch norm -> in-place ReLU -> 2x2 max pool.
        stage2 = self.batch_norm2(self.conv2(stage1))
        stage2 = self.pool(self.relu(stage2))

        # Flatten everything except the batch dimension.
        flat = stage2.view(stage2.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Train on the GPU when one is available.
isGPU = torch.cuda.is_available()
print(isGPU)
model = CNN(1, 10)
if isGPU:
    model = model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

model.train()
for epoch in range(num_epoch):
    running_acc = 0.0   # number of correctly classified samples this epoch
    running_loss = 0.0  # sum of per-sample losses this epoch
    # train_loader yields mini-batches of batch_size samples; steps count from 1.
    for i, (img, label) in enumerate(train_loader, 1):
        if isGPU:
            img = img.cuda()
            label = label.cuda()
        # forward
        out = model(img)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, pred = torch.max(out, dim=1)  # index of the largest score per sample
        hits = pred == label
        # criterion returns the batch mean, so weight it by the batch size.
        running_loss += loss.item() * label.size(0)
        running_acc += hits.sum().item()

        if i % 100 == 0:
            # Batch accuracy is only needed when it is logged.
            acc = hits.float().mean()
            print("epoch: {}/{}, loss: {:.6f}, running_acc: {:.6f}"
                  .format(epoch + 1, num_epoch, loss.item(), acc.item()))
    # Report per-sample averages: the loss is normalized by the dataset size
    # just like the accuracy, instead of printing the raw sum.
    print("epoch: {}, loss: {:.6f}, accuracy: {:.6f}".format(
        epoch + 1, running_loss / len(train_data), running_acc / len(train_data)))

model.eval()
current_num = 0
# The forward pass itself must run under no_grad so that no autograd graph
# is built during evaluation.
with torch.no_grad():
    for img, label in test_loader:
        if isGPU:
            img = img.cuda()
            label = label.cuda()
        out = model(img)
        _, pred = torch.max(out, 1)
        current_num += (pred == label).sum().item()

print("Test result: accuracy: {:.6f}".format(float(current_num / len(test_data))))

torch.save(model.state_dict(), './cnn.pth')  # save the trained weights

3.手写数字识别(参考:Datawhale)

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms

# Device: use CUDA when it is actually usable, otherwise fall back to the CPU.
# is_available has to be *called*; the bare function object is always truthy
# and would select 'cuda' even on CPU-only machines.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyperparameters
num_epochs=5
num_classes=10
batch_size=64
learning_rate=0.001

# Datasets: both MNIST splits are rooted at ./data with download=False,
# and every image is converted to a tensor by ToTensor().
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=False,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

# Data loaders: shuffle the training batches, keep the test order fixed.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

#cnn
class ConvNet(nn.Module):
    """Two conv stages followed by a single linear classifier.

    Each stage is Conv2d(5x5, padding 2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2), so the spatial size is halved per stage. The linear layer
    takes 7 * 7 * 32 features, which matches 1-channel 28x28 inputs
    (28 -> 14 -> 7).

    Args:
        num_classes: Number of output classes (size of the returned logits).
    """

    def __init__(self,num_classes=10):
        super().__init__()
        self.layer1 = self._conv_stage(1, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one conv -> batch norm -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self,x):
        """Return unnormalized class scores of shape ``(batch, num_classes)``."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension.
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)

# Model, moved to the selected device
model = ConvNet(num_classes).to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
# Training loop (steps are counted from 1 for logging)
for epoch in range(num_epochs):
    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        output = model(images)
        loss = criterion(output, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log the current batch loss every 100 steps
        if step % 100 == 0:
            print('Epoch:[{}/{}],Step:[{}/{}],Loss:{:.4f}'.format(epoch+1,num_epochs,step,total_step,loss.item()))

# Evaluation: switch to eval mode and disable gradient tracking
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row
        predicted = outputs.data.max(1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on 10000 test images is :{} %'.format(100*correct/total))

 

 

参考:
- pytorch建立自己的数据集并加载
- LeNet手写数字识别

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值