PyTorch Introduction
1. Overview
2. Tensor
2.1 constructor:
import numpy
import torch
# from list
x = torch.tensor([[1, 0], [0, 1]])
# from numpy
x = torch.tensor(numpy.array([[1, 0], [1, 2]]))
# all 0
x = torch.zeros([2, 2])
# all 1
x = torch.ones([1, 2, 4])
2.2 squeeze: remove a dimension of length 1
x4 = torch.ones([1, 2, 4])
x5 = x4.squeeze(0) # [2,4]
x6 = x4.squeeze(1) # [1,2,4] (no change: dim 1 has size 2, not 1)
x4 = torch.ones([2, 1, 4])
x5 = x4.squeeze(1) # [2,4]
2.3 unsqueeze: insert a new dimension of length 1
x3 = torch.zeros([2, 2])
x5 = x3.unsqueeze(0) # [1,2,2]
2.4 transpose: swap two specified dimensions
x4 = torch.ones([1, 2, 4])
x5 = x4.transpose(0, 1) # [2,1,4]
2.5 cat: concatenate multiple tensors along an existing dimension
x4 = torch.ones([1, 2, 4])
x5 = torch.ones([1, 1, 4])
x6 = torch.ones([1, 3, 4])
x = torch.cat([x4, x5, x6], dim=1) # [1,6,4]
2.6 operators: +, -, pow
x = x.pow(2)   # elementwise square
y = x.sum()    # sum over all elements
y = x.mean()   # mean over all elements
2.7 device
x = x.to('cpu')    # .to() returns a new tensor, so assign the result
x = x.to('cuda')   # use 'cuda' for the GPU ('gpu' is not a valid device string)
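A common pattern is to pick the device once based on availability (the same idiom the XOR example below uses):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.ones(2, 2).to(device)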
2.8 calculate gradient
x = torch.tensor([1., 2.], requires_grad=True)
z = x.pow(2).sum()
z.backward()
print(x.grad) # dz/dx = 2x = [2., 4.]
An example of finding a minimum with SGD:
import torch
x = torch.tensor([1.], requires_grad=True)
optimizer = torch.optim.SGD([x], lr=0.1, momentum=0)
for i in range(100):
    optimizer.zero_grad()   # clear the gradient from the previous iteration
    z = x.pow(2).sum()
    z.backward()            # compute dz/dx
    optimizer.step()        # x <- x - lr * dz/dx
print(x)  # close to 0, the minimizer of x^2
Related: detach() returns a tensor that shares the same data but is excluded from gradient tracking, as sketched below.
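A minimal sketch of detach():
x = torch.tensor([1., 2.], requires_grad=True)
y = x.pow(2)
z = y.detach()           # shares y's data, but requires_grad=False
print(z.requires_grad)   # False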
2.9 mean()
import torch
x = [
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
    [[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]
]
x = torch.FloatTensor(x)  # shape [2, 3, 4]
print(x.mean(dim=0, keepdim=True))  # shape [1, 3, 4]
print(x.mean(dim=1))                # shape [2, 4]
print(x.mean(dim=2))                # shape [2, 3]
3. Dataset
Use shuffle=True for training and shuffle=False for testing; with shuffle=True the samples are drawn in random order each epoch.
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
class mydata(Dataset):
    def __init__(self):
        data = np.loadtxt('data/test.txt')
        self.xdata = torch.from_numpy(data[:, 0:-1])   # features: all columns but the last
        self.ydata = torch.from_numpy(data[:, [-1]])   # label: the last column
    def __getitem__(self, item):
        return self.xdata[item], self.ydata[item]
    def __len__(self):
        return len(self.xdata)

dataset = mydata()
loader = DataLoader(dataset, batch_size=4, shuffle=True)
for epoch in range(2):
    for i, val in enumerate(loader):
        inputs, labels = val
        print(epoch, inputs, labels)
4. Neural Network Layers
4.1 linear layers: nn.Linear(in_features, out_features)
layer = torch.nn.Linear(2, 4)
print(layer.weight)  # shape [4, 2]
print(layer.bias)    # shape [4]
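Applying the layer maps the last dimension from in_features to out_features; a quick check:
x = torch.randn(3, 2)
y = layer(x)  # shape [3, 4]: y = x @ weight.T + bias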
4.2 activation functions
nn.Sigmoid()
nn.ReLU()
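Activations are modules applied elementwise; a small sketch:
import torch
import torch.nn as nn
act = nn.Sigmoid()
print(act(torch.tensor([0.0])))          # tensor([0.5000])
relu = nn.ReLU()
print(relu(torch.tensor([-1.0, 2.0])))   # tensor([0., 2.])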
4.3 Loss functions
# mean squared error: for linear regression
nn.MSELoss()
# cross entropy error: for classification
nn.CrossEntropyLoss()
Reference: https://zhuanlan.zhihu.com/p/98785902
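A minimal sketch of both losses (the example tensors are just for illustration):
import torch
import torch.nn as nn
# MSELoss expects prediction and target of the same shape
mse = nn.MSELoss()
print(mse(torch.tensor([0.5, 1.5]), torch.tensor([1.0, 1.0])))  # (0.25 + 0.25) / 2 = 0.25
# CrossEntropyLoss takes raw logits of shape [N, C] and class indices of shape [N];
# it applies softmax internally, so do not add a softmax layer before it
ce = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, 0.5], [0.1, 1.2]])
labels = torch.tensor([0, 1])
print(ce(logits, labels))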
4.4 Model
class myModel(torch.nn.Module):
    def __init__(self):
        super(myModel, self).__init__()
        # define your layers
        self.net = torch.nn.Sequential(
            torch.nn.Linear(4, 10),
            torch.nn.Sigmoid(),
            torch.nn.Linear(10, 1)
        )
    # compute your output
    def forward(self, x):
        return self.net(x)
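Instantiating the model and calling it on a batch runs forward():
model = myModel()
y = model(torch.randn(8, 4))  # shape [8, 1]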
4.5 Optimizer
torch.optim.SGD(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False)
It implements stochastic gradient descent. params are the parameters you intend to train; lr is the learning rate; momentum acts like inertia (default 0).
# clear the gradients accumulated from the previous step
optimizer.zero_grad()
It resets the stored gradients of the weights to zero; PyTorch accumulates gradients across backward() calls, so call it before every backward().
optimizer.step()
It updates the weights using the gradients computed by backward().
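These calls combine into the standard update cycle; a minimal sketch, assuming model, criterion, inputs and labels are already defined as above:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer.zero_grad()                      # clear old gradients
loss = criterion(model(inputs), labels)    # forward pass
loss.backward()                            # compute gradients
optimizer.step()                           # update the weights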
4.6 Validation
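During validation, switch the model to evaluation mode and disable gradient tracking so no computation graph is built. A minimal sketch (loader and criterion as defined above); the validation() function in the XOR example below follows the same pattern:
model.eval()
with torch.no_grad():
    for x, y in loader:
        loss = criterion(model(x), y)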
4.7 Save & Load
torch.save(model.state_dict(), 'data/model')  # save only the parameters (the state dict)
ckpt = torch.load('data/model')
model.load_state_dict(ckpt)  # restore the parameters into an existing model
A simple end-to-end example: learning XOR
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

class mydata(Dataset):
    def __init__(self, file):
        data = np.loadtxt(file)
        self.xdata = torch.from_numpy(data[:, 0:-1])
        self.ydata = torch.from_numpy(data[:, [-1]])
    def __getitem__(self, item):
        return self.xdata[item], self.ydata[item]
    def __len__(self):
        return len(self.xdata)

class myModel(torch.nn.Module):
    def __init__(self):
        super(myModel, self).__init__()
        self.net = torch.nn.Sequential(
            nn.Linear(2, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = myModel().to(device)
criterion = torch.nn.MSELoss()

def loadData(file):
    dataset = mydata(file)
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    return loader

def trainNet():
    loader = loadData('data/1.txt')
    optimizer = torch.optim.SGD(model.parameters(), lr=0.2)
    for epoch in range(5000):
        model.train()
        for i, val in enumerate(loader):
            optimizer.zero_grad()
            inputs, labels = val
            # move to the model's device and cast to float32
            inputs = inputs.to(device, torch.float32)
            labels = labels.to(device, torch.float32)
            y = model(inputs)
            loss = criterion(y, labels)
            loss.backward()
            optimizer.step()
        if epoch % 100 == 0:
            print(epoch, inputs, labels, y)

def validation():
    dataset = mydata('data/2.txt')
    loader = DataLoader(dataset, batch_size=1, shuffle=False)
    model.eval()
    total_loss = 0
    for x, y in loader:
        x = x.to(device, torch.float32)
        y = y.to(device, torch.float32)
        with torch.no_grad():
            pre = model(x)
            loss = criterion(pre, y)
        total_loss += loss.cpu().item() * len(x)
    avg_loss = total_loss / len(loader.dataset)
    print(avg_loss)

trainNet()
validation()