%matplotlib inline
from pathlib import Path
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)

URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)
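The deeplearning.net mirror is not always reachable; at the time of writing the same mnist.pkl.gz is also hosted in the PyTorch tutorials repository. A fallback sketch, with the mirror URL as an assumption you should verify before relying on it:

# Fallback download -- the mirror URL below is an assumption, check it first.
FALLBACK_URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
if not (PATH / FILENAME).exists():
    content = requests.get(FALLBACK_URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)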
import pickle
import gzip

# The pickle holds ((x_train, y_train), (x_valid, y_valid), (x_test, y_test));
# we discard the test split here.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
import torch

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape

print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]) tensor([5, 0, 4, ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)
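Each row of x_train is a flattened 28×28 image. As a quick check (a minimal sketch; the %matplotlib inline magic above suggests matplotlib is available), reshape one row and display it:

from matplotlib import pyplot
pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")  # first digit; its label y_train[0] is 5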
from torch import nn
import torch.nn.functional as F  # needed for F.relu in forward()

class Mnist_NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.out(x)
        return x

net = Mnist_NN()
print(net)
Mnist_NN(
(hidden1): Linear(in_features=784, out_features=128, bias=True)
(hidden2): Linear(in_features=128, out_features=256, bias=True)
(out): Linear(in_features=256, out_features=10, bias=True)
)
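Before training, a quick forward pass confirms the shapes line up (a minimal sanity check; the untrained network just emits random logits):

with torch.no_grad():
    out = net(x_train[:64])  # a batch of 64 flattened 28x28 images
print(out.shape)             # expected: torch.Size([64, 10])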
We can print the weights and bias of each layer we defined, by name:
for name, parameter in net.named_parameters():
    print(name, parameter, parameter.size())
hidden1.weight Parameter containing:
tensor([[ 0.0120, 0.0259, 0.0166, ..., -0.0152, -0.0222, -0.0233],
[-0.0293, -0.0273, 0.0242, ..., 0.0102, 0.0084, -0.0278],
[-0.0223, -0.0338, -0.0335, ..., -0.0143, -0.0263, 0.0205],
...,
[-0.0221, 0.0060, -0.0015, ..., 0.0212, 0.0217, 0.0258],
[ 0.0228, 0.0161, -0.0326, ..., 0.0022, 0.0111, 0.0106],
[ 0.0069, 0.0217, -0.0232, ..., -0.0172, -0.0313, 0.0335]],
requires_grad=True) torch.Size([128, 784])
hidden1.bias Parameter containing:
tensor([-0.0121, 0.0077, -0.0112, 0.0305, -0.0219, 0.0249, 0.0284, -0.0054,
0.0316, 0.0202, -0.0145, 0.0091, -0.0197, 0.0346, 0.0083, -0.0249,
-0.0189, 0.0152, 0.0041, -0.0252, 0.0205, 0.0126, -0.0169, -0.0122,
0.0045, -0.0050, 0.0318, 0.0319, 0.0295, 0.0281, 0.0161, 0.0055,
-0.0348, -0.0132, 0.0169, 0.0155, 0.0082, 0.0167, -0.0062, -0.0114,
-0.0321, -0.0294, 0.0234, 0.0205, 0.0065, 0.0052, -0.0088, 0.0012,
-0.0179, 0.0318, 0.0084, -0.0089, 0.0010, 0.0237, -0.0336, -0.0011,
-0.0225, 0.0151, -0.0272, -0.0067, -0.0336, 0.0314, 0.0191, 0.0053,
-0.0256, 0.0265, -0.0194, -0.0259, 0.0111, -0.0245, 0.0200, -0.0141,
-0.0289, -0.0121, -0.0294, -0.0298, -0.0074, -0.0286, 0.0175, 0.0144,
-0.0219, 0.0290, 0.0240, 0.0042, 0.0163, 0.0220, -0.0294, -0.0095,
0.0010, -0.0282, 0.0147, -0.0338, -0.0012, 0.0215, -0.0316, 0.0254,
-0.0353, -0.0301, -0.0342, 0.0064, 0.0059, 0.0034, -0.0332, 0.0258,
-0.0124, 0.0234, -0.0195, -0.0349, -0.0225, -0.0038, 0.0087, -0.0242,
-0.0282, -0.0332, 0.0191, -0.0024, -0.0191, -0.0089, -0.0105, -0.0153,
0.0059, -0.0282, 0.0133, -0.0100, -0.0353, -0.0197, -0.0305, -0.0231],
requires_grad=True) torch.Size([128])
hidden2.weight Parameter containing:
tensor([[ 0.0186, 0.0666, -0.0187, ..., 0.0039, 0.0149, 0.0816],
[-0.0417, 0.0513, 0.0242, ..., 0.0791, -0.0284, -0.0841],
[-0.0849, -0.0528, 0.0599, ..., -0.0800, 0.0366, -0.0870],
...,
[ 0.0787, -0.0507, 0.0574, ..., -0.0635, 0.0433, 0.0102],
[-0.0868, -0.0364, 0.0820, ..., -0.0031, 0.0828, -0.0369],
[-0.0441, 0.0789, 0.0396, ..., 0.0203, -0.0785, 0.0585]],
requires_grad=True) torch.Size([256, 128])
hidden2.bias Parameter containing:
tensor([-0.0416, -0.0420, 0.0491, 0.0176, -0.0386, 0.0102, 0.0858, -0.0834,
-0.0246, 0.0652, -0.0475, 0.0035, 0.0877, -0.0435, -0.0626, -0.0246,
-0.0699, -0.0730, 0.0330, 0.0014, -0.0432, 0.0656, -0.0783, -0.0650,
-0.0084, 0.0270, -0.0274, 0.0057, -0.0327, 0.0395, -0.0580, -0.0589,
0.0090, -0.0079, -0.0736, 0.0626, -0.0091, -0.0106, 0.0610, 0.0723,
0.0370, 0.0838, 0.0010, 0.0845, 0.0783, -0.0557, -0.0822, 0.0068,
0.0224, 0.0880, 0.0393, 0.0526, -0.0111, 0.0748, -0.0824, 0.0090,
-0.0191, 0.0546, 0.0774, 0.0592, 0.0531, -0.0127, 0.0373, 0.0882,
0.0387, -0.0370, 0.0463, -0.0007, 0.0497, 0.0639, 0.0001, -0.0506,
-0.0562, -0.0578, 0.0413, -0.0166, 0.0213, -0.0203, 0.0554, 0.0593,
-0.0634, -0.0165, -0.0829, -0.0574, -0.0510, -0.0668, 0.0472, -0.0352,
0.0882, -0.0378, 0.0155, -0.0624, 0.0391, -0.0698, 0.0305, 0.0695,
-0.0211, -0.0635, 0.0415, -0.0625, -0.0380, -0.0754, 0.0502, -0.0281,
-0.0175, -0.0060, -0.0154, -0.0344, 0.0571, 0.0710, 0.0346, -0.0220,
0.0749, -0.0089, 0.0114, 0.0021, 0.0619, -0.0670, -0.0105, 0.0803,
-0.0086, 0.0107, -0.0242, 0.0735, -0.0748, -0.0610, 0.0421, 0.0878,
0.0120, -0.0221, 0.0143, -0.0493, -0.0181, 0.0288, 0.0613, 0.0099,
-0.0008, -0.0805, 0.0072, 0.0103, -0.0525, -0.0708, 0.0328, -0.0327,
-0.0052, 0.0778, -0.0592, -0.0170, -0.0095, 0.0361, -0.0582, -0.0170,
-0.0537, 0.0697, 0.0236, -0.0469, -0.0080, -0.0367, -0.0007, 0.0755,
0.0088, -0.0046, 0.0059, 0.0410, 0.0325, 0.0448, 0.0092, 0.0225,
-0.0023, -0.0391, -0.0792, -0.0383, 0.0792, -0.0743, -0.0697, 0.0622,
-0.0187, 0.0700, -0.0701, -0.0821, 0.0692, -0.0543, 0.0645, -0.0273,
-0.0788, -0.0747, -0.0113, 0.0035, 0.0715, 0.0504, -0.0625, 0.0463,
0.0535, 0.0637, -0.0479, 0.0753, 0.0065, -0.0429, 0.0639, 0.0679,
0.0105, -0.0276, 0.0489, 0.0812, 0.0686, -0.0392, -0.0209, 0.0043,
-0.0627, 0.0180, -0.0224, -0.0242, -0.0646, 0.0048, -0.0070, 0.0703,
0.0577, 0.0370, 0.0161, -0.0574, 0.0466, 0.0008, -0.0659, -0.0639,
0.0145, -0.0619, -0.0528, -0.0631, -0.0113, 0.0614, 0.0691, 0.0258,
0.0769, 0.0536, 0.0731, 0.0389, -0.0563, -0.0072, 0.0662, -0.0800,
-0.0709, 0.0871, 0.0502, -0.0597, -0.0665, 0.0445, -0.0200, 0.0413,
-0.0159, -0.0609, 0.0605, -0.0634, 0.0324, -0.0284, 0.0844, 0.0051],
requires_grad=True) torch.Size([256])
out.weight Parameter containing:
tensor([[ 0.0543, 0.0035, -0.0097, ..., -0.0019, 0.0612, 0.0419],
[ 0.0442, -0.0441, -0.0015, ..., -0.0134, -0.0397, -0.0460],
[ 0.0588, -0.0561, -0.0317, ..., 0.0018, -0.0420, -0.0504],
...,
[-0.0441, -0.0078, -0.0287, ..., 0.0445, 0.0500, 0.0285],
[-0.0365, -0.0455, 0.0440, ..., 0.0438, -0.0578, 0.0552],
[ 0.0159, -0.0479, 0.0240, ..., 0.0292, 0.0286, 0.0472]],
requires_grad=True) torch.Size([10, 256])
out.bias Parameter containing:
tensor([ 0.0338, 0.0310, -0.0097, 0.0514, -0.0249, 0.0430, -0.0194, -0.0056,
-0.0235, 0.0238], requires_grad=True) torch.Size([10])
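The shapes above pin down the total parameter count; a one-line check (a minimal sketch):

total = sum(p.numel() for p in net.parameters())
print(total)  # 784*128 + 128 + 128*256 + 256 + 256*10 + 10 = 136074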
Use TensorDataset and DataLoader to simplify batching
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    # a bare linear model: batch (bs, 784) @ weights (784, 10) + bias (10,)
    return xb.mm(weights) + bias

bs = 64                 # batch size
xb = x_train[0:bs]      # one mini-batch of inputs
yb = y_train[0:bs]      # matching labels
weights = torch.randn([784, 10], dtype=torch.float, requires_grad=True)
bias = torch.zeros(10, requires_grad=True)

print(loss_func(model(xb), yb))
tensor(11.9960, grad_fn=<NllLossBackward>)
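With sensibly scaled logits, cross-entropy over ten balanced classes would start near -ln(1/10) ≈ 2.3; the larger value here simply reflects the unscaled random weights. A small accuracy helper is useful alongside the loss (a minimal sketch):

def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)     # predicted class = index of the largest logit
    return (preds == yb).float().mean()

print(accuracy(model(xb), yb))           # about 0.1 for random weights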
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )
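Iterating a DataLoader yields (input, target) mini-batches; a quick peek at one batch (a minimal check):

xb, yb = next(iter(train_dl))
print(xb.shape, yb.shape)  # torch.Size([64, 784]) torch.Size([64])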
During training we usually call model.train(), so that layers such as Batch Normalization and Dropout behave in training mode; at test time we call model.eval(), which switches Batch Normalization to its running statistics and disables Dropout.
import numpy as np

def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    for step in range(steps):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # size-weighted mean, since the last batch may be smaller than the rest
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
        print('Current step:' + str(step), 'Validation loss:' + str(val_loss))
from torch import optim

def get_model():
    model = Mnist_NN()
    return model, optim.SGD(model.parameters(), lr=0.001)

def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)
    if opt is not None:
        # an optimizer is passed only during training; validation skips the update
        loss.backward()
        opt.step()
        opt.zero_grad()
    return loss.item(), len(xb)
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(25, model, loss_func, opt, train_dl, valid_dl)
Current step:0 Validation loss:2.2804867317199706
Current step:1 Validation loss:2.2482234981536866
Current step:2 Validation loss:2.196887675476074
Current step:3 Validation loss:2.112893226242065
Current step:4 Validation loss:1.9761430267333984
Current step:5 Validation loss:1.770499384689331
Current step:6 Validation loss:1.5132826763153076
Current step:7 Validation loss:1.2579261003494262
Current step:8 Validation loss:1.0497708839416504
Current step:9 Validation loss:0.8955571031570435
Current step:10 Validation loss:0.7838414452552795
Current step:11 Validation loss:0.7014339096069336
Current step:12 Validation loss:0.6391226231575012
Current step:13 Validation loss:0.5898873178482056
Current step:14 Validation loss:0.5509584670066834
Current step:15 Validation loss:0.5195278914451599
Current step:16 Validation loss:0.4932431311607361
Current step:17 Validation loss:0.4716634550571442
Current step:18 Validation loss:0.4531318392276764
Current step:19 Validation loss:0.43725813541412356
Current step:20 Validation loss:0.4232536446094513
Current step:21 Validation loss:0.4111496852397919
Current step:22 Validation loss:0.4003682988405228
Current step:23 Validation loss:0.39091790931224824
Current step:24 Validation loss:0.38286286516189577
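The validation loss falls to about 0.38 after 25 epochs. To turn that into an accuracy figure (a minimal sketch reusing the accuracy idea above; the exact number varies run to run):

model.eval()
with torch.no_grad():
    correct = 0
    for xb, yb in valid_dl:
        preds = torch.argmax(model(xb), dim=1)  # predicted class per example
        correct += (preds == yb).sum().item()
print(correct / len(valid_ds))  # roughly 0.9 given the ~0.38 loss above, but run-dependent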