Manual implementation: add lambd * L2_penalty to the loss when computing it
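For reference, the objective minimized by the code below is the squared loss plus a scaled penalty, matching the L2_penalty function defined later (which, as written, also penalizes the bias):

    loss = square_loss(output, label) + lambd * (||w||^2 + b^2) / 2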
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import random
import matplotlib as mpl
mpl.rcParams['figure.dpi']=120
import mxnet as mx
num_train=20
num_test=100
num_inputs=200
true_w=nd.ones((num_inputs,1))*0.01
true_b=0.05
X=nd.random.normal(shape=(num_train+num_test,num_inputs))
y=nd.dot(X,true_w)+true_b
y+=.01*nd.random.normal(shape=y.shape)
X_train,X_test=X[:num_train],X[num_train:]
y_train,y_test=y[:num_train],y[num_train:]
batch_size=1
def data_iter(num_examples):
    # yield minibatches of (data, label) in a random order
    idx=list(range(num_examples))
    random.shuffle(idx)
    for i in range(0,num_examples,batch_size):
        j=nd.array(idx[i:min(i+batch_size,num_examples)])
        yield X.take(j),y.take(j)
def init_params():
    w=nd.random.normal(scale=1,shape=(num_inputs,1))
    b=nd.zeros(shape=(1,))
    params=[w,b]
    # allocate gradient buffers so autograd can write into them
    for param in params:
        param.attach_grad()
    return params
def net(X,w,b):
    return nd.dot(X,w)+b
def L2_penalty(w,b):
    # L2_penalty is a scalar; it is broadcast and added to every element of the loss
    return ((w**2).sum()+b**2)/2
def square_loss(output,label):
    return (output-label.reshape(output.shape))**2/2
def sgd(params,lr,batch_size):
    for param in params:
        # update in place using the gradient accumulated by autograd
        param[:]=param-lr*param.grad/batch_size
def test(net,params,X,y):
    # the * unpacks params: net takes three arguments, so [w, b] is expanded into w and b
    output=net(X,*params)
    return square_loss(output,y).mean().asscalar()
def train(lambd):
    epochs=10
    learning_rate=0.05
    w,b=params=init_params()
    train_loss,test_loss=[],[]
    for e in range(epochs):
        for data,label in data_iter(num_train):
            with autograd.record():
                output=net(data,*params)
                # the scalar penalty is broadcast onto every element of the loss
                loss=square_loss(output,label)+lambd*L2_penalty(*params)
            loss.backward()
            sgd(params,learning_rate,batch_size)
        train_loss.append(test(net,params,X_train,y_train))
        test_loss.append(test(net,params,X_test,y_test))
    return train_loss,test_loss
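A short usage sketch (the value lambd=5 is illustrative): with only 20 training examples and 200 inputs the model overfits badly when lambd=0, and a positive lambd shrinks the gap between training and test loss.

    # no regularization: training loss collapses while test loss stays high
    train_loss_0, test_loss_0 = train(lambd=0)
    # with the L2 penalty the two losses stay much closer together
    train_loss_5, test_loss_5 = train(lambd=5)
    print(train_loss_0[-1], test_loss_0[-1])
    print(train_loss_5[-1], test_loss_5[-1])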
With the Gluon implementation, lambd does not appear in the loss computation; regularization is instead specified through the Trainer's weight_decay parameter ('wd').
trainer = gluon.Trainer(net.collect_params(), 'sgd', {
'learning_rate': learning_rate, 'wd': weight_decay})
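A minimal end-to-end sketch of the Gluon version, assuming the same X_train/y_train/X_test/y_test as above; the function name train_gluon and the variable weight_decay are illustrative, not from the original notes.

    def train_gluon(weight_decay):
        epochs = 10
        learning_rate = 0.05
        # a single Dense(1) layer reproduces the linear model net(X, w, b)
        net = gluon.nn.Sequential()
        net.add(gluon.nn.Dense(1))
        net.initialize()
        square_loss = gluon.loss.L2Loss()
        data_iter_train = gluon.data.DataLoader(
            gluon.data.ArrayDataset(X_train, y_train), batch_size, shuffle=True)
        # weight decay is handed to the optimizer instead of being added to the loss
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'learning_rate': learning_rate, 'wd': weight_decay})
        for e in range(epochs):
            for data, label in data_iter_train:
                with autograd.record():
                    loss = square_loss(net(data), label)
                loss.backward()
                trainer.step(batch_size)
        return square_loss(net(X_test), y_test).mean().asscalar()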