The ReLU Layer
Convolutional and fully connected layers have trainable parameters, so computing their gradients is straightforward. But what about a ReLU layer?
The conclusion up front: for ReLU(x), the gradient with respect to x is 0 when x <= 0 and 1 when x > 0. In other words, the ReLU blocks the upstream gradient for non-positive inputs and passes it through unchanged for positive ones.
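We can sanity-check this claim directly with autograd before building any network (a minimal sketch of my own, not part of the original test):

import torch

x = torch.tensor([-2.0, 0.0, 3.0], requires_grad=True)
torch.relu(x).sum().backward()  # sum() gives backward() a scalar to start from
print(x.grad)  # tensor([0., 0., 1.]): 0 for x <= 0 (PyTorch picks 0 at x = 0), 1 for x > 0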
Verifying with code
We test a simple network: pred = relu(XW), loss = mean(pred - y). Ignoring the ReLU for a moment, the gradient of W should be the column-wise mean of X (every row of X below is identical, so that mean is just one row of X, transposed).
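To spell out that claim: with N rows x_i in X, loss = (1/N) · Σ_i (ReLU(x_i · W) − y_i), so by the chain rule

∂loss/∂W = (1/N) · Σ_i ReLU′(x_i · W) · x_iᵀ

which reduces to the column-wise mean of X when every pre-activation x_i · W is positive, and to 0 when none are. The three test cases below exercise exactly these regimes.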
import torch
import torch.nn as nn


def seed_torch(seed=0):
    # random.seed(seed)
    # os.environ['PYTHONHASHSEED'] = str(seed)
    # np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.relu = nn.ReLU()
        # W is initialized to all ones, shape [3, 1]
        self.para = torch.nn.Parameter(torch.ones([3, 1]))

    def forward(self, x, y):
        pred = self.relu(torch.mm(x, self.para))  # pred = relu(XW)
        loss = torch.mean(pred - y)
        return pred, loss


if __name__ == '__main__':
    seed_torch(12)
    x = torch.ones([4, 3])
    # x[:, 1] = torch.zeros([4])   # uncomment for case 2 (WX = 0)
    # x[:, 2] = -torch.ones([4])   # uncomment for case 2 (WX = 0)
    y = torch.rand([4, 1])
    model = Model()
    p, l = model(x, y)
    l.backward()
    print(x)
    # print(y)
    print(x.grad)  # None: x is a plain tensor with requires_grad=False
    print(model.para.grad)
-
Case 1: WX > 0
With x = torch.ones([4, 3]), each pre-activation is 1 + 1 + 1 = 3 > 0, so the ReLU passes the gradient through. Output:

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
None
tensor([[1.],
        [1.],
        [1.]])

model.para.grad is exactly the column-wise mean of X.
-
Case 2: WX = 0
Uncommenting the two lines above, so that x = torch.ones([4, 3]); x[:, 1] = torch.zeros([4]); x[:, 2] = -torch.ones([4]), each pre-activation is 1 + 0 - 1 = 0. Output:

tensor([[ 1.,  0., -1.],
        [ 1.,  0., -1.],
        [ 1.,  0., -1.],
        [ 1.,  0., -1.]])
None
tensor([[0.],
        [0.],
        [0.]])

PyTorch defines the ReLU derivative at exactly 0 to be 0, so the gradient is blocked.
-
Case 3: WX < 0
With x = torch.ones([4, 3]) * (-1), each pre-activation is -3 < 0. Output:

tensor([[-1., -1., -1.],
        [-1., -1., -1.],
        [-1., -1., -1.],
        [-1., -1., -1.]])
None
tensor([[0.],
        [0.],
        [0.]])

Again the ReLU zeroes the gradient, so model.para.grad is all zeros.
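To tie the three cases together, here is a sketch (my addition; names such as manual and mask are hypothetical) that recomputes the parameter gradient by hand via the chain rule and checks it against autograd for all three inputs:

import torch
import torch.nn as nn

W = torch.ones([3, 1])
y = torch.rand([4, 1])

cases = {
    'WX > 0': torch.ones([4, 3]),
    'WX = 0': torch.tensor([[1., 0., -1.]]).repeat(4, 1),
    'WX < 0': -torch.ones([4, 3]),
}

for name, x in cases.items():
    para = nn.Parameter(W.clone())
    loss = torch.mean(torch.relu(x @ para) - y)
    loss.backward()
    # Chain rule by hand: d loss / d para = X^T @ (relu'(XW) / N)
    mask = (x @ W > 0).float()  # ReLU derivative: 1 where pre-activation > 0, else 0
    manual = x.t() @ (mask / x.shape[0])
    print(name, torch.allclose(para.grad, manual))  # True for all three cases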