PyTorch: note that torch.Tensor.normal_(mean, std) and torch.nn.init.normal(tensor, mean, std) have an easy-to-trip-over pitfall.
import torch

a = torch.zeros(16655, 200)
b = torch.FloatTensor(1, 200)
for i in range(4):
    a[i].normal_(mean=4, std=5)       # pitfall: this only keeps re-filling a[0], not row i
    a[i] = b.normal_(mean=4, std=5)   # workaround: fill a temporary tensor, then assign it into row i
    print(b)
    print(a[i])
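If the goal is simply to draw every entry of a from N(4, 5), a simpler sketch (using torch.nn.init.normal_, the in-place variant of the init function mentioned above) avoids the per-row loop entirely:

import torch

a = torch.zeros(16655, 200)
torch.nn.init.normal_(a, mean=4, std=5)   # fills the whole tensor in place
# equivalently: a.normal_(mean=4, std=5)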
Loss functions
Cross-entropy loss
class torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
loss = nn.CrossEntropyLoss()   # instantiate with default arguments
output = loss(logits, targets)
# input (logits, raw unnormalized scores), shape (N, C): it must be a 2-D tensor
# target (class indices), shape (N,)
# output: a scalar; if reduce is False, a tensor of size N instead
# so for an input of shape batch * length * C you have to reshape: input = input.reshape(-1, C) and target = target.reshape(-1)
# CrossEntropyLoss(logits, target) is equivalent to nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
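A minimal runnable sketch (the sizes N, C, batch, length and the names logits3d, targets2d below are made up for illustration) checking the NLLLoss/LogSoftmax decomposition and the reshape trick for 3-D inputs:

import torch
import torch.nn as nn

N, C = 4, 5
logits = torch.randn(N, C)
targets = torch.randint(0, C, (N,))
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(torch.allclose(ce, nll))   # True: the two losses agree

# 3-D case: logits of shape (batch, length, C), targets of shape (batch, length)
batch, length = 2, 3
logits3d = torch.randn(batch, length, C)
targets2d = torch.randint(0, C, (batch, length))
loss3d = nn.CrossEntropyLoss()(logits3d.reshape(-1, C), targets2d.reshape(-1))
print(loss3d)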
TensorFlow:
1. tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ys)
   computes the cross-entropy loss directly from sparse integer labels
2. tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=dense_labels)
   computes the cross-entropy loss from dense (one-hot / probability-vector) labels
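A small sketch (assuming TF 2.x; ys and dense_labels are placeholder names for the sparse and one-hot labels) showing that the two calls agree when the dense labels are the one-hot encoding of the sparse ones:

import tensorflow as tf

logits = tf.random.normal((4, 5))
ys = tf.constant([0, 2, 1, 4])           # sparse integer labels
dense_labels = tf.one_hot(ys, depth=5)   # one-hot (dense) labels
sparse_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=ys, logits=logits)
dense_loss = tf.nn.softmax_cross_entropy_with_logits(labels=dense_labels, logits=logits)
print(tf.reduce_max(tf.abs(sparse_loss - dense_loss)))  # close to 0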
NLLLoss
loss(x, class) = -x[class]
input: the values after log_softmax(logits); target: integer class indices such as 4, 6, 7
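A minimal sketch (the shapes and index values are made up) showing that NLLLoss really just picks out -x[class] for each sample:

import torch
import torch.nn as nn
import torch.nn.functional as F

x = F.log_softmax(torch.randn(3, 8), dim=1)    # input: already passed through log_softmax
cls = torch.tensor([4, 6, 7])                  # target class indices
per_sample = nn.NLLLoss(reduction='none')(x, cls)
manual = -x[torch.arange(3), cls]              # loss(x, class) = -x[class]
print(torch.allclose(per_sample, manual))      # True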
KLDivLoss
l_n = y_n (log y_n - x_n)
torch.nn.KLDivLoss(size_average=None, reduce=None, reduction='mean')
loss = nn.KLDivLoss()   # instantiate with default arguments
output = loss(log_prob, prob)
# input shape: any size (N, *); values that have already gone through log_softmax
# target shape: any size (N, *); values that have already gone through softmax
# output: a scalar; if reduce is False, a tensor of the same size (N, *)
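A minimal sketch (logits_p / logits_q are invented names) matching the element-wise formula above; note that reduction='batchmean' is the variant that matches the textbook KL divergence, while the default 'mean' averages over every element:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits_p = torch.randn(4, 10)
logits_q = torch.randn(4, 10)
log_prob = F.log_softmax(logits_p, dim=1)   # input: log-probabilities
prob = F.softmax(logits_q, dim=1)           # target: probabilities

kl = nn.KLDivLoss(reduction='batchmean')(log_prob, prob)
manual = (prob * (prob.log() - log_prob)).sum(dim=1).mean()   # l_n = y_n * (log y_n - x_n)
print(torch.allclose(kl, manual))   # True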
Ways to change requires_grad
- a.requires_grad_(requires_grad=True)
- a=a.detach()
a=torch.tensor([2.0])
print(a.requires_grad) #False
a.requires_grad_(requires_grad=True)
print(a.requires_grad) #True
a.requires_grad_(requires_grad=False)
print(a.requires_grad) #False
a.requires_grad_(requires_grad=True)
a.detach()
print(a.requires_grad) #True
a.requires_grad_(requires_grad=True)
a = a.detach()
print(a.requires_grad) #False
a.requires_grad_(requires_grad=True)
a.detach_()
print(a.requires_grad) #False
Understanding detach
a = a.detach() can be understood as cutting the computation graph: a is re-attached as a plain value (a leaf node), and the subgraph below a (including a itself) no longer takes part in backpropagation.
a = torch.randn(2, 2, requires_grad=True)
b = a * 2
c = b * 2
b.detach_()
c.sum().backward()
print(a.grad, b.grad, c.grad)
# tensor([[4., 4.],
#         [4., 4.]]) None None
a = torch.randn(2, 2, requires_grad=True)
b = a * 2
b.detach_()
c = b * 2
c.sum().backward()
print(a.grad, b.grad, c.grad)
# raises RuntimeError: element 0 of variables does not require grad and does not have a grad_fn
a = torch.randn(2, 2, requires_grad=True)
b = a * 2
b.detach_()
c = b * 2 + a
c.sum().backward()
print(a.grad, b.grad, c.grad)
# tensor([[1., 1.],
# [1., 1.]]) None None
a = torch.randn(3, 2, requires_grad=True)
b = a * 2
b.detach_()
d = torch.tensor(0.0, requires_grad=True)
c = b * 2 + d
c.sum().backward()
print(a.grad, b.grad, c.grad, d.grad)
# None None None tensor(6.)   (c has 3*2 = 6 elements, each contributing gradient 1 to d)
a = torch.randn(2, 2, requires_grad=True)
b = a * 2
d = a * 3
temp = b.detach()
c = temp * 2 + d
c.sum().backward()
print(a.grad, b.grad, c.grad, d.grad)
# tensor([[3., 3.],
#         [3., 3.]]) None None None