masked_*
masked_select
# ~ negates a Boolean mask
# torch.masked_select(x, mask) is equivalent to x[mask]
# Returns a new 1-D tensor which indexes the input tensor according to the boolean mask, which is a BoolTensor.
x = torch.randn(3, 4)
mask = x.ge(0.5)
x
Out[5]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 0.5413, 1.3651, 2.1112, -0.5613],
[-0.2118, -0.8192, 1.1267, 1.2863]])
mask
Out[6]:
tensor([[False, False, False, False],
[ True, True, True, False],
[False, False, True, True]])
torch.masked_select(x, mask)
Out[7]: tensor([0.5413, 1.3651, 2.1112, 1.1267, 1.2863])
torch.masked_select(x, ~mask)
Out[16]: tensor([-0.3873, -0.4278, 0.1722, -0.9274, -0.5613, -0.2118, -0.8192])
x[mask]
Out[8]: tensor([0.5413, 1.3651, 2.1112, 1.1267, 1.2863])
x[~mask]
Out[15]: tensor([-0.3873, -0.4278, 0.1722, -0.9274, -0.5613, -0.2118, -0.8192])
masked_fill
# Fills elements of self tensor with value where mask is True.
x
Out[18]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 0.5413, 1.3651, 2.1112, -0.5613],
[-0.2118, -0.8192, 1.1267, 1.2863]])
mask
Out[19]:
tensor([[False, False, False, False],
[ True, True, True, False],
[False, False, True, True]])
x.masked_fill(mask, 0)
Out[20]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 0.0000, 0.0000, 0.0000, -0.5613],
[-0.2118, -0.8192, 0.0000, 0.0000]])
torch.masked_fill(x, mask, 0)
Out[21]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 0.0000, 0.0000, 0.0000, -0.5613],
[-0.2118, -0.8192, 0.0000, 0.0000]])
masked_scatter
# Copies elements from source into self tensor at positions where the mask is True
x
Out[25]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 0.5413, 1.3651, 2.1112, -0.5613],
[-0.2118, -0.8192, 1.1267, 1.2863]])
mask
Out[26]:
tensor([[False, False, False, False],
[ True, True, True, False],
[False, False, True, True]])
x.masked_scatter(mask, torch.ones_like(x))
Out[27]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 1.0000, 1.0000, 1.0000, -0.5613],
[-0.2118, -0.8192, 1.0000, 1.0000]])
torch.masked_scatter(x, mask, torch.ones_like(x))
Out[28]:
tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[ 1.0000, 1.0000, 1.0000, -0.5613],
[-0.2118, -0.8192, 1.0000, 1.0000]])
Important: gradient propagation
# None of the following 3 indexing styles interferes with gradient propagation
# out = out[mask]
# out = torch.masked_select(out, mask)
# out = out[0:2, 0:2]
import torch
from torch import nn
import torch.optim as optim
from torch.nn import functional as F
mask = torch.tensor([[True, True, False, False],
[True, True, False, False],
[False, False, False, False]])
x = torch.tensor([[-0.3873, -0.4278, 0.1722, -0.9274],
[0.5413, 1.3651, 2.1112, -0.5613],
[-0.2118, -0.8192, 1.1267, 1.2863]])
# [1, 1, 3, 4] -> [1, 1, 3, 4]
net = nn.Conv2d(1, 1, 3, 1, 1)
optimizer = optim.SGD(net.parameters(), lr=0.002, momentum=0.99)
for i in range(10):
    out = net(x.view(1, 1, 3, 4))  # [1, 1, 3, 4]
    out = torch.squeeze(out)  # [3, 4]
    # out = out[mask] (equivalent to out = torch.masked_select(out, mask)) selects the same elements as:
    out = out[0:2, 0:2]
    loss = F.mse_loss(out, torch.zeros(out.shape))  # scalar
    print(loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# tensor(0.2077, grad_fn=<MseLossBackward>)
# tensor(0.2038, grad_fn=<MseLossBackward>)
# tensor(0.1963, grad_fn=<MseLossBackward>)
# tensor(0.1856, grad_fn=<MseLossBackward>)
# tensor(0.1720, grad_fn=<MseLossBackward>)
# tensor(0.1561, grad_fn=<MseLossBackward>)
# tensor(0.1387, grad_fn=<MseLossBackward>)
# tensor(0.1205, grad_fn=<MseLossBackward>)
# tensor(0.1021, grad_fn=<MseLossBackward>)
# tensor(0.0843, grad_fn=<MseLossBackward>)
gather & scatter
# gather
out[i][j][k] = input[index[i][j][k]][j][k] # if dim == 0
out[i][j][k] = input[i][index[i][j][k]][k] # if dim == 1
out[i][j][k] = input[i][j][index[i][j][k]] # if dim == 2
# scatter
self[index[i][j][k]][j][k] = src[i][j][k] # if dim == 0
self[i][index[i][j][k]][k] = src[i][j][k] # if dim == 1
self[i][j][index[i][j][k]] = src[i][j][k] # if dim == 2
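A minimal sketch of these indexing rules (the tensors and index values below are made up for illustration):
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]])
# gather, dim=1: out[i][j] = x[i][ index[i][j] ]
index = torch.tensor([[2, 0],
                      [1, 1]])
torch.gather(x, 1, index)
# tensor([[3, 1],
#         [5, 5]])
# scatter, dim=1: self[i][ index2[i][j] ] = src[i][j]
index2 = torch.tensor([[2, 0],
                       [1, 2]])
src = torch.tensor([[10, 20],
                    [30, 40]])
torch.zeros(2, 3, dtype=torch.long).scatter(1, index2, src)
# tensor([[20,  0, 10],
#         [ 0, 30, 40]])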
torch.index_select
>>> x = torch.randn(3, 4)
>>> x
tensor([[ 0.1427, 0.0231, -0.5414, -1.0009],
[-0.4664, 0.2647, -0.1228, -1.1068],
[-1.1734, -0.6571, 0.7230, -0.6004]])
>>> indices = torch.tensor([0, 2])
>>> torch.index_select(x, 0, indices)
tensor([[ 0.1427, 0.0231, -0.5414, -1.0009],
[-1.1734, -0.6571, 0.7230, -0.6004]])
>>> torch.index_select(x, 1, indices)
tensor([[ 0.1427, -0.5414],
[-0.4664, -0.1228],
[-1.1734, 0.7230]])
torch.take
# Index a multi-dimensional tensor with flat (1-D) indices, as if it were flattened
>>> src = torch.tensor([[4, 3, 5],
... [6, 7, 8]])
>>> torch.take(src, torch.tensor([0, 2, 5]))
tensor([ 4, 5, 8])
torch.where
Closely related to torch.masked_scatter with the condition negated (note: masked_scatter consumes source elements in flattened order at the True positions, so the two only coincide when that ordering does not matter, e.g. a constant source as below)
# take from x where the condition is True, from y where it is False
>>> x = torch.randn(3, 2)
>>> y = torch.ones(3, 2)
>>> x
tensor([[-0.4620, 0.3139],
[ 0.3898, -0.7197],
[ 0.0478, -0.1657]])
>>> torch.where(x > 0, x, y) # matches torch.masked_scatter(x, x <= 0, y) here because y is all ones
tensor([[ 1.0000, 0.3139],
[ 0.3898, 1.0000],
[ 0.0478, 1.0000]])
>>> x = torch.randn(2, 2, dtype=torch.double)
>>> x
tensor([[ 1.0779, 0.0383],
[-0.8785, -1.1089]], dtype=torch.float64)
>>> torch.where(x > 0, x, 0.)
tensor([[1.0779, 0.0383],
[0.0000, 0.0000]], dtype=torch.float64)
cat & stack
a1 = torch.rand(4,3,32,32)
a2 = torch.rand(5,3,32,32)
# cat
t = torch.cat([a1, a2, a2], dim=0)
t.shape
Out[76]: torch.Size([14, 3, 32, 32])
# stack
torch.stack([a1, a1], dim=0).shape
Out[79]: torch.Size([2, 4, 3, 32, 32])
torch.stack([a1, a1], dim=1).shape
Out[80]: torch.Size([4, 2, 3, 32, 32])
torch.stack([a1, a1], dim=3).shape
Out[81]: torch.Size([4, 3, 32, 2, 32])
chunk & split & unbind
a1 = torch.rand(4,3,32,32)
a2 = torch.rand(5,3,32,32)
# split: divide by chunk length(s) along a dim
t1, t2 = a1.split(2, dim=0)
t1.shape, t2.shape
Out[86]: (torch.Size([2, 3, 32, 32]), torch.Size([2, 3, 32, 32]))
t1, t2 = a1.split([1,3], dim=0)
t1.shape, t2.shape
Out[88]: (torch.Size([1, 3, 32, 32]), torch.Size([3, 3, 32, 32]))
# chunk: divide into a given number of chunks
t1, t2 = a1.chunk(2, dim=0)
t1.shape, t2.shape
Out[90]: (torch.Size([2, 3, 32, 32]), torch.Size([2, 3, 32, 32]))
t1, t2, t3, t4 = a1.chunk(4, dim=0)
t1.shape, t2.shape, t3.shape, t4.shape
Out[93]:
(torch.Size([1, 3, 32, 32]),
torch.Size([1, 3, 32, 32]),
torch.Size([1, 3, 32, 32]),
torch.Size([1, 3, 32, 32]))
# torch.unbind(input, dim=0) → seq Removes a tensor dimension.
# a1.unbind(dim=i) is a1.chunk(a1.shape[i], dim=i) with dimension i squeezed out of each piece (unbind removes the dimension, chunk keeps it with size 1)
t1, t2, t3, t4 = a1.unbind(dim=0)
t1.shape, t2.shape, t3.shape, t4.shape
Out[61]:
(torch.Size([3, 32, 32]),
torch.Size([3, 32, 32]),
torch.Size([3, 32, 32]),
torch.Size([3, 32, 32]))
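A quick check of the relation between unbind and chunk (a sketch using the a1 from above): each chunk keeps a size-1 leading dimension, so squeezing it recovers the unbind pieces.
pieces = [c.squeeze(0) for c in a1.chunk(a1.shape[0], dim=0)]
all(torch.equal(u, c) for u, c in zip(a1.unbind(dim=0), pieces))
# True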
squeeze & unsqueeze
# squeeze: reduce dimensionality by removing size-1 dims — torch.squeeze(input, dim=None, *, out=None)
# unsqueeze: raise dimensionality by inserting a size-1 dim at the given position — torch.unsqueeze(input, dim)
t = torch.randn(5, 1, 4, 1, 10)
t.squeeze().shape
Out[50]: torch.Size([5, 4, 10])
t.squeeze(dim=3).shape
Out[51]: torch.Size([5, 1, 4, 10])
t.unsqueeze(dim=0).shape
Out[52]: torch.Size([1, 5, 1, 4, 1, 10])
reshape & view
My take: just use reshape everywhere.
- When the tensor meets the contiguity requirement, reshape() == view(): the result shares storage with the original tensor.
- When the tensor is not contiguous, reshape() == contiguous() + view(): a tensor with new storage is created, which does not share memory with the original.
A multi-dimensional tensor is backed by a single contiguous 1-D array in memory; elements are located through the metadata (sizes and strides).
is_contiguous: whether the element order in storage (inspect with storage()) matches the row-major flattened order of the tensor (inspect with flatten()).
view(): if the tensor is contiguous, the shape change succeeds; storage is untouched, only the metadata changes;
        if the tensor is not contiguous, it raises an error;
contiguous(): builds a new contiguous tensor from a non-contiguous one (both storage and metadata change);
reshape(): if the tensor is contiguous, behaves like view() (storage unchanged, metadata modified);
           if the tensor is not contiguous, a new tensor is created (both storage and metadata change)
# a freshly created tensor a is contiguous
a = torch.arange(12).reshape(3,4)
a.is_contiguous()
Out[19]: True
a.flatten()
Out[22]: tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
a.storage()
Out[25]:
0
1
2
3
4
5
6
7
8
9
10
11
[torch.LongStorage of size 12]
a.storage().data_ptr()
Out[27]: 2214873611520
# after transposing, b still uses a's storage, but it is no longer contiguous
b = a.transpose(0, 1)
b.is_contiguous()
Out[21]: False
b.flatten()
Out[23]: tensor([ 0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11])
b.storage()
Out[26]:
0
1
2
3
4
5
6
7
8
9
10
11
[torch.LongStorage of size 12]
b.storage().data_ptr()
Out[28]: 2214873611520
# view on the contiguous tensor a
c = a.view(-1)
c.is_contiguous()
Out[30]: True
c.storage().data_ptr()
Out[33]: 2214873611520
# reshape on the contiguous tensor a
d = a.reshape(-1)
d.is_contiguous()
Out[32]: True
d.storage().data_ptr()
Out[34]: 2214873611520
# view on the non-contiguous tensor b raises an error
e = b.view(-1)
Traceback (most recent call last):
File "E:\anaconda\envs\py37torch\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-36-858eec869bd0>", line 1, in <module>
e = b.view(-1)
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
# reshape on the non-contiguous tensor b creates a new tensor (the storage changed)
e = b.reshape(-1)
e.storage().data_ptr()
Out[38]: 2214875150592
# contiguous() on the non-contiguous tensor b creates a new tensor (the storage changed), after which view works
f = b.contiguous().view(-1)
f.storage().data_ptr()
Out[40]: 2214875145472
expand & repeat
- expand: broadcasting; the arguments are the target shape
- repeat: memory is copied; the arguments are per-dimension repeat counts
# -1 means leave that dimension unchanged
b = torch.randn(1, 32, 1, 1)
b.expand(4, 32, 14, 14).shape
Out[42]: torch.Size([4, 32, 14, 14])
b.expand(-1, 32, -1, -1).shape
Out[43]: torch.Size([1, 32, 1, 1])
# Note: an extra leading repeat count replicates the whole tensor along a new dimension, as in example 2 below
# example 1
b.repeat(4, 32, 1, 1).shape
Out[44]: torch.Size([4, 1024, 1, 1])
# example 2
b.repeat(5, 4, 32, 1, 1).shape
Out[46]: torch.Size([5, 4, 1024, 1, 1])
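The memory behaviour claimed above can be checked by looking at strides and storage pointers (a minimal sketch using the b defined above):
e = b.expand(4, 32, 14, 14)
e.stride()  # expanded dimensions get stride 0, so no data is copied
# (0, 1, 0, 0)
e.storage().data_ptr() == b.storage().data_ptr()
# True
r = b.repeat(4, 1, 14, 14)
r.storage().data_ptr() == b.storage().data_ptr()  # repeat allocates a new storage and copies
# False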
t & transpose & permute
a = torch.randn(2, 3)
b = torch.randn(4, 5, 6)
# t(): transpose for 1-D / 2-D tensors only
# transposing a 1-D tensor leaves it unchanged
a.t()
Out[47]:
tensor([[-0.5892, 0.5083],
[-1.5597, 1.2658],
[ 0.8393, 1.2811]])
# transpose: swap two dimensions
a.transpose(0, 1).shape
Out[48]: torch.Size([3, 2])
# permute: reorder multiple dimensions
b.permute(2,0,1).shape
Out[49]: torch.Size([6, 4, 5])
norm
norm computes a norm; normalize performs normalization
# norms
a = torch.full([8], 1).float()
b = a.view(2, 4)
c = a.view(2, 2, 2)
# 1-norm: sum of absolute values of the elements
a.norm(1), b.norm(1), c.norm(1)
Out[13]: (tensor(8.), tensor(8.), tensor(8.))
# 2-norm: square root of the sum of squared elements
a.norm(2), b.norm(2), c.norm(2)
Out[14]: (tensor(2.8284), tensor(2.8284), tensor(2.8284))
b.norm(1, dim=1)
Out[15]: tensor([4., 4.])
b.norm(2, dim=1)
Out[16]: tensor([2., 2.])
normalize
norm computes a norm; normalize performs normalization
v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}
# Performs L_p normalization of inputs over specified dimension.
# torch.nn.functional.normalize(input, p=2.0, dim=1, eps=1e-12, out=None)
# the default p is 2 (L2 normalization)
t = torch.randn(2, 2, 3)
t
Out[65]:
tensor([[[-1.6034, 2.3001, 0.8102],
[-0.3921, 0.1690, 1.9939]],
[[ 0.9074, -0.6176, -0.1238],
[ 0.0509, 0.1963, -0.3056]]])
F.normalize(t, dim=2)
Out[67]:
tensor([[[-0.5494, 0.7881, 0.2776],
[-0.1923, 0.0829, 0.9778]],
[[ 0.8215, -0.5591, -0.1121],
[ 0.1387, 0.5354, -0.8332]]])
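The formula above can be verified by hand against F.normalize (a sketch; 1e-12 is the default eps):
manual = t / t.norm(p=2, dim=2, keepdim=True).clamp_min(1e-12)
torch.allclose(manual, F.normalize(t, p=2, dim=2))
# True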
max min mean prod
# min, max, mean, prod (product of all elements)
a
Out[25]:
tensor([[ 0.2903, 0.3979, -0.1571, 2.8208, 0.3580, 0.3855],
[-1.3406, 2.1643, -0.8738, -0.4514, 0.5275, -1.8139],
[ 0.8159, 0.2201, 0.7153, 3.1825, 1.5754, -1.1163],
[ 1.0856, 0.1158, -0.3165, -0.6676, 2.2467, -0.3533]])
a.max(dim=1)
Out[26]:
torch.return_types.max(
values=tensor([2.8208, 2.1643, 3.1825, 2.2467]),
indices=tensor([3, 1, 3, 4]))
a.max(dim=1).values
Out[27]: tensor([2.8208, 2.1643, 3.1825, 2.2467])
a.max(dim=1, keepdim=True).values
Out[28]:
tensor([[2.8208],
[2.1643],
[3.1825],
[2.2467]])
argmax argmin
# argmax argmin
a
Out[25]:
tensor([[ 0.2903, 0.3979, -0.1571, 2.8208, 0.3580, 0.3855],
[-1.3406, 2.1643, -0.8738, -0.4514, 0.5275, -1.8139],
[ 0.8159, 0.2201, 0.7153, 3.1825, 1.5754, -1.1163],
[ 1.0856, 0.1158, -0.3165, -0.6676, 2.2467, -0.3533]])
a.argmax(dim=1)
Out[29]: tensor([3, 1, 3, 4])
a.argmax(dim=1, keepdim=True)
Out[30]:
tensor([[3],
[1],
[3],
[4]])
topk kthvalue
# topk
a
Out[25]:
tensor([[ 0.2903, 0.3979, -0.1571, 2.8208, 0.3580, 0.3855],
[-1.3406, 2.1643, -0.8738, -0.4514, 0.5275, -1.8139],
[ 0.8159, 0.2201, 0.7153, 3.1825, 1.5754, -1.1163],
[ 1.0856, 0.1158, -0.3165, -0.6676, 2.2467, -0.3533]])
a.topk(3, dim=1)
Out[31]:
torch.return_types.topk(
values=tensor([[ 2.8208, 0.3979, 0.3855],
[ 2.1643, 0.5275, -0.4514],
[ 3.1825, 1.5754, 0.8159],
[ 2.2467, 1.0856, 0.1158]]),
indices=tensor([[3, 1, 5],
[1, 4, 3],
[3, 4, 0],
[4, 0, 1]]))
# returns the 3rd smallest value along the dim
a.kthvalue(3, dim=1)
Out[32]:
torch.return_types.kthvalue(
values=tensor([ 0.3580, -0.8738, 0.7153, -0.3165]),
indices=tensor([4, 2, 2, 2]))
eq vs. equal
# > >= < <= != ==
a > 0
Out[35]:
tensor([[ True, True, False, True, True, True],
[False, True, False, False, True, False],
[ True, True, True, True, True, False],
[ True, True, False, False, True, False]])
# torch.eq(a, b): element-wise ==, returns a Bool tensor; torch.equal(a, b): returns a single bool, True only if same shape and all elements equal
b = a
torch.eq(a, b)
Out[37]:
tensor([[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True],
[True, True, True, True, True, True]])
torch.equal(a, b)
Out[38]: True
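The difference shows up once the two tensors are not identical (a small sketch with made-up values):
u = torch.tensor([1, 2, 3])
v = torch.tensor([1, 2, 4])
torch.eq(u, v)     # element-wise, returns a Bool tensor
# tensor([ True,  True, False])
torch.equal(u, v)  # single bool: True only if same shape and all elements equal
# False
torch.equal(u, u.clone())
# True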
Math operations
element-wise:
+ add
- sub
* mul
/ div
** pow
sqrt
rsqrt
exp
log (natural logarithm, base e)
floor ceil round trunc frac (see the sketch after this list)
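A small sketch of the rounding-style element-wise ops (values made up for illustration):
z = torch.tensor([-1.7, 0.5, 2.3])
z.floor(), z.ceil(), z.trunc(), z.frac()
# (tensor([-2.,  0.,  2.]), tensor([-1.,  1.,  3.]), tensor([-1.,  0.,  2.]), tensor([-0.7000,  0.5000,  0.3000]))
z.abs().rsqrt()  # rsqrt(x) = 1 / sqrt(x)
# tensor([0.7670, 1.4142, 0.6594])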
Matrix operations
# mm (2-D only), matmul (batched / broadcasting), @ (operator form of matmul)
# for >2-D inputs, matmul multiplies the last two dims as matrices and broadcasts the batch dims
# a 1-D vector on the left of @ is treated as a row vector
(torch.ones(3)@torch.ones(3, 4)).shape
Out[61]: torch.Size([4])
# a 1-D vector on the right of @ is treated as a column vector
(torch.ones(3, 4)@torch.ones(4)).shape
Out[62]: torch.Size([3])
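A sketch of the batched case mentioned above (only the last two dims are multiplied as matrices; batch dims broadcast):
A = torch.randn(10, 3, 4)
B = torch.randn(10, 4, 5)
(A @ B).shape               # 10 independent (3x4) @ (4x5) products
# torch.Size([10, 3, 5])
C = torch.randn(4, 5)
torch.matmul(A, C).shape    # C is broadcast across the batch dimension
# torch.Size([10, 3, 5])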