1. Definitions and Attributes
0-D tensor (scalar): 0, 1
1-D tensor (array/vector): [0], [0, 1, 2]
2-D tensor (matrix)
3-D tensor
…
1.1 Definition and Access
Definition
# torch.tensor(data, dtype=None, device=None, requires_grad=False, pin_memory=False) → Tensor
import torch
a_0d = torch.tensor(0)
a_1d = torch.tensor([0])
a_2d = torch.tensor([[0]])
a_0d_2 = torch.tensor(1)
a_1d_2 = torch.tensor([1., 2.])
a_2d_2 = torch.tensor([[1., 2.],
                       [2., 4.]])
a_3d = torch.tensor([
    [[1, 2],
     [2, 4]],
    [[3, 6],
     [6, 9]]
])
Access
# Read
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(x[1][2])        # tensor(6) -- still a tensor, not a Python scalar
print(x[1][2].item()) # 6 -- a Python scalar
# Write
x[0][1] = 8
print(x)
# tensor([[ 1, 8, 3],
#         [ 4, 5, 6]])
1.2 Attributes

| Attribute / Method | Returns | Description |
| --- | --- | --- |
| tensor.dim() | scalar | number of dimensions |
| tensor.shape / tensor.size() | torch.Size (a tuple subclass) | shape |
| len(tensor) | scalar | size of the first dimension |
| tensor.item() | scalar | the value as a Python number (single-element tensors only) |
| tensor.dtype | | data type |
| tensor.requires_grad | | whether autograd tracks operations on this tensor |
a = torch.tensor([1.])
print(a.dim()) # 1
print(a.size()) # torch.Size([1])
print(a.size()[0]) # 1
print(len(a)) # 1
print(a.item()) # 1.0
b = torch.tensor([1., 2.])
print(b.dim()) # 1
print(b.size()) # torch.Size([2])
print(len(b)) # 2
print(b.item()) # ValueError: only one element tensors can be converted to Python scalars
c = torch.tensor([[1., 2.], [2., 4.], [4., 8.]])
print(c.dim()) # 2
print(c.size()) # torch.Size([3, 2])
print(len(c)) # 3
1.3 Other Construction Methods

| Function | Description |
| --- | --- |
| torch.ones(*size) | all ones |
| torch.zeros(*size) | all zeros |
| torch.eye(n, m) | ones on the diagonal, zeros elsewhere |
| torch.empty(*size) | uninitialized tensor |
| torch.arange(start=0, end, step=1) | values from start to end (exclusive) with stride step (default 1) |
| torch.linspace(start, end, steps) | steps values evenly spaced from start to end (inclusive) |
| torch.logspace(start, end, steps, base=10.0) | steps values logarithmically spaced from base^start to base^end |
| torch.normal(mean, std, size) | normal (Gaussian) distribution |
| Tensor.uniform_(from, to) | uniform distribution (in-place) |
| torch.randn(size, dtype=None) | standard normal distribution (mean 0, std 1) |
| torch.rand(size, dtype=None) | uniform distribution on [0, 1) |
| torch.rand_like(input, dtype=None) | uniform distribution with the same shape as input |
| torch.randint(low=0, high, size, dtype=None) | uniform integers in [low, high) |
| torch.randperm(n) | random permutation of 0..n-1 |
| torch.tensor(ndarray) / torch.from_numpy(ndarray) | numpy → torch.Tensor |
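Two entries in the table get no dedicated example later in this section, so here is a minimal sketch of randperm and from_numpy (the numpy array is an arbitrary illustration; the random permutation will differ on each run):
>>> torch.randperm(4)   # random order of 0..3
tensor([2, 0, 3, 1])
>>> import numpy as np
>>> torch.from_numpy(np.array([1, 2, 3]))   # shares memory with the numpy array; dtype follows it
tensor([1, 2, 3])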
Constant fills
torch.zeros(*size, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
torch.ones(*size) → Tensor
torch.empty(*size) → Tensor
torch.zeros((2, 4), dtype=torch.int32)
# tensor([[ 0, 0, 0, 0],
# [ 0, 0, 0, 0]], dtype=torch.int32)
torch.ones((2, 4))
torch.empty((2, 3))  # uninitialized memory; contents are arbitrary
# tensor(1.00000e-08 *
#        [[ 6.3984,  0.0000,  0.0000],
#         [ 0.0000,  0.0000,  0.0000]])
torch.full(size, fill_value) → Tensor
torch.full((2, 3), 3.141592)
# tensor([[ 3.1416, 3.1416, 3.1416],
# [ 3.1416, 3.1416, 3.1416]])
Evenly spaced 1-D sequences
torch.arange(start=0, end, step=1) → Tensor
Returns a 1-D tensor of size $\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil$ over the interval [start, end), starting at start with common difference step:
$out_{i+1} = out_i + \text{step}$
>>> torch.arange(5)
tensor([ 0, 1, 2, 3, 4])
>>> torch.arange(1, 4)
tensor([ 1, 2, 3])
>>> torch.arange(1, 2.5, 0.5)
tensor([ 1.0000, 1.5000, 2.0000])
torch.linspace(start, end, steps=100) → Tensor
Returns a 1-D tensor of steps values evenly spaced over the closed interval [start, end].
>>> torch.linspace(3, 10, steps=5)
tensor([ 3.0000, 4.7500, 6.5000, 8.2500, 10.0000])
>>> torch.linspace(-10, 10, steps=5)
tensor([-10., -5., 0., 5., 10.])
>>> torch.linspace(start=-10, end=10, steps=5)
tensor([-10., -5., 0., 5., 10.])
torch.logspace(start, end, steps=100, base=10.0) → Tensor
Returns a 1-D tensor of steps values logarithmically spaced from $base^{start}$ to $base^{end}$ (inclusive).
>>> torch.logspace(start=-10, end=10, steps=5)
tensor([ 1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])
>>> torch.logspace(start=0.1, end=1.0, steps=5)
tensor([ 1.2589, 2.1135, 3.5481, 5.9566, 10.0000])
>>> torch.logspace(start=0.1, end=1.0, steps=1)
tensor([1.2589])
>>> torch.logspace(start=2, end=2, steps=1, base=2)
tensor([4.])
torch.eye(n, m) → Tensor
Returns a 2-D tensor with ones on the diagonal and zeros elsewhere; n rows and m columns (m defaults to n).
>>> torch.eye(3)
tensor([[ 1., 0., 0.],
[ 0., 1., 0.],
[ 0., 0., 1.]])
>>> torch.eye(n=3, m=2)
tensor([[1., 0.],
[0., 1.],
[0., 0.]])
Random distributions
torch.normal(mean, std, size) → Tensor
Samples from normal (Gaussian) distributions with the given mean(s) and standard deviation(s).
>>> torch.normal(mean=torch.arange(1., 11.), std=torch.arange(1, 0, -0.1)) # per-element mean and std
tensor([ 1.0425, 3.5672, 2.7969, 4.2925, 4.7229, 6.2134,
8.0505, 8.1408, 9.0563, 10.0566])
>>> torch.normal(mean=0.5, std=torch.arange(1., 6.)) # shared mean, per-element std
tensor([-1.2793, -1.0732, -2.0687, 5.1177, -1.2303])
>>> torch.normal(2, 3, size=(1, 4)) # shared mean and std, with an explicit shape
tensor([[-1.3987, -1.9544, 3.6048, 0.7909]])
# mean: the mean for all distributions
# std (float) – the standard deviation for all distributions
torch.randn(size, dtype=None) → Tensor
Standard normal distribution with mean 0 and standard deviation 1:
$out_i \sim N(0, 1)$
>>> torch.randn(4)
tensor([-2.1436, 0.9966, 2.3426, -0.6366])
>>> torch.randn(2, 3)
tensor([[ 1.5954, 2.8929, -1.0923],
[ 1.1719, -0.4709, -0.1996]])
torch.rand(size, dtype=None) → Tensor
Uniform distribution on [0, 1).
>>> torch.rand(4)
tensor([ 0.5204, 0.2503, 0.3525, 0.5673])
>>> torch.rand(2, 3)
tensor([[ 0.8237, 0.5781, 0.6879],
[ 0.3816, 0.7249, 0.0998]])
torch.rand_like(input, dtype=None) → Tensor
Returns a tensor the same size as input, filled with values uniformly distributed on [0, 1).
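A minimal sketch (the random values shown are illustrative; your output will differ):
>>> x = torch.zeros(2, 3)
>>> torch.rand_like(x)
tensor([[0.1840, 0.9613, 0.3254],
        [0.7431, 0.0521, 0.6410]])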
torch.randint(low=0, high, size, dtype=None) → Tensor
Returns a tensor of integers uniformly distributed in [low, high).
>>> torch.randint(3, 5, (3,))
tensor([4, 3, 4])
>>> torch.randint(10, (2, 2))
tensor([[0, 2],
[5, 5]])
2. Tensor Operations
Tensor functions in PyTorch generally come in three forms (see the sketch below):
- b = a.func()      # a is unchanged; a new tensor b is returned
- b = torch.func(a) # functional form, equivalent to the method form
- a.func_()         # in-place: modifies a itself
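A quick demonstration of the three styles, using add, which supports all of them:
>>> a = torch.tensor([1., 2.])
>>> a.add(10)        # method form: returns a new tensor, a is unchanged
tensor([11., 12.])
>>> torch.add(a, 10) # functional form: same result
tensor([11., 12.])
>>> a.add_(10)       # in-place form: a itself is modified (and returned)
tensor([11., 12.])
>>> a
tensor([11., 12.])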
2.1 Shape Manipulation

| Function | Description |
| --- | --- |
| torch.reshape(tensor, shape) / Tensor.view(shape) | change the shape |
| torch.transpose(tensor, dim0, dim1) | transpose (swap two dimensions) |
| torch.cat((tensor1, tensor2), dim) | concatenate |
| torch.flatten(tensor, start_dim, end_dim) | flatten |
torch.reshape(input, shape) → Tensor
Changes the shape; the product of the dimension sizes must be the same before and after.
# Example 1 -- 3 rows x 2 cols to 2 rows x 3 cols
a = torch.tensor([[1., 2.], [2., 4.], [4., 8.]])
# Note the differences between b1, b2, and b3
b1 = a.reshape(2, 3)
# tensor([[1., 2., 2.],
# [4., 4., 8.]])
print(b1.size())
# torch.Size([2, 3])
b2 = a.reshape(2, 3, 1)
# tensor([[[1.],
# [2.],
# [2.]],
#
# [[4.],
# [4.],
# [8.]]])
print(b2.size())
b3 = a.reshape(1, 2, 3)
# tensor([[[1., 2., 2.],
# [4., 4., 8.]]])
# Example 2 -- 4 rows x 5 cols to 2 rows x 10 cols
a = torch.randint(0, 10, size=(4, 5))
# tensor([[0, 9, 7, 0, 1],
# [0, 0, 7, 7, 4],
# [8, 9, 2, 4, 5],
# [0, 5, 0, 8, 2]])
b = a.reshape(2, 10)
# tensor([[0, 9, 7, 0, 1, 0, 0, 7, 7, 4],
# [8, 9, 2, 4, 5, 0, 5, 0, 8, 2]])
Common idiom: reshape(-1, 1) fixes the number of columns at 1 and infers the number of rows, as sketched below.
w = w.reshape(-1, 1)
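A minimal sketch of the idiom with an arbitrary 1-D tensor:
>>> w = torch.arange(4)
>>> w.reshape(-1, 1)
tensor([[0],
        [1],
        [2],
        [3]])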
- transpose: swaps two dimensions
a = torch.rand(2, 3)
print(a)
print(torch.transpose(a, 0, 1))
# Output:
tensor([[0.6246, 0.7994, 0.9683],
[0.3239, 0.1928, 0.8525]])
tensor([[0.6246, 0.3239],
[0.7994, 0.1928],
[0.9683, 0.8525]])
- cat
torch.cat((tensor1, tensor2), dim) --> Tensor
dim: the dimension along which to concatenate -- 0 stacks rows, 1 joins columns side by side
>>> x = torch.randn(2, 3)
>>> x
tensor([[ 0.6580, -1.0969, -0.4614],
[-0.1034, -0.5790, 0.1497]])
>>> torch.cat((x, x, x), 0)
tensor([[ 0.6580, -1.0969, -0.4614],
[-0.1034, -0.5790, 0.1497],
[ 0.6580, -1.0969, -0.4614],
[-0.1034, -0.5790, 0.1497],
[ 0.6580, -1.0969, -0.4614],
[-0.1034, -0.5790, 0.1497]])
>>> torch.cat((x, x, x), 1)
tensor([[ 0.6580, -1.0969, -0.4614, 0.6580, -1.0969, -0.4614, 0.6580,
-1.0969, -0.4614],
[-0.1034, -0.5790, 0.1497, -0.1034, -0.5790, 0.1497, -0.1034,
-0.5790, 0.1497]])
- flatten
torch.flatten(input, start_dim=0, end_dim=-1) --> Tensor
>>> t = torch.tensor([[[1, 2],
[3, 4]],
[[5, 6],
[7, 8]]])
>>> torch.flatten(t)
tensor([1, 2, 3, 4, 5, 6, 7, 8])
>>> torch.flatten(t, start_dim=1)
tensor([[1, 2, 3, 4],
[5, 6, 7, 8]])
2.2 Element-wise Operations

| Function | Description |
| --- | --- |
| torch.abs() | absolute value |
| torch.acos() / asin() / atan() | inverse trigonometric functions |
| torch.ceil() | round up |
| torch.floor() | round down |
| torch.round() | round to the nearest integer (ties go to the even integer) |
| torch.clamp(input, min, max) | clamp values into [min, max] |
| torch.exp() | exponential |
| torch.log() | natural logarithm (base e) |
| torch.log10() | logarithm base 10 |
| torch.logsumexp() | log of summed exponentials |
| torch.pow(self, exponent) | power |
| torch.sqrt() | square root |
| torch.reciprocal() | reciprocal |
| torch.frac() | fractional part |
| torch.sign() | sign (1 for positive, -1 for negative, 0 for zero) |
| torch.sigmoid() | sigmoid function |
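Several entries in the table get no dedicated example below, so here is a quick illustrative check of the three rounding functions (values chosen to avoid ties):
>>> x = torch.tensor([-1.7, 0.3, 2.6])
>>> torch.ceil(x)
tensor([-1., 1., 3.])
>>> torch.floor(x)
tensor([-2., 0., 2.])
>>> torch.round(x)
tensor([-2., 0., 3.])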
- torch.clamp
torch.clamp(input, min, max)
>>> a = torch.randn(4)
>>> a
tensor([-1.7120, 0.1734, -0.0478, -0.0922])
>>> torch.clamp(a, min=-0.5, max=0.5)
tensor([-0.5000, 0.1734, -0.0478, -0.0922])
>>> torch.clamp(a, min=0.5)
tensor([0.5000, 0.5000, 0.5000, 0.5000])
- torch.digamma(input): the logarithmic derivative of the gamma function, $\psi(x) = \frac{d}{dx}\ln\Gamma(x)$
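A minimal sketch; $\psi(1) = -\gamma \approx -0.5772$, the negative Euler-Mascheroni constant:
>>> torch.digamma(torch.tensor([1., 0.5]))
tensor([-0.5772, -1.9635])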
- torch.logsumexp(): log of summed exponentials; as a reduction it is covered in section 2.4
- torch.frac(input)
>>> torch.frac(torch.tensor([1, 2.5, -3.2]))
tensor([ 0.0000, 0.5000, -0.2000])
- torch.sign()
Sign function
# positive -> 1, negative -> -1, zero -> 0
>>> a = torch.tensor([0.7, -1.2, 0., 2.3])
>>> torch.sign(a)
tensor([ 1., -1., 0., 1.])
- torch.sigmoid(): $\sigma(x) = \frac{1}{1 + e^{-x}}$
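A minimal sketch; $\sigma(0) = 0.5$ exactly:
>>> torch.sigmoid(torch.tensor([-1., 0., 1.]))
tensor([0.2689, 0.5000, 0.7311])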
2.3 Arithmetic

| Function | Description |
| --- | --- |
| torch.add() / + | addition |
| torch.div() / / | division |
| torch.fmod() | remainder of division |
| torch.mul() | element-wise multiplication |
| torch.matmul() | vector dot product / matrix multiplication |
| torch.mm() | matrix multiplication |
| torch.cross() | cross product |
| torch.bmm() | batch matrix multiplication; both tensors must be 3-D, the last two dims are multiplied |
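torch.add gets no dedicated example below, so a quick sketch showing that it broadcasts like the other element-wise ops:
>>> a = torch.ones(2, 2)
>>> a + 1                                    # scalar broadcast
tensor([[2., 2.],
        [2., 2.]])
>>> torch.add(a, torch.tensor([10., 20.]))   # row vector broadcast across rows
tensor([[11., 21.],
        [11., 21.]])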
- torch.div(input, other)
- divisor is a scalar: $out_i = \frac{input_i}{other}$
>>> a = torch.randn(5)
>>> a
tensor([ 0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
>>> torch.div(a, 0.5)
tensor([ 0.7620, 2.5548, -0.5944, -0.7439, 0.9275])
- divisor is a vector/matrix (broadcast element-wise): $out_i = \frac{input_i}{other_i}$
>>> a = torch.randn(4, 4)
>>> a
tensor([[-0.3711, -1.9353, -0.4605, -0.2917],
[ 0.1815, -1.0111, 0.9805, -1.5923],
[ 0.1062, 1.4581, 0.7759, -1.2344],
[-0.1830, -0.0313, 1.1908, -1.4757]])
>>> b = torch.randn(4)
>>> b
tensor([ 0.8032, 0.2930, -0.8113, -0.2308])
>>> torch.div(a, b)
tensor([[-0.4620, -6.6051, 0.5676, 1.2637],
[ 0.2260, -3.4507, -1.2086, 6.8988],
[ 0.1322, 4.9764, -0.9564, 5.3480],
[-0.2278, -0.1068, -1.4678, 6.3936]])
- torch.fmod(input, other)
Element-wise remainder of division
>>> torch.fmod(torch.tensor([-3., -2, -1, 1, 2, 3]), 2)
tensor([-1., -0., -1., 1., 0., 1.])
>>> torch.fmod(torch.tensor([1., 2, 3, 4, 5]), 1.5)
tensor([ 1.0000, 0.5000, 0.0000, 1.0000, 0.5000])
- torch.mul(input, other)
Element-wise multiplication; the scalar/tensor overloads mirror torch.div()
- other is a scalar
>>> a = torch.randn(3)
>>> a
tensor([ 0.2015, -0.4255, 2.6087])
>>> torch.mul(a, 100)
tensor([ 20.1494, -42.5491, 260.8663])
- other is a tensor: note this is broadcast element-wise multiplication, not a matrix product
>>> a = torch.arange(4)
>>> a
tensor([0, 1, 2, 3])
>>> b = torch.mul(a, a)
>>> b
tensor([0, 1, 4, 9])
>>> c = torch.mul(a, a.transpose())
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: transpose() received an invalid combination of arguments - got (), but expected one of:
* (name dim0, name dim1)
* (int dim0, int dim1)
>>> d = torch.mul(a.view(-1,1),a.view(1, -1))
>>> d
tensor([[0, 0, 0, 0],
[0, 1, 2, 3],
[0, 2, 4, 6],
[0, 3, 6, 9]])
- matmul()
vec * vec
Vector dot product --> scalar (0-D tensor)
>>> a = torch.arange(3)
>>> a
tensor([0, 1, 2])
>>> b = torch.arange(3,0,step=-1)
>>> b
tensor([3, 2, 1])
>>> c = torch.matmul(a,b)
>>> c
tensor(4)
matrix * vec
Matrix (dim=2) times vector (dim=1) --> vector (dim=1)
>>> a = torch.arange(6).view(3, 2)
>>> a
tensor([[0, 1],
[2, 3],
[4, 5]])
>>> b = torch.arange(2)
>>> b
tensor([0, 1])
>>> c=torch.matmul(a,b)
>>> c
tensor([1, 3, 5])
matrix * matrix
Matrix multiplication
# continuing from above
>>> b = b.view(-1, 1)
>>> b
tensor([[0],
[1]])
>>> c=torch.matmul(a,b)
>>> c
tensor([[1],
[3],
[5]])
- torch.cross(): cross product
a = torch.tensor([[1, 2],
[2, 3],
[3, 4]])
b = torch.tensor([[1, 0],
[0, 1],
[1, 1]])
print(torch.cross(a, b))          # no dim given: uses the first dimension of size 3 (dim 0 here)
>> tensor([[ 2, -1],
           [ 2, -2],
           [-2,  2]])
print(torch.cross(a, b, dim=1))   # dim 1 has size 2, so this fails
>> RuntimeError: dimension 1 does not have size 3
- torch.bmm(a, b)
Both inputs must be 3-D; commonly used for batched matrix multiplication:
$(batch, n, m) \times (batch, m, k) \to (batch, n, k)$, with $Z[i,:,:] = X[i,:,:]\,Y[i,:,:]$
a = torch.ones(2, 1, 3)
b = torch.ones(2, 3, 3)
c = torch.bmm(a, b)
print(c.shape)
# torch.Size([2, 1, 3])
2.4 Reductions

| Function | Description |
| --- | --- |
| torch.argmax() | index of the maximum |
| torch.argmin() | index of the minimum |
| torch.max() | maximum |
| torch.min() | minimum |
| torch.mean() | mean |
| torch.median() | median |
| torch.prod() | product |
| torch.sum() | sum |
| torch.logsumexp() | log of summed exponentials |
| torch.std() | standard deviation |
| torch.var() | variance |
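argmax/argmin get no dedicated example below, so a quick sketch:
>>> a = torch.tensor([[1, 3], [2, 0]])
>>> torch.argmax(a)        # no dim: index into the flattened tensor
tensor(1)
>>> torch.argmax(a, dim=0) # per-column index of the maximum
tensor([1, 0])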
- Two important parameters of reductions (using max as the example):
- dim: the dimension to reduce over
  - default: reduce over all elements (flattened)
  - dim=0: reduce along dim 0 (maximum of each column)
  - dim=1: reduce along dim 1 (maximum of each row)
- keepdim: whether to keep the reduced dimension
  - keepdim=True keeps the original number of dimensions
  - keepdim=False (default) drops the reduced dimension
- unbiased: whether to use the unbiased estimator (std/var only); see the sketch after this list
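A minimal sketch of the unbiased flag with an arbitrary sample (sum of squared deviations is 5.0):
>>> x = torch.tensor([1., 2., 3., 4.])
>>> torch.var(x, unbiased=True)   # divides by n-1: 5.0/3
tensor(1.6667)
>>> torch.var(x, unbiased=False)  # divides by n:   5.0/4
tensor(1.2500)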
dim=0, keepdim=False (default)
>>> a = torch.arange(12).view(3,4)
>>> a
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> b = a.max(dim=0)
>>> b
torch.return_types.max(
values=tensor([ 8,  9, 10, 11]),
indices=tensor([2, 2, 2, 2]))
dim=1, keepdim=True
>>> a = torch.arange(12).view(3,4)
>>> a
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> b = a.max(dim=1, keepdim=True)
>>> b
torch.return_types.max(
values=tensor([[ 3],
[ 7],
[11]]),
indices=tensor([[3],
[3],
[3]]))
- Return values of reductions -- values and indices
values: the reduced results
indices: the positions of those results in the original tensor
>>> a = torch.arange(12).view(3,4)
>>> a
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
>>> v, i = a.max(dim=1)
>>> v
tensor([ 3, 7, 11])
>>> i
tensor([3, 3, 3])
- torch.logsumexp(input, dim): computes $\log \sum_i \exp(input_i)$ over the given dimension, in a numerically stable way
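A minimal sketch; for the first row, $\log(e^1 + e^2) \approx 2.3133$:
>>> x = torch.tensor([[1., 2.], [3., 4.]])
>>> torch.logsumexp(x, dim=1)
tensor([2.3133, 4.3133])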