1 Tensor Types and Creation
import numpy as np
import tensorflow as tf
# tensor attributes
with tf.device("/gpu:0"):
    a = tf.constant([1])
a.device # device placement is a property, not a method
aa = a.gpu() # copies to GPU (deprecated in newer TF 2.x releases)
b = a.numpy()
b.ndim
b.shape
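The device pinning above assumes a GPU is visible; a quick check before relying on it, using tf.config (TF 2.x):
print(tf.config.list_physical_devices('GPU')) # [] on a CPU-only machine, where '/gpu:0' placement can fail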
# checking the data type
tf.is_tensor(b) # False: b is a numpy array
b.dtype
# converting to a tensor
a = np.arange(5)
aa = tf.convert_to_tensor(a, dtype=tf.float32)
# dtype casting (numpy defaults to 64-bit; usually cast to 32-bit first)
tf.cast(aa, dtype=tf.int32)
# back to numpy
aa.numpy()
# tf.Variable: a trainable, gradient-tracked tensor
b = tf.Variable(a, name='input_data')
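A few properties that distinguish a Variable from a plain tensor, checked against the b just created:
b.trainable # True: GradientTape watches Variables automatically
tf.is_tensor(b) # True: a Variable can be used wherever a tensor can
isinstance(b, tf.Tensor) # False: it is a tf.Variable wrapper, not a Tensor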
# creating tensors
# from numpy, list
tf.convert_to_tensor(np.ones([2, 3]))
tf.convert_to_tensor([[1], [2]])
# zeros, ones
a = tf.zeros([1, 3, 4])
b = tf.zeros_like(a) # equivalent to tf.zeros(a.shape)
c = tf.zeros([]) # a scalar 0
# fill
tf.fill([2, 3], 1.5)
# random
tf.random.normal([2, 2], mean=1, stddev=1)
tf.random.truncated_normal([2, 2], mean=1, stddev=2) # resamples values beyond 2 stddev from the mean
tf.random.uniform([2, 2], minval=0, maxval=1)
# constant
tf.constant([1, 2])
# random permutation
# shuffle with one index tensor so features and labels stay aligned
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10, 256]) # features
b = tf.random.uniform([10], 0, 1) # labels: same first dimension as a
a = tf.gather(a, idx)
b = tf.gather(b, idx)
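A minimal sanity check of the aligned shuffle, using toy data where label i belongs to row i:
x = tf.reshape(tf.range(5), [5, 1]) # feature row i holds the value i
y = tf.range(5) # label i
idx = tf.random.shuffle(tf.range(5))
x, y = tf.gather(x, idx), tf.gather(y, idx)
print(tf.reduce_all(tf.equal(x[:, 0], y))) # True: pairs survived the shuffle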
Common tensor shape conventions in deep learning (concrete examples follow the list):
1 []: scalar
2 [d]: vector
3 [h, w]: matrix
4 [b, len, vec]: NLP (batch size, sequence length, embedding size)
5 [b, h, w, c]: images
6 [t, b, h, w, c]: meta-learning
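One concrete tensor per convention (the sizes here are arbitrary examples):
scalar = tf.constant(1.) # []
vector = tf.zeros([8]) # [d]
matrix = tf.zeros([3, 4]) # [h, w]
text = tf.zeros([4, 80, 100]) # [b, len, vec]
image = tf.zeros([4, 32, 32, 3]) # [b, h, w, c]
meta = tf.zeros([2, 4, 32, 32, 3]) # [t, b, h, w, c]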
2 Indexing and Slicing
#start: end: step
#::step
a=tf.random.normal([4, 28, 28, 3])
a[:, 0:28:2, 0:28:2, :].shape # [4, 14, 14, 3]
a[:, ::2, ::2, :].shape #[4, 14, 14, 3]
# ::-1 reverses; a[A:B:-C] starts at A and samples backward toward B
a=tf.range(4)
a[::-1] # [3, 2, 1, 0]
a[::-2] # [3, 1]
a[2::-2] # [2, 0]
# ... expands to as many ':' dimensions as needed
a=tf.random.normal([2, 4, 28, 28, 3])
a[0, ...].shape # [4, 28, 28, 3]
a[..., 0].shape # [2, 4, 28, 28]
a[0, ..., 2].shape # [4, 28, 28]
a[1, 0, ..., 0].shape # [28, 28]
# selective indexing
# tf.gather: index along a single axis
a = tf.random.normal([4, 35, 8]) # a is [classes, students, subjects]
tf.gather(a, indices=[2, 1, 4], axis=0).shape # [3, 35, 8]
tf.gather(a, indices=[2, 3, 7], axis=2).shape # [4, 35, 3]
# tf.gather_nd: index along multiple axes; each innermost bracket of the second argument is one multi-dimensional index
# what if we sample several students and several of their subjects?
# aa = tf.gather(a, axis=1, indices=[several students])
# aaa = tf.gather(aa, axis=2, indices=[several subjects])
# what if we sample several (class, student) pairs?
# e.g. [class1_student1, class2_student2, class3_student3, class4_student4], output shape [4, 8]
tf.gather_nd(a, [0]).shape # [35, 8], same as a[0]
tf.gather_nd(a, [0, 1, 2]).shape # [], same as a[0, 1, 2]
tf.gather_nd(a, [[0, 1, 2]]).shape # [1], same as [a[0, 1, 2]]
tf.gather_nd(a, [[0, 0], [1, 1], [2, 2]]).shape # [3, 8]: three (class, student) samples
tf.gather_nd(a, [[0, 0, 0], [1, 1, 1], [2, 2, 2]]).shape # [3]
tf.gather_nd(a, [[[0, 0, 0], [1, 1, 1], [2, 2, 2]]]).shape # [1, 3]
# recommended index formats for tf.gather_nd: [[0], [1], ...], [[0, 0], [1, 1], ...], [[0, 0, 0], [1, 1, 1], ...]
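A runnable version of the two-step gather sketched above, assuming we want students [2, 3] and their subjects [3, 5]:
aa = tf.gather(a, axis=1, indices=[2, 3]) # [4, 2, 8]
aaa = tf.gather(aa, axis=2, indices=[3, 5]) # [4, 2, 2]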
#tf.boolean_mask
a=tf.random.normal([4, 28, 28, 3])
tf.boolean_mask(a, mask=[True, True, False, False]) # keep the first two entries along axis 0: [2, 28, 28, 3]
tf.boolean_mask(a, mask=[True, True, False], axis=3) # keep the first two channels along axis 3: [4, 28, 28, 2]
b = tf.ones([2, 3, 4])
tf.boolean_mask(b, mask=[[True, False, False], [False, True, True]]) # a 2x3 mask covers the first two axes; three positions are True, so the output shape is [3, 4]
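One way to reason about the multi-axis mask: it is equivalent to gathering the coordinates of the True entries:
mask = tf.constant([[True, False, False], [False, True, True]])
same = tf.gather_nd(b, tf.where(mask)) # tf.where(mask) lists the True coordinates, so the result is also [3, 4]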
3 Dimension Transformations
# tf.reshape: changes the view; the content is unchanged
a=tf.random.uniform([4, 28, 28, 3])
tf.reshape(a, [4, -1, 3]).shape # [4, 784, 3]
# tf.transpose: changes the content (the data is physically reordered)
a=tf.random.uniform([4, 3, 2, 1])
tf.transpose(a) # [1, 2, 3, 4]
tf.transpose(a, perm=[0, 1, 3, 2]) # [4, 3, 1, 2]
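A small demo of the view-vs-content distinction on a 2x3 matrix:
m = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.reshape(m, [3, 2]) # [[1, 2], [3, 4], [5, 6]]: same row-major order, new view
tf.transpose(m) # [[1, 4], [2, 5], [3, 6]]: elements actually move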
# expand_dims: axis is the position at which a new size-1 dimension is inserted
a=tf.random.uniform([4, 35, 8])
tf.expand_dims(a, axis=-1).shape # [4, 35, 8, 1]
tf.expand_dims(a, axis=-4).shape # [1, 4, 35, 8]
# squeeze
a=tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0).shape # [2, 1, 3]
tf.squeeze(a, axis=2).shape # [1, 2, 3]
tf.squeeze(a, axis=-2).shape # [1, 2, 3]
tf.squeeze(a, axis=-4).shape # [2, 1, 3]
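Omitting axis squeezes every size-1 dimension at once:
tf.squeeze(a).shape # [2, 3]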
4 Broadcasting
# idea:
# 1 missing dimensions are inserted on the left (shapes align from the right): [4, 32, 32, 3] + [3] -> [1, 1, 1, 3] -> [4, 32, 32, 3]
# 2 size-1 dimensions are stretched: [4, 35, 8] + [35, 1] -> [4, 35, 8] + [1, 35, 1] -> [4, 35, 8] + [4, 35, 8]
# [4, 1] + [1, 3] -> [4, 3] + [1, 3] -> [4, 3] + [4, 3]
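Broadcasting happens implicitly in arithmetic; a quick check of the [4, 1] + [1, 3] case:
x = tf.reshape(tf.range(4), [4, 1])
y = tf.reshape(tf.range(3), [1, 3])
(x + y).shape # [4, 3]: each size-1 dimension is stretched to match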
# tf.broadcast_to (no real copy: an optimization applied at compute time, no extra memory)
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4]) # [2, 3, 4]
# tf.tile (actually copies the data, so it uses memory)
a2 = tf.expand_dims(a, axis=0) # [1, 3, 4]
a2 = tf.tile(a2, [2, 1, 1]) # [2, 3, 4]; the second argument gives the repeat count per axis
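Both routes yield the same values, which a reduce_all check confirms:
print(tf.reduce_all(tf.equal(a1, a2))) # True: same content, different memory cost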
5 Math Operations
# operation kinds
# element-wise: + - * / % //
# matrix-wise: @, tf.matmul (not tf.multiply, which is element-wise)
# dim-wise: reduce_min/max/mean/sum
a=tf.random.normal([2, 3, 4])
# log, exp
tf.math.log(a) # natural log; NaN where a < 0
tf.exp(a)
# log2, log10 via change of base
tf.math.log(8.) / tf.math.log(10.) # log base 10 (tf.math.log requires float inputs)
tf.math.log(8.) / tf.math.log(2.) # log base 2
# pow, sqrt
tf.pow(a, 3)
a**3
tf.sqrt(a) # takes a single tensor argument; NaN where a < 0
# @, matmul
a = tf.ones([4, 2, 3])
b = tf.fill([4, 3, 5], 2.)
a@b # [4, 2, 5]: the batch dim 4 is kept; [2, 3] @ [3, 5] per batch
tf.matmul(a, b) # [4, 2, 5]
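These pieces combine into the basic linear layer used in the next section; a sketch with assumed sizes:
x = tf.random.normal([128, 784]) # a batch of flattened 28x28 images
w = tf.random.normal([784, 256], stddev=0.1)
bias = tf.zeros([256])
h = tf.nn.relu(x @ w + bias) # bias [256] broadcasts over the batch: [128, 256]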
6 Forward Propagation
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress unrelated TF log output (set before importing tensorflow)
import tensorflow as tf
from tensorflow.keras import datasets
# x: [60k, 28, 28], y: [60k]
(x, y), (x_test, y_test) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32) # integer labels for tf.one_hot / tf.equal
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
# create batches
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1)) # stddev is critical: too large a variance and the gradients explode
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
lr = 1e-3
for epoch in range(10):
    for step, (x, y) in enumerate(train_db):
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape: # by default the tape only records gradients for tf.Variable
            h1 = x @ w1 + b1
            h1 = tf.nn.relu(h1)
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3
            # compute loss
            y_onehot = tf.one_hot(y, depth=10)
            loss = tf.square(y_onehot - out)
            loss = tf.reduce_mean(loss)
        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # w1 = w1 - lr * grads[0] # wrong: tf.Variable minus tf.Tensor yields a tf.Tensor, and the tape only tracks tf.Variable, so this breaks on the next step
        w1.assign_sub(lr * grads[0]) # in-place update; the result stays a tf.Variable
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
    # test the updated weights on the held-out set once per epoch
    total_correct, total_num = 0, 0
    for step, (x, y) in enumerate(test_db):
        x = tf.reshape(x, [-1, 28*28])
        h1 = x @ w1 + b1
        h1 = tf.nn.relu(h1)
        h2 = h1 @ w2 + b2
        h2 = tf.nn.relu(h2)
        out = h2 @ w3 + b3
        prob = tf.nn.softmax(out, axis=1)
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32) # argmax returns int64; dtypes must match for tf.equal
        correct = tf.cast(tf.equal(y, pred), dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        total_correct += int(correct)
        total_num += x.shape[0]
    acc = total_correct / total_num
    print('acc:', acc)