1 Tensor Types and Creation
import numpy as np
import tensorflow as tf
# tensor attributes
with tf.device("/gpu:0"):
    a = tf.constant([1])
a.device # device placement is a property, not a method
aa = a.gpu() # copies to GPU (deprecated in newer TF 2.x releases)
b = a.numpy()
b.ndim
b.shape
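The device pinning above assumes a GPU is visible; a quick check before relying on it, using tf.config (TF 2.x):
print(tf.config.list_physical_devices('GPU')) # [] on a CPU-only machine, where '/gpu:0' placement can fail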
# checking the data type
tf.is_tensor(b) # False: b is a numpy array
b.dtype
# converting to a tensor
a = np.arange(5)
aa = tf.convert_to_tensor(a, dtype=tf.float32)
# dtype casting (numpy defaults to 64-bit; usually cast to 32-bit first)
tf.cast(aa, dtype=tf.int32)
# back to numpy
aa.numpy()
# tf.Variable: a trainable, gradient-tracked tensor
b = tf.Variable(a, name='input_data')
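A few properties that distinguish a Variable from a plain tensor, checked against the b just created:
b.trainable # True: GradientTape watches Variables automatically
tf.is_tensor(b) # True: a Variable can be used wherever a tensor can
isinstance(b, tf.Tensor) # False: it is a tf.Variable wrapper, not a Tensor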
# creating tensors
# from numpy, list
tf.convert_to_tensor(np.ones([2, 3]))
tf.convert_to_tensor([[1], [2]])
# zeros, ones
a = tf.zeros([1, 3, 4])
b = tf.zeros_like(a) # equivalent to tf.zeros(a.shape)
c = tf.zeros([]) # a scalar 0
# fill
tf.fill([2, 3], 1.5)
# random
tf.random.normal([2, 2], mean=1, stddev=1)
tf.random.truncated_normal([2, 2], mean=1, stddev=2) # resamples values beyond 2 stddev from the mean
tf.random.uniform([2, 2], minval=0, maxval=1)
# constant
tf.constant([1, 2])
# random permutation
# shuffle with one index tensor so features and labels stay aligned
idx = tf.range(10)
idx = tf.random.shuffle(idx)
a = tf.random.normal([10, 256]) # features
b = tf.random.uniform([10], 0, 1) # labels: same first dimension as a
a = tf.gather(a, idx)
b = tf.gather(b, idx)
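A minimal sanity check of the aligned shuffle, using toy data where label i belongs to row i:
x = tf.reshape(tf.range(5), [5, 1]) # feature row i holds the value i
y = tf.range(5) # label i
idx = tf.random.shuffle(tf.range(5))
x, y = tf.gather(x, idx), tf.gather(y, idx)
print(tf.reduce_all(tf.equal(x[:, 0], y))) # True: pairs survived the shuffle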
Common tensor shape conventions in deep learning (concrete examples follow the list):
1 []: scalar
2 [d]: vector
3 [h, w]: matrix
4 [b, len, vec]: NLP (batch size, sequence length, embedding size)
5 [b, h, w, c]: images
6 [t, b, h, w, c]: meta-learning
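One concrete tensor per convention (the sizes here are arbitrary examples):
scalar = tf.constant(1.) # []
vector = tf.zeros([8]) # [d]
matrix = tf.zeros([3, 4]) # [h, w]
text = tf.zeros([4, 80, 100]) # [b, len, vec]
image = tf.zeros([4, 32, 32, 3]) # [b, h, w, c]
meta = tf.zeros([2, 4, 32, 32, 3]) # [t, b, h, w, c]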
2 Indexing and Slicing
#start: end: step
#::step
a=tf.random.normal([4, 28, 28, 3])
a[:, 0:28:2, 0:28:2, :].shape # [4, 14, 14, 3]
a[:, ::2, ::2, :].shape #[4, 14, 14, 3]
# ::-1 reverses; a[A:B:-C] starts at A and samples backward toward B
a=tf.range(4)
a[::-1] # [3, 2, 1, 0]
a[::-2] # [3, 1]
a[2::-2] # [2, 0]
# ... expands to as many ':' dimensions as needed
a=tf.random.normal([2, 4, 28, 28, 3])
a[0, ...].shape # [4, 28, 28, 3]
a[..., 0].shape # [2, 4, 28, 28]
a[0, ..., 2].shape # [4, 28, 28]
a[1, 0, ..., 0].shape # [28, 28]
# selective indexing
# tf.gather: index along a single axis
a = tf.random.normal([4, 35, 8]) # a is [classes, students, subjects]
tf.gather(a, indices=[2, 1, 4], axis=0).shape # [3, 35, 8]
tf.gather(a, indices=[2, 3, 7], axis=2).shape # [4, 35, 3]
# tf.gather_nd: index along multiple axes; each innermost bracket of the second argument is one multi-dimensional index
# what if we sample several students and several of their subjects?
# aa = tf.gather(a, axis=1, indices=[several students])
# aaa = tf.gather(aa, axis=2, indices=[several subjects])
# what if we sample several (class, student) pairs?
# e.g. [class1_student1, class2_student2, class3_student3, class4_student4], output shape [4, 8]
tf.gather_nd(a, [0]).shape # [35, 8], same as a[0]
tf.gather_nd(a, [0, 1, 2]).shape # [], same as a[0, 1, 2]
tf.gather_nd(a, [[0, 1, 2]]).shape # [1], same as [a[0, 1, 2]]
tf.gather_nd(a, [[0, 0], [1, 1], [2, 2]]).shape # [3, 8]: three (class, student) samples
tf.gather_nd(a, [[0, 0, 0], [1, 1, 1], [2, 2, 2]]).shape # [3]
tf.gather_nd(a, [[[0, 0, 0], [1, 1, 1], [2, 2, 2]]]).shape # [1, 3]
# recommended index formats for tf.gather_nd: [[0], [1], ...], [[0, 0], [1, 1], ...], [[0, 0, 0], [1, 1, 1], ...]
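A runnable version of the two-step gather sketched above, assuming we want students [2, 3] and their subjects [3, 5]:
aa = tf.gather(a, axis=1, indices=[2, 3]) # [4, 2, 8]
aaa = tf.gather(aa, axis=2, indices=[3, 5]) # [4, 2, 2]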
#tf.boolean_mask
a=tf.random.normal([4, 28, 28, 3])
tf.boolean_mask(a, mask=[True, True, False, False]) # keep the first two entries along axis 0: [2, 28, 28, 3]
tf.boolean_mask(a, mask=[True, True, False], axis=3) # keep the first two channels along axis 3: [4, 28, 28, 2]
b = tf.ones([2, 3, 4])
tf.boolean_mask(b, mask=[[True, False, False], [False, True, True]]) # a 2x3 mask covers the first two axes; three positions are True, so the output shape is [3, 4]
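One way to reason about the multi-axis mask: it is equivalent to gathering the coordinates of the True entries:
mask = tf.constant([[True, False, False], [False, True, True]])
same = tf.gather_nd(b, tf.where(mask)) # tf.where(mask) lists the True coordinates, so the result is also [3, 4]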
3 Dimension Transformations
# tf.reshape: changes the view; the content is unchanged
a=tf.random.uniform([4, 28, 28, 3])
tf.reshape(a, [4, -1, 3]).shape # [4, 784, 3]
# tf.transpose: changes the content (the data is physically reordered)
a=tf.random.uniform([4, 3, 2, 1])
tf.transpose(a) # [1, 2, 3, 4]
tf.transpose(a, perm=[0, 1, 3, 2]) # [4, 3, 1, 2]
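A small demo of the view-vs-content distinction on a 2x3 matrix:
m = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.reshape(m, [3, 2]) # [[1, 2], [3, 4], [5, 6]]: same row-major order, new view
tf.transpose(m) # [[1, 4], [2, 5], [3, 6]]: elements actually move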
# expand_dims: axis is the position at which a new size-1 dimension is inserted
a=tf.random.uniform([4, 35, 8])
tf.expand_dims(a, axis=-1).shape # [4, 35, 8, 1]
tf.expand_dims(a, axis=-4).shape # [1, 4, 35, 8]
# squeeze
a=tf.zeros([1, 2, 1, 3])
tf.squeeze(a, axis=0).shape # [2, 1, 3]
tf.squeeze(a, axis=2).shape # [1, 2, 3]
tf.squeeze(a, axis=-2).shape # [1, 2, 3]
tf.squeeze(a, axis=-4).shape # [2, 1, 3]
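Omitting axis squeezes every size-1 dimension at once:
tf.squeeze(a).shape # [2, 3]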
4 Broadcasting
# idea:
# 1 missing dimensions are inserted on the left (shapes align from the right): [4, 32, 32, 3] + [3] -> [1, 1, 1, 3] -> [4, 32, 32, 3]
# 2 size-1 dimensions are stretched: [4, 35, 8] + [35, 1] -> [4, 35, 8] + [1, 35, 1] -> [4, 35, 8] + [4, 35, 8]
# [4, 1] + [1, 3] -> [4, 3] + [1, 3] -> [4, 3] + [4, 3]
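Broadcasting happens implicitly in arithmetic; a quick check of the [4, 1] + [1, 3] case:
x = tf.reshape(tf.range(4), [4, 1])
y = tf.reshape(tf.range(3), [1, 3])
(x + y).shape # [4, 3]: each size-1 dimension is stretched to match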
# tf.broadcast_to (no real copy: an optimization applied at compute time, no extra memory)
a = tf.ones([3, 4])
a1 = tf.broadcast_to(a, [2, 3, 4]) # [2, 3, 4]
# tf.tile (actually copies the data, so it uses memory)
a2 = tf.expand_dims(a, axis=0) # [1, 3, 4]
a2 = tf.tile(a2, [2, 1, 1]) # [2, 3, 4]; the second argument gives the repeat count per axis
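Both routes yield the same values, which a reduce_all check confirms:
print(tf.reduce_all(tf.equal(a1, a2))) # True: same content, different memory cost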
5 Math Operations
# operation kinds
# element-wise: + - * / % //
# matrix-wise: @, tf.matmul (not tf.multiply, which is element-wise)
# dim-wise: reduce_min/max/mean/sum
a=tf.random.normal([2, 3, 4])
# log, exp
tf.math.log(a) # natural log; NaN where a < 0
tf.exp(a)
# log2, log10 via change of base
tf.math.log(8.) / tf.math.log(10.) # log base 10 (tf.math.log requires float inputs)
tf.math.log(8.) / tf.math.log(2.) # log base 2
# pow, sqrt
tf.pow(a, 3)
a**3
tf.sqrt(a) # takes a single tensor argument; NaN where a < 0
# @, matmul
a = tf.ones([4, 2, 3])
b = tf.fill([4, 3, 5], 2.)
a@b # [4, 2, 5]: the batch dim 4 is kept; [2, 3] @ [3, 5] per batch
tf.matmul(a, b) # [4, 2, 5]
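These pieces combine into the basic linear layer used in the next section; a sketch with assumed sizes:
x = tf.random.normal([128, 784]) # a batch of flattened 28x28 images
w = tf.random.normal([784, 256], stddev=0.1)
bias = tf.zeros([256])
h = tf.nn.relu(x @ w + bias) # bias [256] broadcasts over the batch: [128, 256]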
6 Forward Propagation
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # suppress unrelated TF log output (set before importing tensorflow)
import tensorflow as tf
from tensorflow.keras import datasets
# x: [60k, 28, 28], y: [60k]
(x, y), (x_test, y_test) = datasets.mnist.load_data()
x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
y = tf.convert_to_tensor(y, dtype=tf.int32) # integer labels for tf.one_hot / tf.equal
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
# create batches
train_db = tf.data.Dataset.from_tensor_slices((x, y)).batch(128)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(128)
train_iter = iter(train_db)
sample = next(train_iter)
print('batch:', sample[0].shape, sample[1].shape)
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# [dim_in, dim_out], [dim_out]
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1)) # stddev is critical: too large a variance and the gradients explode
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))
lr = 1e-3
for epoch in range(10):
    for step, (x, y) in enumerate(train_db):
        x = tf.reshape(x, [-1, 28*28])
        with tf.GradientTape() as tape: # by default the tape only records gradients for tf.Variable
            h1 = x @ w1 + b1
            h1 = tf.nn.relu(h1)
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            out = h2 @ w3 + b3
            # compute loss
            y_onehot = tf.one_hot(y, depth=10)
            loss = tf.square(y_onehot - out)
            loss = tf.reduce_mean(loss)
        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # w1 = w1 - lr * grads[0] # wrong: tf.Variable minus tf.Tensor yields a tf.Tensor, and the tape only tracks tf.Variable, so this breaks on the next step
        w1.assign_sub(lr * grads[0]) # in-place update; the result stays a tf.Variable
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
        b3.assign_sub(lr * grads[5])
        if step % 100 == 0:
            print(epoch, step, 'loss:', float(loss))
    # test the updated weights on the held-out set once per epoch
    total_correct, total_num = 0, 0
    for step, (x, y) in enumerate(test_db):
        x = tf.reshape(x, [-1, 28*28])
        h1 = x @ w1 + b1
        h1 = tf.nn.relu(h1)
        h2 = h1 @ w2 + b2
        h2 = tf.nn.relu(h2)
        out = h2 @ w3 + b3
        prob = tf.nn.softmax(out, axis=1)
        pred = tf.argmax(prob, axis=1)
        pred = tf.cast(pred, dtype=tf.int32) # argmax returns int64; dtypes must match for tf.equal
        correct = tf.cast(tf.equal(y, pred), dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        total_correct += int(correct)
        total_num += x.shape[0]
    acc = total_correct / total_num
    print('acc:', acc)