Create a variable
import tensorflow as tf

# A variable is created from an initial value, here a constant tensor
my_tensor = tf.constant([[1.0, 2.0], [3.0, 4.0]])
my_variable = tf.Variable(my_tensor)
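A variable looks and acts like a tensor, so you can inspect the usual tensor properties on it (a minimal sketch examining my_variable from above):
print("Shape: ", my_variable.shape)
print("DType: ", my_variable.dtype)
print("As NumPy: ", my_variable.numpy())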
As noted above, variables are backed by tensors. You can reassign the tensor using tf.Variable.assign. Calling assign does not (usually) allocate a new tensor; instead, the existing tensor’s memory is reused.
a = tf.Variable([2.0, 3.0])
# This will keep the same dtype, float32
a.assign([1, 2])
print(a)
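assign keeps the variable's shape as well: you cannot resize a variable by assigning a value of a different shape. A minimal sketch:
try:
    # Assigning a value of a different shape is not allowed
    a.assign([1.0, 2.0, 3.0])
except Exception as e:
    print(f"{type(e).__name__}: {e}")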
# There are other versions of assign
print(a.assign_add([2, 3]).numpy())  # [7. 9.]: adds element-wise, updating a in place
print(a.assign_sub([7, 9]).numpy())  # [0. 0.]: subtracts element-wise, updating a in place
# Variables can also be named, which can help you track and debug them.
# Two variables are allowed to share the same name.
a = tf.Variable(my_tensor, name="Mark")
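Despite sharing a name, such variables remain independent objects (a minimal sketch, reusing my_tensor):
# A second variable with the same name but different values
b = tf.Variable(my_tensor + 1.0, name="Mark")
# Same name, yet element-wise unequal values
print(a.name == b.name)  # True
print(a == b)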
Gradients and automatic differentiation
Computing gradients
w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
x = [[1., 2., 3.]]

# persistent=True allows tape.gradient to be called more than once
with tf.GradientTape(persistent=True) as tape:
    y = x @ w + b
    loss = tf.reduce_mean(y**2)

[dl_dw, dl_db] = tape.gradient(loss, [w, b])
# The tape also accepts a dictionary of variables
my_vars = {
    'w': w,
    'b': b
}
grad = tape.gradient(loss, my_vars)
grad['b']
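Since the tape was created with persistent=True, it keeps its resources alive after gradient is called; it is worth dropping the reference once you are done with it (a minimal sketch):
# Release the resources held by the persistent tape
del tape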
Gradients with respect to a model
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])

with tf.GradientTape() as tape:
    # Forward pass
    y = layer(x)
    loss = tf.reduce_mean(y**2)

# Calculate gradients with respect to every trainable variable
grad = tape.gradient(loss, layer.trainable_variables)

for var, g in zip(layer.trainable_variables, grad):
    print(f'{var.name}, shape: {g.shape}')
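In a training loop these gradients would typically be applied with an optimizer. A minimal sketch, assuming SGD with an arbitrary learning rate (the optimizer and rate are illustrative choices, not part of the original):
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)  # illustrative choice
# Pair each gradient with its variable and update the weights in place
optimizer.apply_gradients(zip(grad, layer.trainable_variables))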
Controlling what the tape watches
# A trainable variable
x0 = tf.Variable(3.0, name='x0')
# Not trainable
x1 = tf.Variable(3.0, name='x1', trainable=False)
# Not a variable: a variable + a tensor returns a tensor
x2 = tf.Variable(2.0, name='x2') + 1.0
# Not a variable
x3 = tf.constant(3.0, name='x3')

with tf.GradientTape() as tape:
    y = (x0**2) + (x1**2) + (x2**2)

# Only x0 is watched automatically, so the other gradients are None
grad = tape.gradient(y, [x0, x1, x2, x3])

for g in grad:
    print(g)
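You can list what the tape ended up watching with GradientTape.watched_variables (reusing the tape from above):
# Only the trainable variable x0 was recorded
print([var.name for var in tape.watched_variables()])  # ['x0:0']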
tf.GradientTape provides hooks that give the user control over what is or is not watched. To record gradients with respect to a tf.Tensor, you need to call GradientTape.watch(x):
x = tf.constant(3.0)

with tf.GradientTape() as tape:
    tape.watch(x)
    y = x**2

# dy = 2x * dx
dy_dx = tape.gradient(y, x)  # once watched, even a non-variable tensor can be differentiated
print(dy_dx.numpy())
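Conversely, you can disable the default behavior of watching all trainable variables by passing watch_accessed_variables=False and watching only what you need. A minimal sketch, with illustrative variable values:
x0 = tf.Variable(0.0)   # illustrative value
x1 = tf.Variable(10.0)  # illustrative value

with tf.GradientTape(watch_accessed_variables=False) as tape:
    # Only x1 is recorded; x0 is ignored even though it is trainable
    tape.watch(x1)
    y = tf.math.sin(x0) + tf.nn.softplus(x1)

grad = tape.gradient(y, [x0, x1])
print(grad[0])           # None: x0 was never watched
print(grad[1].numpy())   # sigmoid(10.0), the derivative of softplus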