Recurrent Neural Networks (RNN)
Disclaimer: the theory and code in this post come from the open-source book TensorFlow Deep Learning.
Address: https://github.com/jiajunhua/dragen1860-Deep-Learning-with-TensorFlow-book
1. A simple RNN implementation in TensorFlow 2
In TensorFlow, RNN denotes a recurrent neural network in the general sense; the basic recurrent network introduced so far is usually called SimpleRNN. The difference between SimpleRNN and SimpleRNNCell is that a layer with Cell in its name only performs the forward computation for a single timestamp, while the layer without Cell is generally implemented on top of the Cell layer and already runs the loop over all timestamps internally, which makes it more convenient to use.
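The two APIs are numerically equivalent. Below is a minimal sketch (my addition, not from the book) that checks this by running the layer's own internal cell in a manual loop, since SimpleRNN simply wraps a SimpleRNNCell and iterates over the timestamps internally:
import tensorflow as tf
from tensorflow.keras import layers

x = tf.random.normal([4, 80, 100]) # [b, s, n]
rnn = layers.SimpleRNN(64) # whole-sequence layer
out_layer = rnn(x) # [4, 64], last timestamp only
h = [tf.zeros([4, 64])]
for xt in tf.unstack(x, axis=1): # manual loop over the layer's own shared cell
    out_cell, h = rnn.cell(xt, h)
print(tf.reduce_max(tf.abs(out_layer - out_cell)).numpy()) # ~0.0, same result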
1.1 Single-layer SimpleRNNCell
The input has shape [b, s, n]: b is the number of sentences, s the sentence length, n the word-vector length; h denotes the state-vector length (below: b=4, s=80, n=100, h=64).
from tensorflow.keras import Sequential,layers,losses
import tensorflow as tf
# initialize the state vector; wrap it in a list for a uniform format
h0 = [tf.zeros([4, 64])]
x = tf.random.normal([4, 80, 100]) # generate the input tensor: 4 sentences of 80 words
xt = x[:,0,:] # the first word of every sentence
# build a Cell with state length h=64 (the input feature length n=100 is inferred from the data)
cell = layers.SimpleRNNCell(64)
out, h1 = cell(xt, h0) # forward computation for one timestamp
print(out.shape, h1[0].shape)
Output: (4, 64) (4, 64) # [b, h]; for SimpleRNNCell the output is the new state itself, so out and h1[0] are the same tensor
h = h0 # h holds the list of state vectors at the current timestamp
# unstack the input along the sequence dimension, yielding xt: [b, n]
for xt in tf.unstack(x, axis=1):
    out, h = cell(xt, h) # forward computation; out and h are overwritten each step
# the final output can aggregate the outputs of all timestamps, or just take the last one
out = out # here we simply keep the last timestamp's output
Three variables are involved here: W_hh with shape [64, 64], W_xh with shape [100, 64], and the bias b with shape [64]; h has shape [4, 64] and the single-timestamp input x has shape [4, 100].
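As a quick check (a small addition, not from the book), the cell's trainable variables expose exactly these shapes once the forward pass above has built the cell:
for v in cell.trainable_variables:
    print(v.name, v.shape)
# kernel (100, 64) is W_xh, recurrent_kernel (64, 64) is W_hh, bias (64,) is b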
1.2 Multi-layer SimpleRNNCell
x = tf.random.normal([4,80,100])
xt = x[:,0,:] # take the input at the first timestamp, x0
# build 2 Cells: cell0 first, then cell1; both state vectors have length 64
cell0 = layers.SimpleRNNCell(64)
cell1 = layers.SimpleRNNCell(64)
h0 = [tf.zeros([4,64])] # initial state vector of cell0
h1 = [tf.zeros([4,64])] # initial state vector of cell1
for xt in tf.unstack(x, axis=1):
    # xt is the input of the first cell; its output is out0
    out0, h0 = cell0(xt, h0)
    # the previous cell's output out0 is the input of this cell
    out1, h1 = cell1(out0, h1)
1.3 SimpleRNN
# single layer
layer = layers.SimpleRNN(64) # create a SimpleRNN layer with state-vector length 64
x = tf.random.normal([4, 80, 100])
out = layer(x) # as with an ordinary convolutional network, one line of code yields the output
out.shape # TensorShape([4, 64])
# multi-layer
net = tf.keras.Sequential([ # build a 2-layer RNN network
    # every layer except the last must return the outputs at all timestamps, which feed the next layer
    layers.SimpleRNN(64, return_sequences=True),
    layers.SimpleRNN(64),
])
out = net(x) # forward computation
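For reference, the stacked network still returns only the final layer's last-timestamp state:
print(out.shape) # (4, 64)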
2. Hands-on: sentiment classification with an RNN
SimpleRNNCell implementation
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)
print(x_train[0])
print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# The book includes the shift below and I didn't understand it at first: after applying it, the decoded
# sentences change. The reason: load_data encodes the reviews with index_from=3 (plus start_char=1 and
# oov_char=2), so IDs 0-3 are reserved and the raw indices from get_word_index() must be shifted by 3
# to match the encoded data.
# the first 4 IDs are special tokens
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and two RNN Cells
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    # build the multi-layer network Cell by Cell
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # [b, 64]: build the Cells' initial state vectors, reused for every batch
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build 2 Cells; dropout is applied to reduce overfitting
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        # classification head that maps the Cell output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2 RNN Cells: [b, 80, 100] => [b, 64]
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1): # word: [b, 100]
            out0, state0 = self.rnn_cell0(word, state0, training)
            out1, state1 = self.rnn_cell1(out0, state1, training)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out1)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
After training I got loss: 0.9742 - accuracy: 0.8028 (I noticed the accuracy is different after every run, sometimes much lower, and I wasn't sure why).
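A likely explanation (my note, not the book's): the embedding and RNN weights are randomly initialized, the dataset shuffle order changes, and dropout is stochastic, so every run follows a different training trajectory. Fixing the random seeds before building the model makes runs much more repeatable:
import numpy as np
np.random.seed(22)
tf.random.set_seed(22)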
SimpleRNN implementation
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)
print(x_train[0])
print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and a 2-layer RNN network
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build a 2-layer RNN network; this replaces the manual Cells, state vectors,
        # and timestamp loop of the previous listing
        self.net = tf.keras.Sequential([
            # every layer except the last must return the outputs at all timestamps, which feed the next layer
            layers.SimpleRNN(units, return_sequences=True),
            layers.SimpleRNN(units),
        ])
        # classification head that maps the RNN output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2-layer RNN: [b, 80, 100] => [b, 64]
        out = self.net(x)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 1.2357 - accuracy: 0.7784
3. LSTM
3.1 LSTMCell
x = tf.random.normal([2,80,100])
xt = x[:,0,:] # the input at a single timestamp
cell = layers.LSTMCell(64) # create an LSTM Cell
# initialize the state list [h, c]
state = [tf.zeros([2,64]),tf.zeros([2,64])]
# state: [h_t, c_t]
out, state = cell(xt, state) # forward computation
# inspect the ids of the returned objects
id(out),id(state[0]),id(state[1])
Output: (2834106159528, 2834106159528, 2834106162344)
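The matching ids show that out is the same tensor object as state[0] (the hidden state h), while state[1] is the separate cell state c; a direct check (my addition):
print(out is state[0]) # True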
# unstack along the sequence dimension and feed the LSTM Cell step by step
for xt in tf.unstack(x, axis=1):
    # forward computation
    out, state = cell(xt, state)
3.2 LSTM
Single-layer LSTM
# create an LSTM layer with memory-vector length 64
layer = layers.LSTM(64)
# run the sequence through the LSTM layer; by default only the last timestamp's output h is returned
out = layer(x)
out.shape # out: [2, 64]
# when creating the LSTM layer, request the output at every timestamp
layer = layers.LSTM(64, return_sequences=True)
# forward computation; the per-timestamp outputs are automatically concatenated into one tensor
out = layer(x)
out.shape # out: [2, 80, 64]
Multi-layer LSTM
net = tf.keras.Sequential([
    layers.LSTM(64, return_sequences=True), # non-final layers must return all timestamps' outputs
    layers.LSTM(64)
])
# one pass through the network gives the last layer's final-timestamp output
out = net(x)
out.shape # [2, 64]
4. GRU
4.1 GRUCell
Single layer
# initialize the state vector; a GRU has only one
h = [tf.zeros([2,64])]
cell = layers.GRUCell(64) # create a GRU Cell with state-vector length 64
# unstack along the time dimension and loop through the cell
for xt in tf.unstack(x, axis=1):
    out, h = cell(xt, h)
# output shape
out.shape # [2, 64]
4.2 GRU
Multi-layer
net = tf.keras.Sequential([
    layers.GRU(64, return_sequences=True),
    layers.GRU(64)
])
out = net(x)
out.shape # [2, 64]
5. Sentiment classification revisited with LSTM
LSTMCell
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
# print(x_train.shape, len(x_train[0]), y_train.shape)
# print(x_test.shape, len(x_test[0]), y_test.shape)
# print(x_train[0])
# print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and two LSTM Cells
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    # build the multi-layer network Cell by Cell
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # [b, 64]: build each Cell's initial state vectors [h, c], reused for every batch
        self.state0 = [tf.zeros([batchsz, units]),tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units]),tf.zeros([batchsz, units])]
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build 2 Cells; dropout is applied to reduce overfitting
        self.rnn_cell0 = layers.LSTMCell(units, dropout=0.5)
        self.rnn_cell1 = layers.LSTMCell(units, dropout=0.5)
        # classification head that maps the Cell output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2 LSTM Cells: [b, 80, 100] => [b, 64]
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1): # word: [b, 100]
            out0, state0 = self.rnn_cell0(word, state0, training)
            out1, state1 = self.rnn_cell1(out0, state1, training)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out1)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 1.0263 - accuracy: 0.8075
LSTM
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)
print(x_train[0])
print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and a 2-layer LSTM network
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build a 2-layer LSTM network (replacing the manual Cells of the previous listing)
        self.net = tf.keras.Sequential([
            # every layer except the last must return the outputs at all timestamps, which feed the next layer
            layers.LSTM(units, dropout=0.5, return_sequences=True),
            layers.LSTM(units, dropout=0.5),
        ])
        # classification head that maps the LSTM output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2-layer LSTM: [b, 80, 100] => [b, 64]
        out = self.net(x)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 0.9263 - accuracy: 0.8148
6. Sentiment classification revisited with GRU
GRUCell
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
# print(x_train.shape, len(x_train[0]), y_train.shape)
# print(x_test.shape, len(x_test[0]), y_test.shape)
# print(x_train[0])
# print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and two GRU Cells
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    # build the multi-layer network Cell by Cell
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # [b, 64]: build the Cells' initial state vectors, reused for every batch;
        # unlike the LSTM, a GRU keeps a single state tensor per Cell
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build 2 Cells; dropout is applied to reduce overfitting
        self.rnn_cell0 = layers.GRUCell(units, dropout=0.5)
        self.rnn_cell1 = layers.GRUCell(units, dropout=0.5)
        # classification head that maps the Cell output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2 GRU Cells: [b, 80, 100] => [b, 64]
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1): # word: [b, 100]
            out0, state0 = self.rnn_cell0(word, state0, training)
            out1, state1 = self.rnn_cell1(out0, state1, training)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out1)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 1.1548 - accuracy: 0.8066
GRU
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# print the shapes of the inputs and labels
print(x_train.shape, len(x_train[0]), y_train.shape)
print(x_test.shape, len(x_test[0]), y_test.shape)
print(x_train[0])
print(y_train[0])
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and a 2-layer GRU network
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
        # build a 2-layer GRU network
        self.net = tf.keras.Sequential([
            # every layer except the last must return the outputs at all timestamps, which feed the next layer
            layers.GRU(units, dropout=0.5, return_sequences=True),
            layers.GRU(units, dropout=0.5),
        ])
        # classification head that maps the GRU output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2-layer GRU: [b, 80, 100] => [b, 64]
        out = self.net(x)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 20 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 0.9483 - accuracy: 0.8104
7. Pretrained word vectors
import os
import numpy as np
print('Indexing word vectors.')
embeddings_index = {} # extract each word and its vector into a dictionary
# path where the word-vector model file is stored
GLOVE_DIR = r'D:\jupyter_notebook\HandsOnDeepLearningTF\glove.6B'
with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))
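As a quick sanity check (my addition; assuming the file loaded correctly), each entry is a 100-dimensional vector, and glove.6B should report 400000 of them:
print(embeddings_index['the'].shape) # (100,)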
from tensorflow.keras import Sequential,layers,losses,optimizers
import tensorflow as tf
batchsz = 128 # batch size
total_words = 10000 # vocabulary size N_vocab
max_review_len = 80 # maximum sentence length s; longer sentences are truncated, shorter ones padded
embedding_len = 100 # word-vector feature length n
MAX_NUM_WORDS = 10000
# load the IMDB dataset; the data is integer-encoded, one number per word
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
# e.g. aleination 85167 manojlovic 34319
word_index = tf.keras.datasets.imdb.get_word_index()
# load_data encodes the reviews with index_from=3, so IDs 0-3 are reserved;
# shift the raw word_index by 3 to line it up with the encoded data (see the note in the first listing)
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0 # padding token
word_index["<START>"] = 1 # start-of-sequence token
word_index["<UNK>"] = 2 # unknown-word token
word_index["<UNUSED>"] = 3
# invert the encoding table
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
# decode_review(x_train[0]) only lines up after the shift above
# truncate and pad the sentences to equal length: long sentences keep the tail, short ones are padded at the front
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,maxlen=max_review_len)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,maxlen=max_review_len)
# print(x_train[0])
# build the datasets: shuffle, batch, and drop the last batch smaller than batchsz
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
# dataset statistics
# print('x_train shape:', x_train.shape, tf.reduce_max(y_train),tf.reduce_min(y_train))
# print('x_test shape:', x_test.shape)
num_words = min(total_words, len(word_index))
embedding_matrix = np.zeros((num_words, embedding_len)) # the word-vector table
for word, i in word_index.items():
    if i >= MAX_NUM_WORDS:
        continue # skip words outside the vocabulary
    embedding_vector = embeddings_index.get(word) # look up the word vector in GloVe
    if embedding_vector is not None:
        # words not found in the embedding index will be all-zeros
        embedding_matrix[i] = embedding_vector # write it into the corresponding row
print(embedding_matrix.shape) # (10000, 100)
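Not every word in the IMDB index exists in GloVe; rows that stay all-zero were not found. A small coverage check (my addition):
hits = int((np.abs(embedding_matrix).sum(axis=1) > 0).sum())
print('GloVe coverage: %d / %d words' % (hits, num_words))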
# Custom model class MyRNN, subclassing Model; it needs an Embedding layer and a 2-layer GRU network
# the RNN state-vector length h = the units parameter
class MyRNN(tf.keras.Model):
    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Embedding args: vocabulary size (max integer index + 1), embedding dimension, input sequence length
        # word embedding: [b, 80] => [b, 80, 100]; trainable=False freezes the pretrained vectors
        self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len, trainable=False)
        self.embedding.build(input_shape=(None, max_review_len))
        # initialize the Embedding layer with the GloVe vectors
        self.embedding.set_weights([embedding_matrix])
        # build a 2-layer GRU network
        self.net = tf.keras.Sequential([
            # every layer except the last must return the outputs at all timestamps, which feed the next layer
            layers.GRU(units, dropout=0.5, return_sequences=True),
            layers.GRU(units, dropout=0.5),
        ])
        # classification head that maps the GRU output features to the 2 classes
        # [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)
        #self.outlayer = Sequential([layers.Dense(units),layers.Dropout(rate=0.5),layers.ReLU(),layers.Dense(1)])

    def call(self, inputs, training=None):
        x = inputs # [b, 80]
        # get the word vectors: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # run through the 2-layer GRU: [b, 80, 100] => [b, 64]
        out = self.net(x)
        # the last layer's final output feeds the classification head: [b, 64] => [b, 1]
        x = self.outlayer(out)
        # activation: p(y is pos|x)
        prob = tf.sigmoid(x)
        return prob
# training and testing
def main():
    units = 64 # RNN state-vector length h
    epochs = 50 # number of training epochs
    model = MyRNN(units) # build the model
    # compile
    model.compile(optimizer=optimizers.Adam(0.001), loss=losses.BinaryCrossentropy(), metrics=['accuracy'])
    # train and validate
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # test
    model.evaluate(db_test)
main()
loss: 0.3247 - accuracy: 0.8554
Download page: https://nlp.stanford.edu/projects/glove/
Even though these examples run, I don't feel I have really gotten started with recurrent neural networks; there is still a lot I don't understand, and I feel I can't build anything of my own yet. My goodness, may the heavens teach me the great art of neural networks, unblock my meridians, and make it all click. Ah well, enough daydreaming; back to the grind…