# Input
# Build the loss function
# Optimizer
# Run epochs
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell
from data import *
# Scratch script: load the NER corpus, iterate over mini-batches, and build
# the embedding -> BiLSTM -> projection part of a tagging graph (TF 1.x API).
train_data = read_corpus('/home/wyy/PycharmProjects/wyy1/zh-NER-TF-master/data_path/train_data')
test_data = read_corpus('/home/wyy/PycharmProjects/wyy1/zh-NER-TF-master/data_path/test_data')

tag2label = {
    "O": 0,
    "B-PER": 1, "I-PER": 2,
    "B-LOC": 3, "I-LOC": 4,
    "B-ORG": 5, "I-ORG": 6,
}
# Derive the class count from the label map; the previous hard-coded 6 did
# not match the 7 labels above and would have dropped the highest label id.
num_tags = len(tag2label)
epoch_num = 10
batch_size = 64
embedding_dim = 300
hidden_dim = 300
# Ceiling division, tied to batch_size instead of a repeated literal 64.
num_batches = (len(train_data) + batch_size - 1) // batch_size

# batch_yield
vocab = read_dictionary('/home/wyy/PycharmProjects/wyy1/zh-NER-TF-master/data_path/word2id.pkl')

for epoch in range(epoch_num):
    # Re-create the batch iterator every epoch.
    # NOTE(review): batch_yield is presumably a generator, in which case a
    # single instance would be exhausted after the first epoch - confirm.
    one_batch = batch_yield(train_data, batch_size, vocab, tag2label)
    for seqs, labels in one_batch:
        print(seqs)
        print(labels)
        # NOTE(review): the original called pad_sequence() with no arguments.
        # Passing the batch's id sequences is the assumed intent; confirm the
        # helper's exact name and signature against data.py.
        seq_list, seq_len_list = pad_sequence(seqs)

# Inputs are available; now build the model.
# Graph inputs: padded word ids per sentence and the true sentence lengths.
word_ids = tf.placeholder(tf.int32, shape=[None, None], name="word_ids")
sequence_lengths = tf.placeholder(tf.int32, shape=[None], name="sequence_lengths")

# lookup_layer
# Original author's note gives the embedding table shape as (3905, 300).
embeddings = random_embedding(vocab, embedding_dim)
_word_embeddings = tf.Variable(embeddings, dtype=tf.float32, trainable=True)
# The lookup previously had no `ids` argument (a syntax error); it is fed
# from the word_ids placeholder.
word_embeddings = tf.nn.embedding_lookup(params=_word_embeddings, ids=word_ids)
word_embeddings = tf.nn.dropout(word_embeddings, keep_prob=0.5)

# biLSTM_layer
# num_units: dimensionality of each LSTM cell's output vector.
cell_fw = LSTMCell(num_units=hidden_dim)
cell_bw = LSTMCell(num_units=hidden_dim)
(output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw,
    cell_bw=cell_bw,
    inputs=word_embeddings,
    # One length per sentence in the batch, replacing the hard-coded [100].
    sequence_length=sequence_lengths,
    dtype=tf.float32,
)
# Concatenate forward and backward outputs along the feature axis.
output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
output = tf.nn.dropout(output, keep_prob=0.5)
s = tf.shape(output)
# Flatten all time steps so one matmul projects every token.
output = tf.reshape(output, [-1, 2 * hidden_dim])
W = tf.get_variable(
    name="W",
    shape=[2 * hidden_dim, num_tags],
    initializer=tf.contrib.layers.xavier_initializer(),
    dtype=tf.float32,
)
# `self.num_tags` was used here outside of any class; use the module value.
b = tf.get_variable(
    name="b",
    shape=[num_tags],
    initializer=tf.zeros_initializer(),
    dtype=tf.float32,
)
# Predictions (the original referenced an undefined lowercase `w`).
pred = tf.matmul(output, W) + b
# Restore the per-sentence layout using the dynamic time dimension s[1].
logits = tf.reshape(pred, [-1, s[1], num_tags])
# loss
代码片
lstm之命名实体识别ner
最新推荐文章于 2025-04-16 21:45:32 发布
本文详细介绍了如何使用双向长短期记忆网络(Bi-LSTM)进行中文命名实体识别(NER)任务的全过程,从数据预处理到模型搭建,再到训练与评估,为读者提供了完整的代码实例。
713

被折叠的 条评论
为什么被折叠?



