- Input sequence: an input sequence $x = x^{(1)}, x^{(2)}, \dots, x^{(T)}$ of length $T$
- Output sequence: an output sequence $y = y^{(1)}, y^{(2)}, \dots, y^{(T)}$ of length $T$
- $x^{(t)}$ and $y^{(t)}$ are one-hot vectors encoding the index of the $t$-th word in the sentence
- Window: the input $\tilde{x}^{(t)} = [x^{(t-w)}, \dots, x^{(t)}, \dots, x^{(t+w)}]$ is constructed by concatenating the $w$ tokens to the left and right of $x^{(t)}$
- By padding the start and end of each sentence with <START> and <END> tokens, every input and output gets a uniform length ($2w+1$ and $1$ respectively), so a simple feed-forward neural network can predict $y^{(t)}$ from $\tilde{x}^{(t)}$; this makes a simple and effective model for predicting a label from each window
We use a single hidden layer with a ReLU activation, a softmax over the outputs, and the cross-entropy loss.
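Written out (these equations match the docstring of add_prediction_op in the code below; $E$ denotes the embedding matrix):

$$e^{(t)} = [x^{(t-w)}E, \dots, x^{(t)}E, \dots, x^{(t+w)}E]$$
$$h^{(t)} = \mathrm{ReLU}(e^{(t)}W + b_1)$$
$$\hat{y}^{(t)} = \mathrm{softmax}(h^{(t)}U + b_2)$$
$$J = \mathrm{CE}(y^{(t)}, \hat{y}^{(t)}) = -\sum_i y_i^{(t)} \log \hat{y}_i^{(t)}$$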
(a) i. Two example sentences containing named entities whose type is ambiguous:
- "Spokesperson for Levis, Bill Murray, said . . . ": "Levis" could refer either to a person or to an organization
- "Heartbreak is a new virus,": "Heartbreak" could be a named entity (the name of a virus) or an ordinary noun
ii. Named entities are often rare words, so features such as capitalization help the system generalize to words it never saw during training.
iii. Capitalization and part-of-speech features help predict whether a word is part of a named entity (a minimal sketch of a casing feature follows below).
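As an illustration, a casing feature can be as simple as bucketing each word by its capitalization pattern. The helper below is hypothetical (the assignment's actual feature extraction may differ), but it shows the idea:

def word_case(word):
    """Map a word to a coarse casing bucket usable as a categorical feature."""
    if word.isupper():
        return "ALL_UPPER"    # e.g. "NASA"
    if word[:1].isupper():
        return "INIT_UPPER"   # e.g. "Levis"
    if any(c.isupper() for c in word):
        return "MIXED_CASE"   # e.g. "iPhone"
    return "LOWER"            # e.g. "virus"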
(b) i. Dimensions: $e^{(t)} \in \mathbb{R}^{1 \times (2w+1)D}$, $W \in \mathbb{R}^{(2w+1)D \times H}$, and $U \in \mathbb{R}^{H \times C}$, where $D$ is the embedding size, $H$ the hidden size, and $C$ the number of classes.
ii. Computational complexity of predicting labels for a sentence of length $T$:
- computing $e^{(t)}$: $O((2w+1)D)$
- computing $h^{(t)}$: $O((2w+1)DH)$
- computing $\hat{y}^{(t)}$: $O(HC)$

Labeling the whole sentence therefore costs $O\big(((2w+1)DH + HC)\,T\big)$, dominated by $O((2w+1)DHT)$ when $(2w+1)D > C$.
(c) Implement the window-based classifier model.
i. In the make_windowed_data function, convert a batch of input sequences into a batch of windowed input-output pairs.
def make_windowed_data(data, start, end, window_size=1):
    windowed_data = []
    for sentence, labels in data:
        orig_n = len(sentence)
        # pad the sentence with window_size <START>/<END> sentinels on each side
        sentence = [start] * window_size + sentence + [end] * window_size
        l = 0  # index into labels
        # loop over the positions of the original sentence (shifted by window_size)
        for i in range(window_size, orig_n + window_size):
            temp_feats = []
            # gather the features of every token in the window centered at i
            for j in range(i - window_size, i + window_size + 1):
                temp_feats.extend(sentence[j])
            # pair the window's token features with the center token's label
            windowed_data.append((temp_feats, labels[l]))
            l += 1
    return windowed_data
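A quick sanity check (the feature ids and sentinel vectors here are made up; in the assignment each token is a small list of feature ids, e.g. a word id plus a casing id):

data = [([[1, 1], [2, 2]], [0, 1])]   # one two-token sentence with labels [0, 1]
start_tok, end_tok = [5, 0], [6, 0]   # hypothetical <START>/<END> feature vectors
print(make_windowed_data(data, start_tok, end_tok, window_size=1))
# [([5, 0, 1, 1, 2, 2], 0), ([1, 1, 2, 2, 6, 0], 1)]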
ii. Implement the feed-forward model in WindowModel.
class WindowModel(NERModel):
    def add_placeholders(self):
        ### YOUR CODE HERE (~3-5 lines)
        # [batch, n_window_features] feature ids, [batch] labels, scalar keep-probability
        self.input_placeholder = tf.placeholder(tf.int32, [None, self.config.n_window_features])
        self.labels_placeholder = tf.placeholder(tf.int32, [None])
        self.dropout_placeholder = tf.placeholder(tf.float32)
        ### END YOUR CODE

    def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1):
        ### YOUR CODE HERE (~5-10 lines)
        feed_dict = {self.input_placeholder: inputs_batch,
                     self.dropout_placeholder: dropout}
        # labels are not available at prediction time
        if labels_batch is not None:
            feed_dict[self.labels_placeholder] = labels_batch
        ### END YOUR CODE
        return feed_dict
    def add_embedding(self):
        # wrap the pretrained embeddings in a Variable so they are fine-tuned during training
        embedded = tf.Variable(self.pretrained_embeddings)
        # look up: [batch, n_window_features, embed_size], then flatten the window
        embeddings = tf.nn.embedding_lookup(embedded, self.input_placeholder)
        embeddings = tf.reshape(embeddings,
                                [-1, self.config.n_window_features * self.config.embed_size])
        return embeddings
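    # For intuition, a NumPy analogue of the lookup-then-reshape above
    # (toy sizes, purely illustrative):
    #     emb  = np.arange(40, dtype=np.float32).reshape(10, 4)  # vocab of 10, D = 4
    #     idx  = np.array([[0, 1, 2], [3, 4, 5]])                # [batch=2, n_window_features=3]
    #     flat = emb[idx].reshape(2, 3 * 4)                      # [batch, n_window_features * D]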
    def add_prediction_op(self):
        """Adds the 1-hidden-layer NN:
            h = ReLU(x W + b1)
            h_drop = Dropout(h, dropout_rate)
            pred = h_drop U + b2
        """
        x = self.add_embedding()
        dropout_rate = self.dropout_placeholder
        ### YOUR CODE HERE (~10-20 lines)
        b1 = tf.get_variable(name='b1', shape=[self.config.hidden_size],
                             initializer=tf.contrib.layers.xavier_initializer(seed=1))
        b2 = tf.get_variable(name='b2', shape=[self.config.n_classes],
                             initializer=tf.contrib.layers.xavier_initializer(seed=2))
        W = tf.get_variable(name='W',
                            shape=[self.config.n_window_features * self.config.embed_size,
                                   self.config.hidden_size],
                            initializer=tf.contrib.layers.xavier_initializer(seed=3))
        U = tf.get_variable(name='U', shape=[self.config.hidden_size, self.config.n_classes],
                            initializer=tf.contrib.layers.xavier_initializer(seed=4))
        z1 = tf.matmul(x, W) + b1
        h = tf.nn.relu(z1)
        h_drop = tf.nn.dropout(h, dropout_rate)
        pred = tf.matmul(h_drop, U) + b2
        ### END YOUR CODE
        return pred
    def add_loss_op(self, pred):
        ### YOUR CODE HERE (~2-5 lines)
        # sparse variant: labels are class indices rather than one-hot vectors
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred,
                                                              labels=self.labels_placeholder)
        loss = tf.reduce_mean(loss)
        ### END YOUR CODE
        return loss

    def add_training_op(self, loss):
        ### YOUR CODE HERE (~1-2 lines)
        adam_optim = tf.train.AdamOptimizer(self.config.lr)
        train_op = adam_optim.minimize(loss)
        ### END YOUR CODE
        return train_op
    def preprocess_sequence_data(self, examples):
        return make_windowed_data(examples, start=self.helper.START, end=self.helper.END,
                                  window_size=self.config.window_size)

    def consolidate_predictions(self, examples_raw, examples, preds):
        # stitch the flat per-window predictions back into per-sentence label lists
        ret = []
        i = 0
        for sentence, labels in examples_raw:
            labels_ = preds[i:i + len(sentence)]
            i += len(sentence)
            ret.append([sentence, labels, labels_])
        return ret

    def predict_on_batch(self, sess, inputs_batch):
        feed = self.create_feed_dict(inputs_batch)
        # note: tf.argmax adds a new op to the graph on every call; acceptable for
        # this exercise, but a real system would build the argmax op once
        predictions = sess.run(tf.argmax(self.pred, axis=1), feed_dict=feed)
        return predictions

    def train_on_batch(self, sess, inputs_batch, labels_batch):
        feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch,
                                     dropout=self.config.dropout)
        _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
        return loss

    def __init__(self, helper, config, pretrained_embeddings, report=None):
        super(WindowModel, self).__init__(helper, config, report)
        self.pretrained_embeddings = pretrained_embeddings
        # placeholders are defined in add_placeholders(), called from build()
        self.input_placeholder = None
        self.labels_placeholder = None
        self.dropout_placeholder = None
        self.build()
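For intuition about add_loss_op: tf.nn.sparse_softmax_cross_entropy_with_logits computes, per example, the negative log-probability the softmax assigns to the true class. A minimal NumPy sketch of that computation (not the TF implementation):

import numpy as np

def sparse_softmax_xent(logits, labels):
    # numerically stable log-softmax
    z = logits - logits.max(axis=1, keepdims=True)
    log_probs = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    # negative log-probability of each example's true class
    return -log_probs[np.arange(len(labels)), labels]

# e.g. two examples, three classes:
# sparse_softmax_xent(np.array([[2.0, 1.0, 0.1], [0.5, 2.5, 0.3]]), np.array([0, 1]))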
With the following main function, the script can be run directly in PyCharm:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Trains and tests an NER model')
    parser.add_argument('-dt', '--data-train', type=argparse.FileType('r'), default="data/tiny.conll",
                        help="Training data")
    parser.add_argument('-dd', '--data-dev', type=argparse.FileType('r'), default="data/tiny.conll",
                        help="Dev data")
    parser.add_argument('-v', '--vocab', type=argparse.FileType('r'), default="data/vocab.txt",
                        help="Path to vocabulary file")
    parser.add_argument('-vv', '--vectors', type=argparse.FileType('r'), default="data/wordVectors.txt",
                        help="Path to word vectors file")
    args = parser.parse_args()

    logger.info("Testing implementation of WindowModel")
    config = Config()
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...")
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = None

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)

    logger.info("Model did not crash!")
    logger.info("Passed!")