统计NER中的实体标签
def collect_labels(lines):
    """Return the distinct entity labels found in NER-annotated lines.

    Every whitespace character in a line is treated as a possible
    separator, and the stripped text that follows it is taken as a label
    candidate. Candidates are returned in first-seen order without
    duplicates.

    Args:
        lines: Iterable of text lines (an open text file works).

    Returns:
        A list of unique, non-empty label strings.
    """
    labels = []
    seen = set()  # O(1) membership test; `labels` keeps first-seen order
    for line in lines:
        for idx, char in enumerate(line):
            if not char.isspace():  # str.isspace() already covers '\t'
                continue
            candidate = line[idx + 1:].strip()
            # The trailing newline (and any blank line) produces an empty
            # suffix; that is not a label and must not be recorded.
            if candidate and candidate not in seen:
                seen.add(candidate)
                labels.append(candidate)
    return labels


if __name__ == '__main__':
    # The `with` statement closes the file; no explicit close() is needed.
    with open('./data/resume/train/train.txt', 'r', encoding='utf-8') as f:
        labels = collect_labels(f)

    # NOTE(review): append mode is kept from the original script, so running
    # it twice writes the labels twice -- confirm whether 'w' was intended.
    with open('./data/resume/train/train-label.txt', 'a', encoding='utf-8') as g:
        g.writelines(label + '\n' for label in labels)

    print(labels)
    print(len(labels))
数据格式如下: