默认就是精确切分
搜索引擎格式切分
关键字抽取
import jieba.analyse as analyse
# Precise-mode segmentation: cut_all=False turns full mode off.
# `text` is expected to be defined earlier in the file.
seg_list = jieba.cut(text, cut_all=False)

# Header first, then the tokens joined with single spaces. The join is
# what actually consumes `seg_list` (a generator, per the jieba docs).
print("分词结果:")
segmented_text = " ".join(seg_list)
print(segmented_text)
# Keyword extraction: load the corpus, then ask jieba.analyse for its
# top-ranked keywords.
with open('data/nba.txt', encoding='utf8') as f:
    lines = f.read()  # whole file read into one string

# Toggle for the result shape: when True the tags are consumed below as
# (word, weight) pairs; when False they are joined as plain strings.
withWeight = True
tags = analyse.extract_tags(
    lines,
    topK=20,  # keep at most 20 keywords
    withWeight=withWeight,
    allowPOS=(),  # empty tuple; presumably "no part-of-speech filter" - see jieba docs
)

if withWeight:
    print(tags)  # list of tuples
    print("-------")
    # One line per keyword, unpacking each pair instead of indexing it.
    for word, weight in tags:
        print("tag: %s\t\t weight: %f" % (word, weight))
else:
    print(tags)
    print("--------")
    print(" ".join(tags))
词性标注