import spacy
def check_zh_doc_similarity():
    """Print similarity scores for selected pairs of Chinese sentences.

    Loads the ``zh_core_web_lg`` pipeline via ``spacy.load``, runs it over
    four sentences, and prints ``Doc.similarity`` for four pairs, one score
    per line. Nothing is returned.
    """
    pipeline = spacy.load('zh_core_web_lg')
    sentences = ('你好吗?', '你还好吗?', '今天你还好吗?', '你的身体今天还好吗?')
    docs = [pipeline(text) for text in sentences]

    # (left, right) indices into ``docs``. The trailing numbers are the
    # scores noted by the original author; presumably they depend on the
    # installed model version -- verify before relying on them.
    pairs = (
        (0, 1),  # 0.7544851165307768
        (1, 2),  # 0.9664107589955437
        (0, 2),  # 0.730822854943996
        (0, 3),  # 0.6528684500574182
    )
    for left, right in pairs:
        print(docs[left].similarity(docs[right]))
# Script entry point: run the similarity demo only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    check_zh_doc_similarity()
使用 spaCy 进行文本相似度处理
最新推荐文章于 2024-12-15 08:47:42 发布