from sklearn.cluster import KMeans
from bert_serving.client import BertClient
import csv
# Start the BERT service from the command line first, e.g.:
# bert-serving-start -model_dir C:\Users\jason\PycharmProjects\code\chinese_L-12_H-768_A-12 -num_worker=1
# The bare string below is the author's one-line description of this script;
# in English: "k-means cluster all cooperation-related sentences into 3
# classes". It is a no-op expression statement and is kept verbatim.
# The clustering step itself is not part of the lines documented here.
'''kmeans聚类所有跟合作有关的句子成3类'''
# Input CSV; only the first column of each row is read below.
filename = "sentence_rela_hezuo.csv"
# NOTE(review): as written here, the nested `with` statements have no
# indented bodies, which Python rejects. The layout is preserved untouched;
# confirm the intended nesting against the original file.
# The input is decoded as utf-8-sig, so a leading BOM (if any) is dropped.
with open(filename, "r", newline='', encoding='utf-8-sig') as f:
# Four output files, all opened in append mode ("a"): existing content is
# kept, so re-running the script adds to earlier output instead of replacing
# it. Presumably one file per cluster (0/1/2) plus one for the cluster
# centers; the code that writes to them is not visible here, so confirm.
with open("cluster3_hezuo_0.txt", "a", newline='', encoding='utf-8') as f0:
with open("cluster3_hezuo_1.txt", "a", newline='', encoding='utf-8') as f1:
with open("cluster3_hezuo_2.txt", "a", newline='', encoding='utf-8') as f2:
with open("cluster3_hezuo_center.txt", "a", newline='', encoding='utf-8') as fc:
f_reader = csv.reader(f)
# Collect column 0 of every CSV row.
# NOTE(review): csv.reader yields an empty list for a blank line, in which
# case row[0] raises IndexError; verify the input never contains blank rows.
sentence_list = [row[0] for row in f_reader] # 1273 strings (sentences), per the original author
# Client for the bert-serving server, which has to be running already (see
# the bert-serving-start command in the file header).
# NOTE(review): check_length=False presumably turns off the client-side
# sentence-length check; confirm against the bert-serving client docs.
bc = BertClient(check_length=False)
vecs = bc.encode(sentence_list) # (1273, 768) np.ndarray, per the original author