使用 bert_serving.client 之前,要先下载 BERT 预训练模型,并在 cmd 中启动服务,才能运行以下程序。下面给出了用 BERT 对列表里的句子进行向量化的方法,以及所得向量对应的维度。
# -*- coding:utf-8 -*-
from bert_serving.client import BertClient
# Start the BERT server from the command line before running this script;
# -model_dir points at the downloaded pre-trained model directory:
# bert-serving-start -model_dir C:\Users\jason\PycharmProjects\code\chinese_L-12_H-768_A-12 -num_worker=1
bc = BertClient()  # client created with its default arguments

# Strings to vectorize; the recorded shape below shows one row per entry.
li = [
    '中国',
    '美国',
    '澳大利亚',
    '张三',
    '李四光',
    '王五',
]
vecs = bc.encode(li)
print(vecs)
# Sample output recorded from a previous run (`...` marks values elided in
# the printout):
# [[-0.0390969 0.31393334 -0.27065212 ... 0.0390013 0.20890802
# -0.60300004]
# [-0.24806768 0.38532856 -0.52268803 ... -0.10465756 0.1740241
# -0.14918919]
# [ 0.02828223 -0.0312206 -0.32512292 ... 0.04085304 -0.16347478
# 0.17889954]
# [-0.11576433 0.00136624 -0.49223953 ... -0.28085837 0.07880631
# -0.16971125]
# [ 0.01214948 -0.24585013 -0.33325458 ... 0.19059595 -0.05608995
# 0.2963084 ]
# [ 0.02908353 0.04309463 -0.36560717 ... 0.08774196 0.55095494
# -0.43377402]]

# Report the container type and the shape of the encoded result.
print("vecs类型:", type(vecs))
print("vecs维度", vecs.shape)  # recorded output: vecs维度 (6, 768)
for i, vec