For this walkthrough you need to download two models: bert-base-chinese as the embedding model and Qwen2-0.5B-Instruct as the chat model. The implementation splits into three parts.
1. Read the data, split it, embed it, and save the vectors locally (the demo below persists them with NumPy's np.save; FAISS is the usual choice once the corpus gets large).
2. Load the chat model and run inference through a chat method.
3. On each user question, run vector retrieval and send the retrieved content together with the question to the LLM.
Note that the loader below only handles plain TXT files; adapt it yourself if you need other formats.
The text is read and split line by line, which gives mediocre chunks (this demo does not use LangChain; for better results, write your own splitter, as sketched below, or use LangChain's splitters).
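For example, a minimal fixed-size splitter with overlap, a crude stand-in for LangChain's text splitters (the chunk sizes here are arbitrary placeholders, not tuned values), could look roughly like this:
def split_text(text: str, chunk_size: int = 200, overlap: int = 50) -> list[str]:
    """Split text into overlapping fixed-size character chunks."""
    chunks = []
    start = 0
    step = chunk_size - overlap
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        start += step
    return chunks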
The implementation code for the first part is as follows:
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import torch

# Load the pretrained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained(r'bert-base-chinese')
model = BertModel.from_pretrained(r'bert-base-chinese')

# Sample query ("what is the difference between a laptop and a desktop")
data = '笔记本和台式机有什么区别'

# Read the corpus line by line; each stripped line becomes one retrievable passage
with open('train1.txt', 'r', encoding='utf-8') as file:
    texts = [line.strip() for line in file]
print('texts:', texts)

# Tokenize the corpus and the query into model-ready tensors
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
data_inputs = tokenizer(data, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Run BERT to get the text representations
with torch.no_grad():
    outputs = model(**inputs)
    data_outputs = model(**data_inputs)

# Take the last hidden state of the [CLS] token (the first token) as the sentence embedding
sentence_embeddings = outputs.last_hidden_state[:, 0, :].numpy()
data_sentence_embeddings = data_outputs.last_hidden_state[:, 0, :].numpy()

# Save the corpus embeddings to disk
np.save('bert_embeddings.npy', sentence_embeddings)
# Reload them (only needed when retrieval runs in a separate process)
loaded_embeddings = np.load('bert_embeddings.npy')

# Similarity search using cosine similarity.
# First example: similarity of the first corpus text to every corpus text
query_vec = sentence_embeddings[0]
query_vec_reshaped = query_vec[np.newaxis, :]
similarities = cosine_similarity(query_vec_reshaped, loaded_embeddings)

# Second example: similarity of the sample query to every corpus text
data_query_vec = data_sentence_embeddings[0]
data_query_vec_reshaped = data_query_vec[np.newaxis, :]
data_similarities = cosine_similarity(data_query_vec_reshaped, loaded_embeddings)

# Print the results
for i, similarity in enumerate(similarities[0]):
    print(f"similarity of text {i + 1} to the query text: {similarity:.4f}")

max_similarity_idx = np.argmax(similarities[0])
data_max_similarity_idx = np.argmax(data_similarities[0])
print(f"text most similar to the first corpus text: {texts[max_similarity_idx]}")
print('\ntext most similar to the sample query:', texts[data_max_similarity_idx])

# Note: FAISS is especially useful for large datasets and efficient similarity search.
# For small datasets you can compute similarities directly with NumPy or SciPy.
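The intro mentions FAISS, but the block above only persists vectors with np.save. As a minimal sketch, assuming faiss-cpu is installed (pip install faiss-cpu) and the variables from the block above are in scope, the same embeddings could be stored and searched through a flat inner-product index; L2-normalizing first makes inner product equivalent to cosine similarity:
import faiss

# Build a flat (exact) inner-product index over the corpus embeddings
corpus = sentence_embeddings.astype('float32')
faiss.normalize_L2(corpus)                  # after normalization, inner product == cosine similarity
index = faiss.IndexFlatIP(corpus.shape[1])
index.add(corpus)
faiss.write_index(index, 'bert_embeddings.faiss')   # persist the index locally

# Search: the top-3 corpus lines most similar to the sample query
query = data_sentence_embeddings.astype('float32')
faiss.normalize_L2(query)
scores, ids = index.search(query, 3)
print(ids[0], scores[0])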
Note that the second block must live in its own .py file, because the third block imports this class; the suggested filename is chatglm_model_input.py.
The second block's implementation:
from modelscope import AutoModel, snapshot_download
from modelscope import AutoModelForCausalLM, AutoTokenizer
import torch

class GLMmodel():
    """Wrapper that loads one of ChatGLM3, Qwen2, or GLM4 and exposes a chat method per model."""
    def __init__(self, Qwen: bool = None, ChatGLM3: bool = None, GLM4: bool = None, model_name_or: str = "model_name_or"):
        self.history = []   # running chat history; chat() returns it as a list, so store a list
        self.model_name_or = model_name_or
        self.ChatGLM3 = ChatGLM3
        self.Qwen = Qwen
        if self.ChatGLM3:
            chatglm3_model_dir = snapshot_download(self.model_name_or, revision="v1.0.0")
            self.chatglm3_tokenizer = AutoTokenizer.from_pretrained(chatglm3_model_dir, trust_remote_code=True)
            chatglm3_model = AutoModel.from_pretrained(chatglm3_model_dir, trust_remote_code=True).half().cuda()
            self.chatglm3_model = chatglm3_model.eval()
        elif self.Qwen:
            self.qwen2_model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or,
                torch_dtype="auto",
                device_map="auto"
            )
            self.qwen2_tokenizer = AutoTokenizer.from_pretrained(self.model_name_or)
        elif GLM4:
            device = "cuda"
            self.glm4_tokenizer = AutoTokenizer.from_pretrained(self.model_name_or, trust_remote_code=True)
            self.glm4_model = AutoModelForCausalLM.from_pretrained(
                self.model_name_or,
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                trust_remote_code=True
            ).to(device).eval()

    def Chatglm3_6b_output(self, data: str) -> str:
        # Multi-turn chat: pass the accumulated history in and keep the updated one
        response, history = self.chatglm3_model.chat(self.chatglm3_tokenizer, data, history=self.history)
        self.history = history
        return response

    def Chatglm3_6b_output_history(self, data: str) -> str:
        # Single-turn chat: every call starts from an empty history
        response, history = self.chatglm3_model.chat(self.chatglm3_tokenizer, data, history=[])
        return response

    def Qwen2_output(self, data: str) -> str:
        # Qwen2-Instruct expects its chat template, so wrap the raw text as a user message
        text = self.qwen2_tokenizer.apply_chat_template(
            [{"role": "user", "content": data}],
            tokenize=False,
            add_generation_prompt=True
        )
        model_inputs = self.qwen2_tokenizer([text], return_tensors="pt").to(self.qwen2_model.device)
        generated_ids = self.qwen2_model.generate(
            model_inputs.input_ids,
            max_new_tokens=400,
            repetition_penalty=1.15
        )
        # Strip the prompt tokens so only the newly generated answer gets decoded
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        return self.qwen2_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    def GLM4_output(self, data: str) -> str:
        device = "cuda"
        inputs = self.glm4_tokenizer.apply_chat_template(
            [{"role": "user", "content": data}],
            add_generation_prompt=True,
            tokenize=True,
            return_tensors="pt",
            return_dict=True
        ).to(device)
        gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
        with torch.no_grad():
            outputs = self.glm4_model.generate(**inputs, **gen_kwargs)
            outputs = outputs[:, inputs['input_ids'].shape[1]:]   # drop the prompt tokens
        return self.glm4_tokenizer.decode(outputs[0], skip_special_tokens=True)
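As a quick standalone sanity check of the wrapper (assuming, as in the third block, that the Qwen2-0.5B-Instruct weights sit in a local Qwen2-0.5B-Instruct directory):
from chatglm_model_input import GLMmodel

llm = GLMmodel(Qwen=True, model_name_or=r'Qwen2-0.5B-Instruct')
print(llm.Qwen2_output('笔记本和台式机有什么区别'))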
The third block:
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import torch
from chatglm_model_input import GLMmodel
import warnings
warnings.filterwarnings('ignore')

# Load everything once, before the loop: the embedding model, the saved
# corpus vectors, the raw corpus lines, and the chat model
tokenizer = BertTokenizer.from_pretrained(r'bert-base-chinese')
model = BertModel.from_pretrained(r'bert-base-chinese')
loaded_embeddings = np.load(r'bert_embeddings.npy')
with open(r'train1.txt', 'r', encoding='utf-8') as file:
    texts = [line.strip() for line in file]
llm = GLMmodel(Qwen=True, model_name_or=r'Qwen2-0.5B-Instruct')

while True:
    data = input('Question: ')
    # Embed the question with the same [CLS] pooling used for the corpus
    data_inputs = tokenizer(data, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        data_outputs = model(**data_inputs)
    data_sentence_embeddings = data_outputs.last_hidden_state[:, 0, :].numpy()
    # Retrieve the corpus line most similar to the question
    data_query_vec_reshaped = data_sentence_embeddings[0][np.newaxis, :]
    data_similarities = cosine_similarity(data_query_vec_reshaped, loaded_embeddings)
    for i, similarity in enumerate(data_similarities[0]):
        print(f"similarity of text {i + 1} to the query text: {similarity:.4f}")
    max_similarity_idx = np.argmax(data_similarities[0])
    print(f"text most similar to the query text: {texts[max_similarity_idx]}")
    # Send the retrieved passage together with the question to the LLM
    out = llm.Qwen2_output(f'请根据匹配的结果{texts[max_similarity_idx]}和用户问题{data}进行回答')
    print('LLM answer:', out)
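Retrieving only the single best line makes the answer hinge on one match. A small variant, a sketch reusing the variables inside the loop above (top_k and the prompt wording are my own choices, not from the original), stuffs the top-k passages into the prompt instead:
top_k = 3
# Indices of the k corpus lines most similar to the question, best first
top_idx = np.argsort(data_similarities[0])[::-1][:top_k]
context = '\n'.join(texts[i] for i in top_idx)
out = llm.Qwen2_output(f'请根据以下参考内容回答用户问题。\n参考内容:\n{context}\n用户问题:{data}')
print('LLM answer:', out)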