Using the code from https://github.com/sherdencooper/GPTFuzz as an example, this post walks through how the LLM wrapper classes are built.
1. How to implement an LLM by calling the ChatGPT API

The OpenAILLM class below wraps the OpenAI chat completions API behind a small LLM base class, retries failed calls, and uses a thread pool for batched requests.
import concurrent.futures
import logging
import time

from openai import OpenAI

# Base interface shared by the OpenAI and vLLM wrappers below.
class LLM:
    def __init__(self):
        self.model = None
        self.tokenizer = None

    def generate(self, prompt):
        raise NotImplementedError("LLM must implement generate method.")

    def predict(self, sequences):
        raise NotImplementedError("LLM must implement predict method.")
class OpenAILLM(LLM):
    def __init__(self,
                 model_path,
                 api_key=None,
                 system_message=None
                 ):
        super().__init__()
        # Guard against a missing key as well as a malformed one.
        if api_key is None or not api_key.startswith('sk-'):
            raise ValueError('OpenAI API key should start with sk-')
        if model_path not in ['gpt-3.5-turbo', 'gpt-4']:
            raise ValueError(
                'OpenAI model path should be gpt-3.5-turbo or gpt-4')
        # base_url points at the third-party relay used here; drop it to call the official OpenAI endpoint.
        self.client = OpenAI(base_url="https://api.gptsapi.net/v1", api_key=api_key)
        self.model_path = model_path
        self.system_message = system_message if system_message is not None else "You are a helpful assistant."
    def generate(self, prompt, temperature=0, max_tokens=512, n=1, max_trials=10, failure_sleep_time=5):
        for _ in range(max_trials):
            try:
                results = self.client.chat.completions.create(
                    model=self.model_path,
                    messages=[
                        {"role": "system", "content": self.system_message},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=temperature,
                    max_tokens=max_tokens,
                    n=n,
                )
                return [results.choices[i].message.content for i in range(n)]
            except Exception as e:
                logging.warning(
                    f"OpenAI API call failed due to {e}. Retrying {_+1} / {max_trials} times...")
                time.sleep(failure_sleep_time)
        return [" " for _ in range(n)]
    def generate_batch(self, prompts, temperature=0, max_tokens=512, n=1, max_trials=10, failure_sleep_time=5):
        results = []
        # Fan the prompts out over a thread pool; as_completed yields results in
        # completion order, not necessarily in the order of the input prompts.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = {executor.submit(self.generate, prompt, temperature, max_tokens, n,
                                       max_trials, failure_sleep_time): prompt for prompt in prompts}
            for future in concurrent.futures.as_completed(futures):
                results.extend(future.result())
        return results
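A minimal usage sketch for the wrapper above (the key 'sk-...' and the prompts are placeholders, not values from the repository):

# Hypothetical usage; replace the placeholder key with a real one.
openai_llm = OpenAILLM(model_path='gpt-3.5-turbo', api_key='sk-...')

# Single prompt: returns a list with n completions (n defaults to 1).
print(openai_llm.generate("Say hello in one short sentence."))

# Batched prompts: requests run in a thread pool; results arrive in completion order.
print(openai_llm.generate_batch(["What is 2+2?", "Name a primary color."]))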
2. How to implement an LLM with the vLLM library

The LocalVLLM class below loads a local model through vLLM and uses FastChat's conversation templates to format prompts, so a locally hosted model (e.g., Llama-2) exposes the same generate / generate_batch interface as the OpenAI wrapper.
from vllm import LLM as vllm
from vllm import SamplingParams
from fastchat.model import load_model, get_conversation_template
class LocalVLLM(LLM):
    def __init__(self,
                 model_path,
                 gpu_memory_utilization=0.95,
                 system_message=None
                 ):
        super().__init__()
        self.model_path = model_path
        self.model = vllm(
            self.model_path, gpu_memory_utilization=gpu_memory_utilization)
        if system_message is None and 'Llama-2' in model_path:
            # monkey patch for latest FastChat to use llama2's official system message
            self.system_message = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " \
                "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " \
                "Please ensure that your responses are socially unbiased and positive in nature.\n\n" \
                "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " \
                "If you don't know the answer to a question, please don't share false information."
        else:
            self.system_message = system_message

    def set_system_message(self, conv_temp):
        if self.system_message is not None:
            conv_temp.set_system_message(self.system_message)
    def generate(self, prompt, temperature=0, max_tokens=512):
        prompts = [prompt]
        return self.generate_batch(prompts, temperature, max_tokens)

    def generate_batch(self, prompts, temperature=0, max_tokens=512):
        prompt_inputs = []
        for prompt in prompts:
            # Wrap each raw prompt in the FastChat conversation template that matches the model.
            conv_temp = get_conversation_template(self.model_path)
            self.set_system_message(conv_temp)
            conv_temp.append_message(conv_temp.roles[0], prompt)
            conv_temp.append_message(conv_temp.roles[1], None)
            prompt_input = conv_temp.get_prompt()
            prompt_inputs.append(prompt_input)
        sampling_params = SamplingParams(temperature=temperature, max_tokens=max_tokens)
        results = self.model.generate(
            prompt_inputs, sampling_params, use_tqdm=False)
        outputs = []
        for result in results:
            outputs.append(result.outputs[0].text)
        return outputs
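And a similar sketch for the local wrapper (the model path is an assumed Hugging Face checkpoint, not one pinned by the repo; a GPU with enough memory is required):

# Hypothetical usage; point model_path at whichever local/HF checkpoint you actually have.
local_llm = LocalVLLM(model_path='meta-llama/Llama-2-7b-chat-hf')

# Single prompt (internally wrapped into a one-element batch).
print(local_llm.generate("Explain in one sentence what fuzzing is."))

# Batched prompts share a single vLLM generate() call, which is where the throughput win comes from.
print(local_llm.generate_batch([
    "What is a jailbreak prompt?",
    "Summarize the goal of GPTFuzz in one sentence.",
]))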