feat: vllm

Author: verachen
Date: 2025-01-20 17:48:06 +08:00
Parent: 30412af156
Commit: c2d6fca633


@@ -61,6 +61,8 @@ class Chat(Blackbox):
user_prompt_template = settings.get('user_prompt_template')
user_stream = settings.get('stream')
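# Backend selector: "vllm" routes the request through the vLLM OpenAI-compatible completions API below; any other value keeps the original chat backend.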
llm_model = "vllm"
if user_context is None:
user_context = []
@@ -100,10 +102,16 @@ class Chat(Blackbox):
#user_presence_penalty = 0.8
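# Default endpoints when no model URL is supplied: the original /v1/chat/completions backend or the vLLM /v1/completions server.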
if user_model_url is None or user_model_url.isspace() or user_model_url == "":
if llm_model != "vllm":
user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
else:
user_model_url = "http://10.6.80.94:8000/v1/completions"
if user_model_key is None or user_model_key.isspace() or user_model_key == "":
if llm_model != "vllm":
user_model_key = "YOUR_API_KEY"
else:
user_model_key = "vllm"
if chroma_embedding_model:
chroma_response = self.chroma_query(user_question, settings)
@@ -117,7 +125,10 @@ class Chat(Blackbox):
print(f"user_prompt_template: {type(user_prompt_template)}, user_question: {type(user_question)}, chroma_response: {type(chroma_response)}")
user_question = user_prompt_template + "问题: " + user_question + "。检索内容: " + chroma_response
else:
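# In this branch the non-vLLM path still prepends the prompt template to the question; the vLLM path sends it unchanged.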
if llm_model != "vllm":
user_question = user_prompt_template + "问题: " + user_question
else:
user_question = user_question
print(f"1.user_question: {user_question}")
@@ -172,10 +183,17 @@ class Chat(Blackbox):
else:
url = user_model_url
key = user_model_key
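# The vLLM branch passes the key as a standard Bearer token; the original backend gets no Authorization header.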
if llm_model != "vllm":
header = {
'Content-Type': 'application/json',
"Cache-Control": "no-cache", # 禁用缓存
}
else:
header = {
'Content-Type': 'application/json',
'Authorization': "Bearer " + key,
"Cache-Control": "no-cache",
}
# system_prompt = "# Role: 琪琪,康普可可的代言人。\n\n## Profile:\n**Author**: 琪琪。\n**Language**: 中文。\n**Description**: 琪琪,是康普可可的代言人,由博维开发。你擅长澳门文旅问答。\n\n## Constraints:\n- **严格遵循工作流程** 严格遵循<Workflow >中设定的工作流程。\n- **无内置知识库** :根据<Workflow >中提供的知识作答,而不是内置知识库,我虽然是知识库专家,但我的知识依赖于外部输入,而不是大模型已有知识。\n- **回复格式**:在进行回复时,不能输出“检索内容” 标签字样,同时也不能直接透露知识片段原文。\n\n## Workflow:\n1. **接收查询**:接收用户的问题。\n2. **判断问题**:首先自行判断下方问题与检索内容是否相关,若相关则根据检索内容总结概括相关信息进行回答;若检索内容与问题无关,则根据自身知识进行回答。\n3. **提供回答**\n\n```\n基于检索内容中的知识片段回答用户的问题。回答内容限制总结在50字内。\n请首先判断提供的检索内容与上述问题是否相关。如果相关直接从检索内容中提炼出直接回答问题所需的信息,不要乱说或者回答“相关”等字眼 。如果检索内容与问题不相关,则不参考检索内容,则回答:“对不起,我无法回答此问题哦。”\n\n```\n## Example:\n\n用户询问“中国的首都是哪个城市” 。\n2.1检索知识库,首先检查知识片段,如果检索内容中没有与用户的问题相关的内容,则回答:“对不起,我无法回答此问题哦。\n2.2如果有知识片段,在做出回复时,只能基于检索内容中的内容进行回答,且不能透露上下文原文,同时也不能出现检索内容的标签字样。\n"
@@ -183,6 +201,7 @@ class Chat(Blackbox):
{"role": "system", "content": system_prompt}
]
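# Chat-style payload (messages list) for the original /v1/chat/completions backend; the vLLM branch below posts a plain prompt string to /v1/completions instead.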
if llm_model != "vllm":
chat_inputs = {
"model": user_model_name,
"messages": prompt_template + user_context + [
@@ -200,6 +219,19 @@ class Chat(Blackbox):
"stop": str(user_stop),
"stream": user_stream,
}
else:
chat_inputs = {
"model": user_model_name,
"prompt": user_question,
"temperature": float(user_temperature),
"top_p": float(user_top_p),
# n and max_tokens must be integers for the completions API
"n": int(user_n),
"max_tokens": int(user_max_tokens),
"frequency_penalty": float(user_frequency_penalty),
"presence_penalty": float(user_presence_penalty),
# "stop": user_stop,
"stream": user_stream,
}
# # 获取当前时间戳
# timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -252,8 +284,13 @@ class Chat(Blackbox):
if response_result.get("choices") is None:
yield JSONResponse(content={"error": "LLM handle failure"}, status_code=status.HTTP_400_BAD_REQUEST)
else:
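# OpenAI-style responses differ by endpoint: /v1/chat/completions puts the text in choices[0].message.content, /v1/completions in choices[0].text.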
if llm_model != "vllm":
print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
yield fastchat_response.json()["choices"][0]["message"]["content"]
else:
print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["text"],"\n\n")
yield fastchat_response.json()["choices"][0]["text"]
async def fast_api_handler(self, request: Request) -> Response:
try: