Mirror of https://github.com/BoardWare-Genius/jarvis-models.git (synced 2025-12-13 16:53:24 +00:00)
feat: vllm
@@ -61,6 +61,8 @@ class Chat(Blackbox):
         user_prompt_template = settings.get('user_prompt_template')
         user_stream = settings.get('stream')
 
+        llm_model = "vllm"
+
         if user_context == None:
             user_context = []
 
@@ -100,10 +102,16 @@ class Chat(Blackbox):
         #user_presence_penalty = 0.8
 
         if user_model_url is None or user_model_url.isspace() or user_model_url == "":
-            user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
+            if llm_model != "vllm":
+                user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
+            else:
+                user_model_url = "http://10.6.80.94:8000/v1/completions"
 
         if user_model_key is None or user_model_key.isspace() or user_model_key == "":
-            user_model_key = "YOUR_API_KEY"
+            if llm_model != "vllm":
+                user_model_key = "YOUR_API_KEY"
+            else:
+                user_model_key = "vllm"
 
         if chroma_embedding_model:
             chroma_response = self.chroma_query(user_question, settings)
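
The fallback logic in this hunk routes requests by backend: when no model URL or key is supplied, the non-vLLM path keeps the existing chat-completions endpoint and a placeholder key, while the vLLM path targets an OpenAI-compatible /v1/completions endpoint and uses "vllm" as the key. Below is a minimal standalone sketch of that selection; the helper name resolve_endpoint and its signature are illustrative and not code from this commit.

# Illustrative sketch only; resolve_endpoint is not part of the repository.
# It mirrors the fallback behaviour in the hunk above.
from typing import Optional, Tuple

def resolve_endpoint(llm_model: str,
                     user_model_url: Optional[str] = None,
                     user_model_key: Optional[str] = None) -> Tuple[str, str]:
    """Return (url, key) defaults depending on whether the vLLM backend is selected."""
    if not user_model_url or user_model_url.isspace():
        if llm_model != "vllm":
            user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
        else:
            user_model_url = "http://10.6.80.94:8000/v1/completions"
    if not user_model_key or user_model_key.isspace():
        user_model_key = "YOUR_API_KEY" if llm_model != "vllm" else "vllm"
    return user_model_url, user_model_key

if __name__ == "__main__":
    print(resolve_endpoint("vllm"))      # vLLM completions endpoint
    print(resolve_endpoint("fastchat"))  # existing chat-completions endpoint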
@@ -117,7 +125,10 @@ class Chat(Blackbox):
                 print(f"user_prompt_template: {type(user_prompt_template)}, user_question: {type(user_question)}, chroma_response: {type(chroma_response)}")
                 user_question = user_prompt_template + "问题: " + user_question + "。检索内容: " + chroma_response + "。"
         else:
-            user_question = user_prompt_template + "问题: " + user_question + "。"
+            if llm_model != "vllm":
+                user_question = user_prompt_template + "问题: " + user_question + "。"
+            else:
+                user_question = user_question
 
         print(f"1.user_question: {user_question}")
 
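
This hunk covers the retrieval-augmented prompt assembly: when Chroma returns relevant text, the template, the user question, and the retrieved passage are concatenated into one prompt ("问题: …。检索内容: …。"); without retrieval, the non-vLLM path still prepends the template, while the vLLM path now sends the raw question. A hedged sketch of that concatenation follows; build_prompt is an illustrative name, not a function in the repository.

# Illustrative sketch of the prompt concatenation above.
def build_prompt(template: str, question: str, retrieved: str = "",
                 llm_model: str = "vllm") -> str:
    if retrieved:
        # RAG case: template + question + retrieved passage.
        return template + "问题: " + question + "。检索内容: " + retrieved + "。"
    if llm_model != "vllm":
        return template + "问题: " + question + "。"
    # The vLLM branch in this commit passes the question through unchanged.
    return question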
@@ -172,10 +183,17 @@ class Chat(Blackbox):
         else:
             url = user_model_url
             key = user_model_key
-        header = {
-            'Content-Type': 'application/json',
-            "Cache-Control": "no-cache", # 禁用缓存
-        }
+        if llm_model != "vllm":
+            header = {
+                'Content-Type': 'application/json',
+                "Cache-Control": "no-cache", # 禁用缓存
+            }
+        else:
+            header = {
+                'Content-Type': 'application/json',
+                'Authorization': "Bearer " + key,
+                "Cache-Control": "no-cache",
+            }
 
         # system_prompt = "# Role: 琪琪,康普可可的代言人。\n\n## Profile:\n**Author**: 琪琪。\n**Language**: 中文。\n**Description**: 琪琪,是康普可可的代言人,由博维开发。你擅长澳门文旅问答。\n\n## Constraints:\n- **严格遵循工作流程**: 严格遵循<Workflow >中设定的工作流程。\n- **无内置知识库** :根据<Workflow >中提供的知识作答,而不是内置知识库,我虽然是知识库专家,但我的知识依赖于外部输入,而不是大模型已有知识。\n- **回复格式**:在进行回复时,不能输出“检索内容” 标签字样,同时也不能直接透露知识片段原文。\n\n## Workflow:\n1. **接收查询**:接收用户的问题。\n2. **判断问题**:首先自行判断下方问题与检索内容是否相关,若相关则根据检索内容总结概括相关信息进行回答;若检索内容与问题无关,则根据自身知识进行回答。\n3. **提供回答**:\n\n```\n基于检索内容中的知识片段回答用户的问题。回答内容限制总结在50字内。\n请首先判断提供的检索内容与上述问题是否相关。如果相关,直接从检索内容中提炼出直接回答问题所需的信息,不要乱说或者回答“相关”等字眼 。如果检索内容与问题不相关,则不参考检索内容,则回答:“对不起,我无法回答此问题哦。”\n\n```\n## Example:\n\n用户询问:“中国的首都是哪个城市?” 。\n2.1检索知识库,首先检查知识片段,如果检索内容中没有与用户的问题相关的内容,则回答:“对不起,我无法回答此问题哦。\n2.2如果有知识片段,在做出回复时,只能基于检索内容中的内容进行回答,且不能透露上下文原文,同时也不能出现检索内容的标签字样。\n"
 
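
The only difference between the two header blocks is that the vLLM branch adds a Bearer token built from the key chosen earlier; the Content-Type and no-cache directive are shared. A small sketch of that choice, with build_headers as an illustrative name rather than code from the repository:

# Illustrative sketch only; build_headers does not exist in the repository.
def build_headers(llm_model: str, key: str) -> dict:
    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",  # disable caching, as in the diff above
    }
    if llm_model == "vllm":
        headers["Authorization"] = "Bearer " + key
    return headers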
@@ -183,23 +201,37 @@ class Chat(Blackbox):
             {"role": "system", "content": system_prompt}
         ]
 
-        chat_inputs={
-            "model": user_model_name,
-            "messages": prompt_template + user_context + [
-                {
-                    "role": "user",
-                    "content": user_question
-                }
-            ],
-            "temperature": str(user_temperature),
-            "top_p": str(user_top_p),
-            "n": str(user_n),
-            "max_tokens": str(user_max_tokens),
-            "frequency_penalty": str(user_frequency_penalty),
-            "presence_penalty": str(user_presence_penalty),
-            "stop": str(user_stop),
-            "stream": user_stream,
-        }
+        if llm_model != "vllm":
+            chat_inputs={
+                "model": user_model_name,
+                "messages": prompt_template + user_context + [
+                    {
+                        "role": "user",
+                        "content": user_question
+                    }
+                ],
+                "temperature": str(user_temperature),
+                "top_p": str(user_top_p),
+                "n": str(user_n),
+                "max_tokens": str(user_max_tokens),
+                "frequency_penalty": str(user_frequency_penalty),
+                "presence_penalty": str(user_presence_penalty),
+                "stop": str(user_stop),
+                "stream": user_stream,
+            }
+        else:
+            chat_inputs={
+                "model": user_model_name,
+                "prompt": user_question,
+                "temperature": float(user_temperature),
+                "top_p": float(user_top_p),
+                "n": float(user_n),
+                "max_tokens": float(user_max_tokens),
+                "frequency_penalty": float(user_frequency_penalty),
+                "presence_penalty": float(user_presence_penalty),
+                # "stop": user_stop,
+                "stream": user_stream,
+            }
 
         # # 获取当前时间戳
         # timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
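
The two request bodies differ in shape as well as in value types: the chat-completions payload carries a messages list (system prompt, prior context, user turn) with sampling parameters serialized as strings, while the vLLM payload is a plain-completions request with a single prompt string and numeric sampling parameters (stop commented out). Below is a hedged sketch of posting the vLLM-style body with requests; the URL, key, and parameter values are placeholders based on the defaults earlier in the diff, not the exact code path in the repository.

# Sketch of a vLLM-style /v1/completions request under the assumptions above.
import requests

url = "http://10.6.80.94:8000/v1/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer vllm",
    "Cache-Control": "no-cache",
}
payload = {
    "model": "your-model-name",   # placeholder for user_model_name
    "prompt": "你好",              # placeholder for user_question
    "temperature": 0.7,
    "top_p": 0.9,
    "max_tokens": 256,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0,
    "stream": False,
}
resp = requests.post(url, json=payload, headers=headers, timeout=60)
# Plain-completions responses put the generated text under choices[0]["text"].
print(resp.json()["choices"][0]["text"])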
@@ -252,8 +284,13 @@ class Chat(Blackbox):
                 if response_result.get("choices") is None:
                     yield JSONResponse(content={"error": "LLM handle failure"}, status_code=status.HTTP_400_BAD_REQUEST)
                 else:
-                    print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
-                    yield fastchat_response.json()["choices"][0]["message"]["content"]
+                    if llm_model != "vllm":
+                        print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
+                        yield fastchat_response.json()["choices"][0]["message"]["content"]
+                    else:
+                        print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["text"],"\n\n")
+                        yield fastchat_response.json()["choices"][0]["text"]
+
 
     async def fast_api_handler(self, request: Request) -> Response:
         try:
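
The branching at the end reflects the two response schemas: chat completions return the answer under choices[0]["message"]["content"], while plain completions (the vLLM path) return it under choices[0]["text"]. A small sketch of extracting the answer from either shape; extract_answer is an illustrative helper, not part of the commit.

# Illustrative helper only; mirrors the schema handling above.
def extract_answer(response_json: dict, llm_model: str) -> str:
    choice = response_json["choices"][0]
    if llm_model != "vllm":
        return choice["message"]["content"]  # chat-completions schema
    return choice["text"]                    # plain-completions schema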