diff --git a/src/blackbox/chat.py b/src/blackbox/chat.py
index b7d2527..4973e0d 100644
--- a/src/blackbox/chat.py
+++ b/src/blackbox/chat.py
@@ -60,6 +60,8 @@ class Chat(Blackbox):
         system_prompt = settings.get('system_prompt')
         user_prompt_template = settings.get('user_prompt_template')
         user_stream = settings.get('stream')
+
+        llm_model = "vllm"
 
         if user_context == None:
             user_context = []
@@ -100,10 +102,16 @@ class Chat(Blackbox):
             #user_presence_penalty = 0.8
 
         if user_model_url is None or user_model_url.isspace() or user_model_url == "":
-            user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
+            if llm_model != "vllm":
+                user_model_url = "http://10.6.80.75:23333/v1/chat/completions"
+            else:
+                user_model_url = "http://10.6.80.94:8000/v1/completions"
 
         if user_model_key is None or user_model_key.isspace() or user_model_key == "":
-            user_model_key = "YOUR_API_KEY"
+            if llm_model != "vllm":
+                user_model_key = "YOUR_API_KEY"
+            else:
+                user_model_key = "vllm"
 
         if chroma_embedding_model:
             chroma_response = self.chroma_query(user_question, settings)
@@ -117,7 +125,10 @@ class Chat(Blackbox):
             print(f"user_prompt_template: {type(user_prompt_template)}, user_question: {type(user_question)}, chroma_response: {type(chroma_response)}")
             user_question = user_prompt_template + "问题: " + user_question + "。检索内容: " + chroma_response + "。"
         else:
-            user_question = user_prompt_template + "问题: " + user_question + "。"
+            if llm_model != "vllm":
+                user_question = user_prompt_template + "问题: " + user_question + "。"
+            else:
+                user_question = user_question
 
         print(f"1.user_question: {user_question}")
 
@@ -172,10 +183,17 @@ class Chat(Blackbox):
         else:
             url = user_model_url
             key = user_model_key
-            header = {
-                'Content-Type': 'application/json',
-                "Cache-Control": "no-cache",  # 禁用缓存
-            }
+            if llm_model != "vllm":
+                header = {
+                    'Content-Type': 'application/json',
+                    "Cache-Control": "no-cache",  # 禁用缓存
+                }
+            else:
+                header = {
+                    'Content-Type': 'application/json',
+                    'Authorization': "Bearer " + key,
+                    "Cache-Control": "no-cache",
+                }
 
         # system_prompt = "# Role: 琪琪,康普可可的代言人。\n\n## Profile:\n**Author**: 琪琪。\n**Language**: 中文。\n**Description**: 琪琪,是康普可可的代言人,由博维开发。你擅长澳门文旅问答。\n\n## Constraints:\n- **严格遵循工作流程**: 严格遵循中设定的工作流程。\n- **无内置知识库** :根据中提供的知识作答,而不是内置知识库,我虽然是知识库专家,但我的知识依赖于外部输入,而不是大模型已有知识。\n- **回复格式**:在进行回复时,不能输出“检索内容” 标签字样,同时也不能直接透露知识片段原文。\n\n## Workflow:\n1. **接收查询**:接收用户的问题。\n2. **判断问题**:首先自行判断下方问题与检索内容是否相关,若相关则根据检索内容总结概括相关信息进行回答;若检索内容与问题无关,则根据自身知识进行回答。\n3. **提供回答**:\n\n```\n基于检索内容中的知识片段回答用户的问题。回答内容限制总结在50字内。\n请首先判断提供的检索内容与上述问题是否相关。如果相关,直接从检索内容中提炼出直接回答问题所需的信息,不要乱说或者回答“相关”等字眼 。如果检索内容与问题不相关,则不参考检索内容,则回答:“对不起,我无法回答此问题哦。”\n\n```\n## Example:\n\n用户询问:“中国的首都是哪个城市?” 。\n2.1检索知识库,首先检查知识片段,如果检索内容中没有与用户的问题相关的内容,则回答:“对不起,我无法回答此问题哦。\n2.2如果有知识片段,在做出回复时,只能基于检索内容中的内容进行回答,且不能透露上下文原文,同时也不能出现检索内容的标签字样。\n"
 
@@ -183,23 +201,37 @@ class Chat(Blackbox):
             {"role": "system", "content": system_prompt}
         ]
 
-        chat_inputs={
-            "model": user_model_name,
-            "messages": prompt_template + user_context + [
-                {
-                    "role": "user",
-                    "content": user_question
-                }
-            ],
-            "temperature": str(user_temperature),
-            "top_p": str(user_top_p),
-            "n": str(user_n),
-            "max_tokens": str(user_max_tokens),
-            "frequency_penalty": str(user_frequency_penalty),
-            "presence_penalty": str(user_presence_penalty),
-            "stop": str(user_stop),
-            "stream": user_stream,
-        }
+        if llm_model != "vllm":
+            chat_inputs={
+                "model": user_model_name,
+                "messages": prompt_template + user_context + [
+                    {
+                        "role": "user",
+                        "content": user_question
+                    }
+                ],
+                "temperature": str(user_temperature),
+                "top_p": str(user_top_p),
+                "n": str(user_n),
+                "max_tokens": str(user_max_tokens),
+                "frequency_penalty": str(user_frequency_penalty),
+                "presence_penalty": str(user_presence_penalty),
+                "stop": str(user_stop),
+                "stream": user_stream,
+            }
+        else:
+            chat_inputs={
+                "model": user_model_name,
+                "prompt": user_question,
+                "temperature": float(user_temperature),
+                "top_p": float(user_top_p),
+                "n": float(user_n),
+                "max_tokens": float(user_max_tokens),
+                "frequency_penalty": float(user_frequency_penalty),
+                "presence_penalty": float(user_presence_penalty),
+                # "stop": user_stop,
+                "stream": user_stream,
+            }
 
         # # 获取当前时间戳
         # timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -252,9 +284,14 @@ class Chat(Blackbox):
                     if response_result.get("choices") is None:
                         yield JSONResponse(content={"error": "LLM handle failure"}, status_code=status.HTTP_400_BAD_REQUEST)
                     else:
-                        print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
-                        yield fastchat_response.json()["choices"][0]["message"]["content"]
-
+                        if llm_model != "vllm":
+                            print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
+                            yield fastchat_response.json()["choices"][0]["message"]["content"]
+                        else:
+                            print("\n", "user_answer: ", fastchat_response.json()["choices"][0]["text"],"\n\n")
+                            yield fastchat_response.json()["choices"][0]["text"]
+
+
     async def fast_api_handler(self, request: Request) -> Response:
         try:
             data = await request.json()
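
Reviewer note (not part of the patch): the change routes requests either to the existing fastchat-style /v1/chat/completions endpoint or to a vLLM /v1/completions endpoint, and the two paths differ in payload shape (a `messages` list vs. a flat `prompt` string), headers (the vllm branch adds an `Authorization` header built from the key), and response parsing (`choices[0]["message"]["content"]` vs. `choices[0]["text"]`). The sketch below shows that contract in isolation, assuming a non-streaming request; the helper name `call_llm`, the placeholder model name, and the fixed sampling values are illustrative and not taken from the code, while the two URLs come from the diff.

# Minimal sketch, not part of the patch: the two request/response shapes the
# diff switches between, assuming non-streaming requests.
import requests

CHAT_URL = "http://10.6.80.75:23333/v1/chat/completions"  # fastchat-style endpoint from the diff
VLLM_URL = "http://10.6.80.94:8000/v1/completions"        # vLLM completions endpoint from the diff


def call_llm(question: str, model_name: str, api_key: str, llm_model: str = "vllm") -> str:
    """Send one non-streaming request and return the generated text."""
    headers = {"Content-Type": "application/json", "Cache-Control": "no-cache"}

    if llm_model != "vllm":
        # Chat-completions schema: the conversation goes in `messages`,
        # the answer comes back under choices[0]["message"]["content"].
        url = CHAT_URL
        payload = {
            "model": model_name,
            "messages": [{"role": "user", "content": question}],
            "temperature": 0.7,
            "max_tokens": 512,  # integer in the OpenAI-style schema
            "stream": False,
        }
    else:
        # Plain completions schema: a single flat `prompt` string,
        # the answer comes back under choices[0]["text"].
        url = VLLM_URL
        headers["Authorization"] = "Bearer " + api_key  # the vllm branch adds this header
        payload = {
            "model": model_name,
            "prompt": question,
            "temperature": 0.7,
            "max_tokens": 512,
            "stream": False,
        }

    resp = requests.post(url, headers=headers, json=payload, timeout=60)
    resp.raise_for_status()
    choice = resp.json()["choices"][0]
    return choice["message"]["content"] if llm_model != "vllm" else choice["text"]


if __name__ == "__main__":
    # Hypothetical usage; the model name is a placeholder.
    print(call_llm("澳门有哪些景点?", "Qwen2-7B-Instruct", api_key="vllm"))

Because the completions endpoint takes only a flat prompt string, the vllm branch of the patch also skips the `user_prompt_template` wrapping and does not send `prompt_template` or `user_context`, which is why the system prompt and chat history are dropped on that path.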