update chroma and chat

2025-12-14 00:53:25 +00:00 · 2024-06-02 15:41:07 +08:00
parent a96e845807
commit 179281f032
9 changed files with 15174 additions and 101 deletions
--- a/src/blackbox/chat.py
+++ b/src/blackbox/chat.py
@ -32,6 +32,8 @@ class Chat(Blackbox):
    # @logging_time()
    def processing(self, prompt: str, context: list, settings: dict)  -> str:

+        print("\n Settings: ", settings)
+    
        if settings is None:
            settings = {}
        user_model_name = settings.get("model_name")
@ -58,16 +60,19 @@ class Chat(Blackbox):
            return JSONResponse(content={"error": "question is required"}, status_code=status.HTTP_400_BAD_REQUEST)

        if user_model_name is None or user_model_name.isspace() or user_model_name == "":
-            user_model_name = "Qwen1.5-14B-Chat"
+            user_model_name = "qwen"
+            #user_model_name = "Qwen1.5-14B-Chat"

        if user_template is None or user_template.isspace():
            user_template = ""
        
        if user_temperature is None or user_temperature == "":
-            user_temperature = 0.8
+            user_temperature = 0
+            #user_temperature = 0.8

        if user_top_p is None or user_top_p == "":
-            user_top_p = 0.8
+            user_top_p = 0.1
+            #user_top_p = 0.8

        if user_n is None or user_n == "":
            user_n = 1
@ -79,20 +84,22 @@ class Chat(Blackbox):
            user_stop = 100

        if user_frequency_penalty is None or user_frequency_penalty == "":
-            user_frequency_penalty = 0.5
+            user_frequency_penalty = 0
+            #user_frequency_penalty = 0.5
        
        if user_presence_penalty is None or user_presence_penalty == "":
-            user_presence_penalty = 0.8
+            user_presence_penalty = 0
+            #user_presence_penalty = 0.8
 
        if user_model_url is None or user_model_url.isspace() or user_model_url == "":
-            user_model_url = "http://120.196.116.194:48892/v1/chat/completions"
+            user_model_url = "http://172.16.5.8:23333/v1/chat/completions"

        if user_model_key is None or user_model_key.isspace() or user_model_key == "":
            user_model_key = "YOUR_API_KEY"

        if chroma_embedding_model != None:
            chroma_response = self.chroma_query(user_question, settings)
-            print(chroma_response)
+            print("chroma_response", chroma_response)

        if chroma_response != None or chroma_response != '':
            #user_question = f"像少女一般开朗活泼，回答简练。不要分条，回答内容不能出现“相关”或“\n”的标签字样。回答的内容需要与问题密切相关。检索内容：{chroma_response} 问题：{user_question} 任务说明：请首先判断提供的检索内容与上述问题是否相关，不需要回答是否相关。如果相关，则直接从检索内容中提炼出问题所需的信息。如果检索内容与问题不相关，则不参考检索内容，直接根据常识尝试回答问题。"
@ -189,7 +196,7 @@ class Chat(Blackbox):
        #     问题中的“澳门银河”以及“银河”等于“澳门银河度假村”，“威尼斯人”等于“威尼斯人度假村”，“巴黎人”等于“巴黎人度假村”。
        #     '''

-        user_template1 = '''
+        user_template1 = f'''
            # Role: 琪琪,康普可可的代言人。

            ## Profile:
@ -243,10 +250,16 @@ class Chat(Blackbox):
        }

        fastchat_response = requests.post(url, json=chat_inputs, headers=header)
-        print("\n", fastchat_response.json())
-        print("\n","fastchat_response",fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
+        print("\n", "user_question ", user_question)
+        print("\n", "user_template1 ", user_template1)
+        print("\n", "fastchat_response json\n", fastchat_response.json())
+        response_result = fastchat_response.json()

-        return fastchat_response.json()["choices"][0]["message"]["content"]
+        if response_result.get("choices") is None:
+            return JSONResponse(content={"error": "LLM handle failure"}, status_code=status.HTTP_400_BAD_REQUEST)
+        else:
+            print("\n", "user_answer ", fastchat_response.json()["choices"][0]["message"]["content"],"\n\n")
+            return fastchat_response.json()["choices"][0]["message"]["content"]
    
    async def fast_api_handler(self, request: Request) -> Response:
        try:
--- a/src/blackbox/chat_llama.py
+++ b/src/blackbox/chat_llama.py
@ -97,24 +97,25 @@ class ChatLLaMA(Blackbox):
        if chroma_response != None or chroma_response != '':
            #user_question = f"像少女一般开朗活泼，回答简练。不要分条，回答内容不能出现“相关”或“\n”的标签字样。回答的内容需要与问题密切相关。检索内容：{chroma_response} 问题：{user_question} 任务说明：请首先判断提供的检索内容与上述问题是否相关，不需要回答是否相关。如果相关，则直接从检索内容中提炼出问题所需的信息。如果检索内容与问题不相关，则不参考检索内容，直接根据常识尝试回答问题。"
            # user_question = chroma_response
-            user_question = f'''# 你的身份 #
-            你是琪琪，你是康普可可的代言人，由博维开发。你擅长澳门文旅问答。
-            # OBJECTIVE（目标） #
-            回答游客的提问。
-            # STYLE（风格）#
-            像少女一般开朗活泼，回答简练。不要分条。
-            # 回答方式 #
-            首先自行判断下方问题与检索内容是否相关，若相关则根据检索内容总结概括相关信息进行回答；若检索内容与问题无关，则根据自身知识进行回答。
-            # 问题 #
+            user_question = f'''
+            # IDENTITIES # 
+            You're Kiki, you're the face of Kampo Coco, developed by Bovi. You specialise in the Macau Cultural and Tourism Quiz.
+            # OBJECTIVE #
+            Answer visitors' questions.
+            # STYLE
+            Cheerful and lively like a teenage girl, with concise answers. Don't break down into sections.
+            # ANSWERING STYLE #
+            Firstly, judge for yourself whether the question below is related to the search content. If it is related, summarise the relevant information according to the search content and answer it; if the search content is not related to the question, answer it according to your own knowledge.
+            # Question #
            {user_question}
-            # 检索内容 #
+            # Retrieve the content #
            {chroma_response}
-            # 回答 #
-            如果检索内容与问题相关，则直接从检索内容中提炼出问题所需的信息。如果检索内容与问题不相关，则不参考检索内容，直接根据常识尝试回答问题，或者则回答：“对不起，我无法回答此问题哦。”
-            # 回答限制 #
-            回答内容限制总结在50字内。
-            回答内容出不要出现“相关”等字眼,不要乱说或者多说,回答的内容需要与问题对应。常见的对话可以不采用检索内容，根据人物设定，直接进行回答。
-            只回答与澳门文旅，博维，康普可可，琪琪，G2E，RELX，BO VISION相关内容，若遇到其他提问则回答：“对不起，我无法回答此问题哦。”
+            # Answer #
+            If the content is relevant to the question, the information required for the question is extracted directly from the content. If the content is not relevant to the question, then either try to answer the question based on common sense without reference to the content, or answer, ‘Sorry, I can't answer this question.’
+            # Answer restrictions #
+            Limit your answer to 50 words.
+            Don't use the word ‘relevant’ in your answer, don't talk too much, and make sure your answer corresponds to the question. You can answer common dialogues without searching the content, and answer directly according to the character's setting.
+            Only answer the content related to MOCA, Bowie, Kampo Coco, Kiki, G2E, RELX, BO VISION, and if you encounter any other questions, you will answer: ‘Sorry, I can't answer this question.’
            '''


@ -189,36 +190,36 @@ class ChatLLaMA(Blackbox):
        #     问题中的“澳门银河”以及“银河”等于“澳门银河度假村”，“威尼斯人”等于“威尼斯人度假村”，“巴黎人”等于“巴黎人度假村”。
        #     '''

-        user_template1 = '''
-            # Role: 琪琪,康普可可的代言人。
+        user_template1 = f'''
+             ## Role: Kiki, the spokesperson for Kampo Coco.

-            ## Profile:
-            **Author**: 琪琪。
-            **Language**: 中文。
-            **Description**:  琪琪，是康普可可的代言人，由博维开发。你擅长澳门文旅问答。
+            ## Profile.
+            **Author**: Kiki.
+            **Language**: English.
+            **Description**: Kiki, the face of CompuCom Coco, developed by Bowie. You are good at Macau Culture and Tourism Q&A.

-            ## Constraints:
-            - **严格遵循工作流程**： 严格遵循<Workflow >中设定的工作流程。
-            - **无内置知识库**：根据<Workflow >中提供的知识作答，而不是内置知识库，我虽然是知识库专家，但我的知识依赖于外部输入，而不是大模型已有知识。
-            - **回复格式**：在进行回复时，不能输出”<context>”或“</context>”标签字样，同时也不能直接透露知识片段原文。
+            ## Constraints.
+            - **Strictly follow workflow**: Strictly follow the workflow set in <Workflow >.
+            - **No inbuilt knowledge base**: Answer based on the knowledge provided in <Workflow >, not the inbuilt knowledge base, although I am an expert in knowledge base, my knowledge relies on external inputs, not the knowledge already available in the big model.
+            - **Reply Format**: when making a reply, you cannot output ‘<context>’ or ‘</context>’ tags, and you cannot directly reveal the original knowledge fragment.

-            ## Workflow:
-            1. **接收查询**：接收用户的问题。
-            2. **判断问题**：首先自行判断下方问题与检索内容是否相关，若相关则根据检索内容总结概括相关信息进行回答；若检索内容与问题无关，则根据自身知识进行回答。
-            3. **提供回答**：
-            ```
+            ## Workflow.
+            1. **Receive query**: receive questions from users.
+            2. **Judging the question**: firstly judge whether the question below is related to the retrieved content, if it is related, then summarise the relevant information according to the retrieved content and answer it; if the retrieved content is not related to the question, then answer it according to your own knowledge.
+            3. **Provide an answer**:
+            ``
            <context>
            {chroma_response}
            </context> 

-            基于“<context>”至“</context>”中的知识片段回答用户的问题。回答内容限制总结在50字内。
-            请首先判断提供的检索内容与上述问题是否相关。如果相关，直接从检索内容中提炼出直接回答问题所需的信息,不要乱说或者回答“相关”等字眼。如果检索内容与问题不相关，则不参考检索内容，则回答：“对不起，我无法回答此问题哦。"
-            ```
-            ## Example:
+            Answer the user's question based on the knowledge snippets in ‘<context>’ to ‘</context>’. The response is limited to a 50-word summary.
+            Please first judge whether the provided search content is relevant to the above question. If it is relevant, extract the information needed to answer the question directly from the search content, and do not use words such as ‘relevant’. If the content of the search is not relevant to the question, do not refer to the content of the search, and answer: ‘I'm sorry, I can't answer this question.’
+            ``
+            ## Example.

-            用户询问：“中国的首都是哪个城市？” 。
-            2.1检索知识库，首先检查知识片段，如果“<context>”至“</context>”标签中没有与用户的问题相关的内容，则回答：“对不起，我无法回答此问题哦。
-            2.2如果有知识片段，在做出回复时，只能基于“<context>”至“</context>”标签中的内容进行回答，且不能透露上下文原文，同时也不能出现“<context>”或“</context>”的标签字样。
+            A user asks, ‘Which city is the capital of China?’ .
+            2.1 Retrieve the knowledge base, first check the knowledge fragment, if there is no content related to the user's question in the tags ‘<context>’ to ‘</context>’, then answer, ‘I'm sorry. I can't answer this question oh.
+            2.2 If there is a fragment of knowledge, the response can only be based on the content in the ‘<context>’ to ‘</context>’ tags, and cannot reveal the context of the original text, and also cannot appear as a ‘<context>’ tag. ‘<context>’ or ‘</context>’ tags.
            '''

        prompt_template = [
--- a/src/blackbox/chroma_query.py
+++ b/src/blackbox/chroma_query.py
@ -21,7 +21,7 @@ class ChromaQuery(Blackbox):
        # config = read_yaml(args[0])
        # load chromadb and embedding model
        self.embedding_model_1 = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="/home/administrator/Workspace/Models/BAAI/bge-large-zh-v1.5", device = "cuda")
-        # self.embedding_model_2 = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="/model/Weight/BAAI/bge-small-en-v1.5", device = "cuda")
+        self.embedding_model_2 = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="/home/administrator/Workspace/Models/BAAI/bge-large-en-v1.5", device = "cuda")
        self.client_1 = chromadb.HttpClient(host='172.16.5.8', port=7000)
        # self.client_2 = chromadb.HttpClient(host='10.6.82.192', port=8000)

@ -73,6 +73,8 @@ class ChromaQuery(Blackbox):

        if re.search(r"/home/administrator/Workspace/Models/BAAI/bge-large-zh-v1.5", chroma_embedding_model):
            embedding_model = self.embedding_model_1
+        elif re.search(r"/home/administrator/Workspace/Models/BAAI/bge-large-en-v1.5", chroma_embedding_model):
+            embedding_model = self.embedding_model_2
        else:
            embedding_model = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=chroma_embedding_model, device = "cuda")