feat: rename the `string` parameter to `text` and replace `context` with `text_ids`

2025-12-13 16:53:24 +00:00 · 2025-08-18 17:21:55 +08:00
parent af62ccc2ea
commit 8358a265c8
1 changed files with 21 additions and 42 deletions
--- a/src/blackbox/chroma_upsert.py
+++ b/src/blackbox/chroma_upsert.py
@ -43,25 +43,7 @@ class ChromaUpsert(Blackbox):
        return isinstance(data, list)

    # @logging_time(logger=logger)    
-    def processing(self, file, string, context: list, settings: dict) -> str:
-        # 用户的操作历史
-        if context is None:
-            context = []
-            
-        # context = [
-        #     {
-        #         "collection_id": "123",
-        #         "action": "query",
-        #         "content": "你吃饭了吗",
-        #         "answer": "吃了",
-        #     },
-        #     {
-        #         "collection_id": "123",
-        #         "action": "upsert",
-        #         "content": "file_name or string",
-        #         "answer": "collection 123 has 12472 documents. /tmp/Cheap and Quick：Efficient Vision-Language Instruction Tuning for Large Language Models.pdf ids is 0~111",
-        #     },
-        # ]
+    def processing(self, file, text, text_ids, settings: dict) -> str:

        if settings is None:
            settings = {}
@ -101,7 +83,8 @@ class ChromaUpsert(Blackbox):
        else:
            embedding_model = SentenceTransformerEmbeddings(model_name=chroma_embedding_model, device = "cuda:0")

-
+        response_file =''
+        response_string = ''
        if file is not None:
            file_type = file.split(".")[-1]
            print("file_type: ",file_type)
@ -129,35 +112,28 @@ class ChromaUpsert(Blackbox):

            Chroma.from_documents(documents=docs, embedding=embedding_model, ids=ids, collection_name=chroma_collection_id, client=client)
            
-            collection_number = client.get_collection(chroma_collection_id).count()
-            response_file = f"collection {chroma_collection_id} has {collection_number} documents. {file} ids is 0~{len(docs)-1}"
+            response_file = f"\n{file} ids is 0~{len(docs)-1}"

-        if string is not None:
-            # 生成一个新的id  ids_string: 1
-            # ids =  setting.ChromaSetting.string_ids[0] + 1
-            ids =  "1"
+        if text is not None and text_ids is not None:
+            Chroma.from_texts(texts=[text], embedding=embedding_model, ids=[text_ids], collection_name=chroma_collection_id, client=client)

-            Chroma.from_texts(texts=[string], embedding=embedding_model, ids=[ids], collection_name=chroma_collection_id, client=client)
+            response_string = f"\n{text} ids is {text_ids}"  # report the caller-supplied text id; `ids` belongs to the file branch and is unset for text-only requests


-            collection_number = client.get_collection(chroma_collection_id).count()
-            response_string = f"collection {chroma_collection_id} has {collection_number} documents. {string} ids is {ids}"
+        vector_count = client.get_collection(chroma_collection_id).count()
+        response_documents_num = f"collection {chroma_collection_id} has {vector_count} vectors."

+        print(client.get_collection(chroma_collection_id).get())

-        if file is not None and string is not None:
-            return response_file + " \n and " + response_string
-        elif file is not None and string is None:
-            return response_file
-        elif file is None and string is not None:
-            return response_string
+        return response_documents_num + response_file + response_string



    async def fast_api_handler(self, request: Request) -> Response:
 
        user_file = (await request.form()).get("file")
-        user_string = (await request.form()).get("string")
-        context = (await request.form()).get("context")
+        user_text = (await request.form()).get("text")
+        user_text_ids = (await request.form()).get("text_ids")
        setting: dict = (await request.form()).get("settings")

        if isinstance(setting, str):
@ -166,8 +142,11 @@ class ChromaUpsert(Blackbox):
            except json.JSONDecodeError:
                return JSONResponse(content={"error": "Invalid settings format"}, status_code=status.HTTP_400_BAD_REQUEST)
        
-        if user_file is None and user_string is None:
-            return JSONResponse(content={"error": "file or string is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        if user_file is None and user_text is None:
+            return JSONResponse(content={"error": "file or text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        
+        if user_text is not None and user_text_ids is None:
+            return JSONResponse(content={"error": "text_ids is required when text is provided"}, status_code=status.HTTP_400_BAD_REQUEST)

        if user_file is not None:
            pdf_bytes = await user_file.read()
@ -182,7 +161,7 @@ class ChromaUpsert(Blackbox):
            safe_filename = None

        try:
-            txt = self.processing(safe_filename, user_string, context, setting)
+            txt = self.processing(safe_filename, user_text, user_text_ids, setting)
            print(txt)
        except ValueError as e:
            return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)