From 8358a265c86b8c71548f5f86a27f8a5fa0d0f451 Mon Sep 17 00:00:00 2001
From: tom <tpcacbbz@gmail.com>
Date: Mon, 18 Aug 2025 17:21:55 +0800
Subject: [PATCH] feat: rename string to text and replace context with text_ids in ChromaUpsert

---
 src/blackbox/chroma_upsert.py | 63 ++++++++++++-----------------------
 1 file changed, 21 insertions(+), 42 deletions(-)

diff --git a/src/blackbox/chroma_upsert.py b/src/blackbox/chroma_upsert.py
index f266e0a..b4d1816 100755
--- a/src/blackbox/chroma_upsert.py
+++ b/src/blackbox/chroma_upsert.py
@@ -43,25 +43,7 @@ class ChromaUpsert(Blackbox):
         return isinstance(data, list)
 
     # @logging_time(logger=logger)    
-    def processing(self, file, string, context: list, settings: dict) -> str:
-        # 用户的操作历史
-        if context is None:
-            context = []
-            
-        # context = [
-        #     {
-        #         "collection_id": "123",
-        #         "action": "query",
-        #         "content": "你吃饭了吗",
-        #         "answer": "吃了",
-        #     },
-        #     {
-        #         "collection_id": "123",
-        #         "action": "upsert",
-        #         "content": "file_name or string",
-        #         "answer": "collection 123 has 12472 documents. /tmp/Cheap and Quick：Efficient Vision-Language Instruction Tuning for Large Language Models.pdf ids is 0~111",
-        #     },
-        # ]
+    def processing(self, file, text, text_ids, settings: dict) -> str:
 
         if settings is None:
             settings = {}
@@ -101,7 +83,8 @@ class ChromaUpsert(Blackbox):
         else:
             embedding_model = SentenceTransformerEmbeddings(model_name=chroma_embedding_model, device = "cuda:0")
 
-
+        response_file =''
+        response_string = ''
         if file is not None:
             file_type = file.split(".")[-1]
             print("file_type: ",file_type)
@@ -129,35 +112,28 @@ class ChromaUpsert(Blackbox):
 
             Chroma.from_documents(documents=docs, embedding=embedding_model, ids=ids, collection_name=chroma_collection_id, client=client)
             
-            collection_number = client.get_collection(chroma_collection_id).count()
-            response_file = f"collection {chroma_collection_id} has {collection_number} documents. {file} ids is 0~{len(docs)-1}"
+            response_file = f"\n{file} ids is 0~{len(docs)-1}"
 
-        if string is not None:
-            # 生成一个新的id  ids_string: 1
-            # ids =  setting.ChromaSetting.string_ids[0] + 1
-            ids =  "1"
+        if text is not None and text_ids is not None:
+            Chroma.from_texts(texts=[text], embedding=embedding_model, ids=[text_ids], collection_name=chroma_collection_id, client=client)
 
-            Chroma.from_texts(texts=[string], embedding=embedding_model, ids=[ids], collection_name=chroma_collection_id, client=client)
+            response_string = f"\n{text} ids is {text_ids}"
 
 
-            collection_number = client.get_collection(chroma_collection_id).count()
-            response_string = f"collection {chroma_collection_id} has {collection_number} documents. {string} ids is {ids}"
-        
-        
-        if file is not None and string is not None:
-            return response_file + " \n and " + response_string
-        elif file is not None and string is None:
-            return response_file
-        elif file is None and string is not None:
-            return response_string
+        vector_count = client.get_collection(chroma_collection_id).count()
+        response_documents_num = f"collection {chroma_collection_id} has {vector_count} vectors."
+
+        print(client.get_collection(chroma_collection_id).get())
+
+        return response_documents_num + response_file + response_string
 
 
 
     async def fast_api_handler(self, request: Request) -> Response:
  
         user_file = (await request.form()).get("file")
-        user_string = (await request.form()).get("string")
-        context = (await request.form()).get("context")
+        user_text = (await request.form()).get("text")
+        user_text_ids = (await request.form()).get("text_ids")
         setting: dict = (await request.form()).get("settings")
 
         if isinstance(setting, str):
@@ -166,8 +142,11 @@ class ChromaUpsert(Blackbox):
             except json.JSONDecodeError:
                 return JSONResponse(content={"error": "Invalid settings format"}, status_code=status.HTTP_400_BAD_REQUEST)
         
-        if user_file is None and user_string is None:
-            return JSONResponse(content={"error": "file or string is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        if user_file is None and user_text is None:
+            return JSONResponse(content={"error": "file or text is required"}, status_code=status.HTTP_400_BAD_REQUEST)
+        
+        if user_text is not None and user_text_ids is None:
+            return JSONResponse(content={"error": "text_ids is required when text is provided"}, status_code=status.HTTP_400_BAD_REQUEST)
 
         if user_file is not None:
             pdf_bytes = await user_file.read()
@@ -182,7 +161,7 @@ class ChromaUpsert(Blackbox):
             safe_filename = None
 
         try:
-            txt = self.processing(safe_filename, user_string, context, setting)
+            txt = self.processing(safe_filename, user_text, user_text_ids, setting)
             print(txt)
         except ValueError as e:
             return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)