From 8358a265c86b8c71548f5f86a27f8a5fa0d0f451 Mon Sep 17 00:00:00 2001 From: tom Date: Mon, 18 Aug 2025 17:21:55 +0800 Subject: [PATCH] feat: update the parameter name --- src/blackbox/chroma_upsert.py | 63 ++++++++++++----------------------- 1 file changed, 21 insertions(+), 42 deletions(-) diff --git a/src/blackbox/chroma_upsert.py b/src/blackbox/chroma_upsert.py index f266e0a..b4d1816 100755 --- a/src/blackbox/chroma_upsert.py +++ b/src/blackbox/chroma_upsert.py @@ -43,25 +43,7 @@ class ChromaUpsert(Blackbox): return isinstance(data, list) # @logging_time(logger=logger) - def processing(self, file, string, context: list, settings: dict) -> str: - # 用户的操作历史 - if context is None: - context = [] - - # context = [ - # { - # "collection_id": "123", - # "action": "query", - # "content": "你吃饭了吗", - # "answer": "吃了", - # }, - # { - # "collection_id": "123", - # "action": "upsert", - # "content": "file_name or string", - # "answer": "collection 123 has 12472 documents. /tmp/Cheap and Quick:Efficient Vision-Language Instruction Tuning for Large Language Models.pdf ids is 0~111", - # }, - # ] + def processing(self, file, text, text_ids, settings: dict) -> str: if settings is None: settings = {} @@ -101,7 +83,8 @@ class ChromaUpsert(Blackbox): else: embedding_model = SentenceTransformerEmbeddings(model_name=chroma_embedding_model, device = "cuda:0") - + response_file ='' + response_string = '' if file is not None: file_type = file.split(".")[-1] print("file_type: ",file_type) @@ -129,35 +112,28 @@ class ChromaUpsert(Blackbox): Chroma.from_documents(documents=docs, embedding=embedding_model, ids=ids, collection_name=chroma_collection_id, client=client) - collection_number = client.get_collection(chroma_collection_id).count() - response_file = f"collection {chroma_collection_id} has {collection_number} documents. {file} ids is 0~{len(docs)-1}" + response_file = f"\n{file} ids is 0~{len(docs)-1}" - if string is not None: - # 生成一个新的id ids_string: 1 - # ids = setting.ChromaSetting.string_ids[0] + 1 - ids = "1" + if text is not None and text_ids is not None: + Chroma.from_texts(texts=[text], embedding=embedding_model, ids=[text_ids], collection_name=chroma_collection_id, client=client) - Chroma.from_texts(texts=[string], embedding=embedding_model, ids=[ids], collection_name=chroma_collection_id, client=client) + response_string = f"\n{text} ids is {ids}" - collection_number = client.get_collection(chroma_collection_id).count() - response_string = f"collection {chroma_collection_id} has {collection_number} documents. {string} ids is {ids}" - - - if file is not None and string is not None: - return response_file + " \n and " + response_string - elif file is not None and string is None: - return response_file - elif file is None and string is not None: - return response_string + vector_count = client.get_collection(chroma_collection_id).count() + response_documents_num = f"collection {chroma_collection_id} has {vector_count} vectors." + + print(client.get_collection(chroma_collection_id).get()) + + return response_documents_num + response_file + response_string async def fast_api_handler(self, request: Request) -> Response: user_file = (await request.form()).get("file") - user_string = (await request.form()).get("string") - context = (await request.form()).get("context") + user_text = (await request.form()).get("text") + user_text_ids = (await request.form()).get("text_ids") setting: dict = (await request.form()).get("settings") if isinstance(setting, str): @@ -166,8 +142,11 @@ class ChromaUpsert(Blackbox): except json.JSONDecodeError: return JSONResponse(content={"error": "Invalid settings format"}, status_code=status.HTTP_400_BAD_REQUEST) - if user_file is None and user_string is None: - return JSONResponse(content={"error": "file or string is required"}, status_code=status.HTTP_400_BAD_REQUEST) + if user_file is None and user_text is None: + return JSONResponse(content={"error": "file or text is required"}, status_code=status.HTTP_400_BAD_REQUEST) + + if user_text is not None and user_text_ids is None: + return JSONResponse(content={"error": "text_ids is required when text is provided"}, status_code=status.HTTP_400_BAD_REQUEST) if user_file is not None: pdf_bytes = await user_file.read() @@ -182,7 +161,7 @@ class ChromaUpsert(Blackbox): safe_filename = None try: - txt = self.processing(safe_filename, user_string, context, setting) + txt = self.processing(safe_filename, user_text, user_text_ids, setting) print(txt) except ValueError as e: return JSONResponse(content={"error": str(e)}, status_code=status.HTTP_400_BAD_REQUEST)