update chroma upsert

This commit is contained in:
ACBBZ
2024-05-07 06:25:03 +00:00
parent 31dbe29b8b
commit 3c622039e3

View File

@ -14,6 +14,8 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTex
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
import chromadb import chromadb
from ..utils import chroma_setting
from injector import singleton from injector import singleton
@singleton @singleton
class ChromaUpsert(Blackbox): class ChromaUpsert(Blackbox):
@ -32,7 +34,7 @@ class ChromaUpsert(Blackbox):
data = args[0] data = args[0]
return isinstance(data, list) return isinstance(data, list)
def processing(self, collection_id, file, string, context, setting) -> str: def processing(self, collection_id, file, string, context, setting: chroma_setting) -> str:
# 用户的操作历史 # 用户的操作历史
if context is None: if context is None:
context = [] context = []
@ -41,37 +43,19 @@ class ChromaUpsert(Blackbox):
{ {
"collection_id": "123", "collection_id": "123",
"action": "query", "action": "query",
"content": "你吃饭了吗" "content": "你吃饭了吗",
"answer": "吃了",
}, },
{ {
"collection_id": "123", "collection_id": "123",
"action": "upsert", "action": "upsert",
"content": "file_name or string" "content": "file_name or string",
"answer": "success, collection has 100 documents.",
}, },
] ]
# 用户的配置文件 每次操作都会更新 if collection_id is None and setting.ChromaSetting.collection_ids[0] != []:
setting = { collection_id = setting.ChromaSetting.collection_ids[0]
# collection_name
"collections": ["123", "collection_id2"],
# 插入的字符串的id从1开始
"ids_string": [0, 0],
# 插入的文件的文件名和ids
"ids_file": [
# collection_id1 插入的文件 和 对应的ids列表
{
"file_name1": ["file_name1", ids],
"file_name2": ["file_name2", ["1","2","3","4"]]
},
# collection_id2的文件和ids
{}
]
}
if collection_id is None and setting["collections"][0] != []:
collection_id = setting["collections"][0]
else: else:
collection_id = "123" collection_id = "123"
@ -105,7 +89,7 @@ class ChromaUpsert(Blackbox):
if string is not None: if string is not None:
# 生成一个新的id ids_string: 1 # 生成一个新的id ids_string: 1
ids = setting['ids_string'][0] + 1 ids = setting.ChromaSetting.string_ids[0] + 1
Chroma.from_texts(texts=[string], embedding=self.embedding_model, ids=[ids], collection_name=collection_id, client=self.client) Chroma.from_texts(texts=[string], embedding=self.embedding_model, ids=[ids], collection_name=collection_id, client=self.client)