mirror of
https://github.com/BoardWare-Genius/jarvis-models.git
synced 2025-12-13 16:53:24 +00:00
feat: update pdf upsert
This commit is contained in:
@@ -8,7 +8,7 @@ import requests
|
||||
import json
|
||||
|
||||
from langchain_community.document_loaders.csv_loader import CSVLoader
|
||||
-from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, UnstructuredPDFLoader
|
||||
+from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, PyPDFLoader
|
||||
from langchain_community.vectorstores import Chroma
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
||||
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
||||
@@ -96,7 +96,7 @@ class ChromaUpsert(Blackbox):
|
||||
file_type = file.split(".")[-1]
|
||||
print("file_type: ",file_type)
|
||||
if file_type == "pdf":
|
||||
-loader = UnstructuredPDFLoader(file)
|
||||
+loader = PyPDFLoader(file)
|
||||
elif file_type == "txt":
|
||||
loader = TextLoader(file)
|
||||
elif file_type == "csv":
|
||||
|
||||
Reference in New Issue
Block a user