diff --git a/src/blackbox/chroma_upsert.py b/src/blackbox/chroma_upsert.py index fda0e14..98b13f9 100755 --- a/src/blackbox/chroma_upsert.py +++ b/src/blackbox/chroma_upsert.py @@ -8,7 +8,7 @@ import requests import json from langchain_community.document_loaders.csv_loader import CSVLoader -from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, UnstructuredPDFLoader +from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, PyPDFLoader from langchain_community.vectorstores import Chroma from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings @@ -96,7 +96,7 @@ class ChromaUpsert(Blackbox): file_type = file.split(".")[-1] print("file_type: ",file_type) if file_type == "pdf": - loader = UnstructuredPDFLoader(file) + loader = PyPDFLoader(file) elif file_type == "txt": loader = TextLoader(file) elif file_type == "csv":