mirror of
https://github.com/BoardWare-Genius/jarvis-models.git
synced 2025-12-13 16:53:24 +00:00
feat: update pdf upsert
This commit is contained in:
@ -8,7 +8,7 @@ import requests
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
from langchain_community.document_loaders.csv_loader import CSVLoader
|
from langchain_community.document_loaders.csv_loader import CSVLoader
|
||||||
from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, UnstructuredPDFLoader
|
from langchain_community.document_loaders import UnstructuredMarkdownLoader, DirectoryLoader, TextLoader, UnstructuredHTMLLoader, JSONLoader, Docx2txtLoader, UnstructuredExcelLoader, PyPDFLoader
|
||||||
from langchain_community.vectorstores import Chroma
|
from langchain_community.vectorstores import Chroma
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
|
||||||
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
||||||
@ -96,7 +96,7 @@ class ChromaUpsert(Blackbox):
|
|||||||
file_type = file.split(".")[-1]
|
file_type = file.split(".")[-1]
|
||||||
print("file_type: ",file_type)
|
print("file_type: ",file_type)
|
||||||
if file_type == "pdf":
|
if file_type == "pdf":
|
||||||
loader = UnstructuredPDFLoader(file)
|
loader = PyPDFLoader(file)
|
||||||
elif file_type == "txt":
|
elif file_type == "txt":
|
||||||
loader = TextLoader(file)
|
loader = TextLoader(file)
|
||||||
elif file_type == "csv":
|
elif file_type == "csv":
|
||||||
|
|||||||
Reference in New Issue
Block a user