|
@@ -18,6 +18,7 @@ from langchain_community.document_loaders import (
|
|
|
TextLoader,
|
|
|
PyPDFLoader,
|
|
|
CSVLoader,
|
|
|
+ Docx2txtLoader,
|
|
|
)
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
from langchain_community.vectorstores import Chroma
|
|
@@ -135,7 +136,12 @@ def store_doc(
|
|
|
):
|
|
|
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
|
|
|
|
|
|
- if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
|
|
|
+ if file.content_type not in [
|
|
|
+ "application/pdf",
|
|
|
+ "text/plain",
|
|
|
+ "text/csv",
|
|
|
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
|
+ ]:
|
|
|
raise HTTPException(
|
|
|
status_code=status.HTTP_400_BAD_REQUEST,
|
|
|
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
|
@@ -156,6 +162,11 @@ def store_doc(
|
|
|
|
|
|
if file.content_type == "application/pdf":
|
|
|
loader = PyPDFLoader(file_path)
|
|
|
+ elif (
|
|
|
+ file.content_type
|
|
|
+ == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
|
+ ):
|
|
|
+ loader = Docx2txtLoader(file_path)
|
|
|
elif file.content_type == "text/plain":
|
|
|
loader = TextLoader(file_path)
|
|
|
elif file.content_type == "text/csv":
|