Browse Source

Merge pull request #418 from ollama-webui/dev

feat: docx support
Timothy Jaeryang Baek, 1 year ago
parent
commit
b1d9d511a1

+ 12 - 1
backend/apps/rag/main.py

@@ -18,6 +18,7 @@ from langchain_community.document_loaders import (
     TextLoader,
     PyPDFLoader,
     CSVLoader,
+    Docx2txtLoader,
 )
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
@@ -135,7 +136,12 @@ def store_doc(
 ):
     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
 
-    if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
+    if file.content_type not in [
+        "application/pdf",
+        "text/plain",
+        "text/csv",
+        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    ]:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
@@ -156,6 +162,11 @@ def store_doc(
 
         if file.content_type == "application/pdf":
             loader = PyPDFLoader(file_path)
+        elif (
+            file.content_type
+            == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        ):
+            loader = Docx2txtLoader(file_path)
         elif file.content_type == "text/plain":
             loader = TextLoader(file_path)
         elif file.content_type == "text/csv":

+ 1 - 1
backend/requirements.txt

@@ -16,12 +16,12 @@ aiohttp
 peewee
 bcrypt
 
-
 langchain
 langchain-community
 chromadb
 sentence_transformers
 pypdf
+docx2txt
 
 PyJWT
 pyjwt[crypto]

+ 16 - 2
src/lib/components/chat/MessageInput.svelte

@@ -143,7 +143,14 @@
 					const file = inputFiles[0];
 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 						reader.readAsDataURL(file);
-					} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
+					} else if (
+						[
+							'application/pdf',
+							'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+							'text/plain',
+							'text/csv'
+						].includes(file['type'])
+					) {
 						uploadDoc(file);
 					} else {
 						toast.error(`Unsupported File Type '${file['type']}'.`);
@@ -249,7 +256,14 @@
 							const file = inputFiles[0];
 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 								reader.readAsDataURL(file);
-							} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
+							} else if (
+								[
+									'application/pdf',
+									'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+									'text/plain',
+									'text/csv'
+								].includes(file['type'])
+							) {
 								uploadDoc(file);
 								filesInputElement.value = '';
 							} else {