1 rok temu · c1ec604f21
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -19,6 +19,8 @@ from langchain_community.document_loaders import (
 
				     PyPDFLoader,
			
 
				     CSVLoader,
			
 
				     Docx2txtLoader,
			
 
				+    UnstructuredWordDocumentLoader,
			
 
				+    UnstructuredMarkdownLoader,
			
 
				 )
			
 
				 from langchain.text_splitter import RecursiveCharacterTextSplitter
			
 
				 from langchain_community.vectorstores import Chroma
			
@@ -140,17 +142,27 @@ def store_doc(
 
				 ):
			
 
				     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
			
 
				 
			
 
				+    print(file.content_type)
			
 
				     if file.content_type not in [
			
 
				         "application/pdf",
			
 
				         "text/plain",
			
 
				         "text/csv",
			
 
				         "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
			
 
				+        "application/octet-stream",
			
 
				     ]:
			
 
				         raise HTTPException(
			
 
				             status_code=status.HTTP_400_BAD_REQUEST,
			
 
				             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
			
 
				         )
			
 
				 
			
 
				+    if file.content_type == "application/octet-stream" and file.filename.split(".")[
			
 
				+        -1
			
 
				+    ] not in ["md"]:
			
 
				+        raise HTTPException(
			
 
				+            status_code=status.HTTP_400_BAD_REQUEST,
			
 
				+            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
			
 
				+        )
			
 
				+
			
 
				     try:
			
 
				         filename = file.filename
			
 
				         file_path = f"{UPLOAD_DIR}/{filename}"
			
@@ -175,6 +187,9 @@ def store_doc(
 
				             loader = TextLoader(file_path)
			
 
				         elif file.content_type == "text/csv":
			
 
				             loader = CSVLoader(file_path)
			
 
				+        elif file.content_type == "application/octet-stream":
			
 
				+            if file.filename.split(".")[-1] == "md":
			
 
				+                loader = UnstructuredMarkdownLoader(file_path)
			
 
				 
			
 
				         data = loader.load()
			
 
				         result = store_data_in_vector_db(data, collection_name)
			
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -22,6 +22,7 @@ chromadb
 
				 sentence_transformers
			
 
				 pypdf
			
 
				 docx2txt
			
 
				+unstructured
			
 
				 
			
 
				 PyJWT
			
 
				 pyjwt[crypto]
			
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -149,9 +149,13 @@
 
				 
			
 
				 				if (inputFiles && inputFiles.length > 0) {
			
 
				 					const file = inputFiles[0];
			
 
				+					console.log(file, file.name.split('.').at(-1));
			
 
				 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
			
 
				 						reader.readAsDataURL(file);
			
 
				-					} else if (SUPPORTED_FILE_TYPE.includes(file['type'])) {
			
 
				+					} else if (
			
 
				+						SUPPORTED_FILE_TYPE.includes(file['type']) ||
			
 
				+						['md'].includes(file.name.split('.').at(-1))
			
 
				+					) {
			
 
				 						uploadDoc(file);
			
 
				 					} else {
			
 
				 						toast.error(`Unsupported File Type '${file['type']}'.`);
			
--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@@ -14,6 +14,7 @@ export const REQUIRED_OLLAMA_VERSION = '0.1.16';
 
				 export const SUPPORTED_FILE_TYPE = [
			
 
				 	'application/pdf',
			
 
				 	'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
			
 
				+	'text/markdown',
			
 
				 	'text/plain',
			
 
				 	'text/csv'
			
 
				 ];