1 year ago · aa1d386042
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -144,37 +144,21 @@ def store_doc(
 
				     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
			
 
				 
			
 
				     print(file.content_type)
			
 
				-    if file.content_type not in [
			
 
				-        "application/pdf",
			
 
				-        "text/plain",
			
 
				-        "text/csv",
			
 
				-        "text/xml",
			
 
				-        "text/x-python",
			
 
				-        "text/css",
			
 
				-        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
			
 
				-        "application/octet-stream",
			
 
				-        "application/x-javascript",
			
 
				-    ]:
			
 
				-        raise HTTPException(
			
 
				-            status_code=status.HTTP_400_BAD_REQUEST,
			
 
				-            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
			
 
				-        )
			
 
				-    text_xml=["text/xml"]
			
 
				+    
			
 
				+    text_xml=["xml"]
			
 
				     octet_markdown=["md"]
			
 
				-    octet_plain=[
			
 
				+    known_source_ext=[
			
 
				         "go", "py", "java", "sh", "bat", "ps1", "cmd", "js", 
			
 
				         "css", "cpp", "hpp","h", "c", "cs", "sql", "log", "ini",
			
 
				         "pl" "pm", "r", "dart", "dockerfile", "env", "php", "hs",
			
 
				         "hsc", "lua", "nginxconf", "conf", "m", "mm", "plsql", "perl",
			
 
				         "rb", "rs", "db2", "scala", "bash", "swift", "vue", "svelte"
			
 
				         ]
			
 
				+    docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
			
 
				+    known_doc_ext=["doc","docx"]
			
 
				     file_ext=file.filename.split(".")[-1].lower()
			
 
				-    if file.content_type == "application/octet-stream" and file_ext not in (octet_markdown + octet_plain):
			
 
				-        raise HTTPException(
			
 
				-            status_code=status.HTTP_400_BAD_REQUEST,
			
 
				-            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
			
 
				-        )
			
 
				-
			
 
				+    known_type=True
			
 
				+    
			
 
				     try:
			
 
				         filename = file.filename
			
 
				         file_path = f"{UPLOAD_DIR}/{filename}"
			
@@ -188,27 +172,22 @@ def store_doc(
 
				             collection_name = calculate_sha256(f)[:63]
			
 
				         f.close()
			
 
				 
			
 
				-        if file.content_type == "application/pdf":
			
 
				+        if file_ext=="pdf":
			
 
				             loader = PyPDFLoader(file_path)
			
 
				-        elif (
			
 
				-            file.content_type
			
 
				-            == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
			
 
				-        ):
			
 
				+        elif (file.content_type ==docx_type or file_ext in known_doc_ext):
			
 
				             loader = Docx2txtLoader(file_path)
			
 
				-        
			
 
				-        elif file.content_type == "text/csv":
			
 
				+        elif file_ext=="csv":
			
 
				             loader = CSVLoader(file_path)
			
 
				-        elif file.content_type in text_xml:
			
 
				+        elif file_ext in text_xml:
			
 
				             loader=UnstructuredXMLLoader(file_path)
			
 
				-        elif file.content_type == "text/plain" or file.content_type.find("text/")>=0:
			
 
				+        elif file_ext in known_source_ext or file.content_type.find("text/")>=0:
			
 
				             loader = TextLoader(file_path)
			
 
				-        elif file.content_type == "application/octet-stream":
			
 
				-            if file_ext in octet_markdown:
			
 
				-                loader = UnstructuredMarkdownLoader(file_path)
			
 
				-            if file_ext in octet_plain:
			
 
				-                loader = TextLoader(file_path)
			
 
				-        elif file.content_type == "application/x-javascript":
			
 
				+        elif file_ext in octet_markdown:
			
 
				+            loader = UnstructuredMarkdownLoader(file_path)
			
 
				+        else:
			
 
				             loader = TextLoader(file_path)
			
 
				+            known_type=False
			
 
				+
			
 
				 
			
 
				         data = loader.load()
			
 
				         result = store_data_in_vector_db(data, collection_name)
			
@@ -218,6 +197,7 @@ def store_doc(
 
				                 "status": True,
			
 
				                 "collection_name": collection_name,
			
 
				                 "filename": filename,
			
 
				+                "known_type":known_type,
			
 
				             }
			
 
				         else:
			
 
				             raise HTTPException(
			
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -173,7 +173,8 @@
 
				 					) {
			
 
				 						uploadDoc(file);
			
 
				 					} else {
			
 
				-						toast.error(`Unsupported File Type '${file['type']}'.`);
			
 
				+						toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
			
 
				+						uploadDoc(file);
			
 
				 					}
			
 
				 				} else {
			
 
				 					toast.error(`File not found.`);
			
@@ -308,8 +309,9 @@
 
				 								uploadDoc(file);
			
 
				 								filesInputElement.value = '';
			
 
				 							} else {
			
 
				-								toast.error(`Unsupported File Type '${file['type']}'.`);
			
 
				-								inputFiles = null;
			
 
				+								toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
			
 
				+								uploadDoc(file);
			
 
				+								filesInputElement.value = '';
			
 
				 							}
			
 
				 						} else {
			
 
				 							toast.error(`File not found.`);
			
--- a/src/routes/(app)/documents/+page.svelte
+++ b/src/routes/(app)/documents/+page.svelte
@@ -73,7 +73,8 @@
 
				 				) {
			
 
				 					uploadDoc(file);
			
 
				 				} else {
			
 
				-					toast.error(`Unsupported File Type '${file['type']}'.`);
			
 
				+					toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
			
 
				+					uploadDoc(file);
			
 
				 				}
			
 
				 			} else {
			
 
				 				toast.error(`File not found.`);
			
@@ -153,7 +154,8 @@
 
				 						) {
			
 
				 							uploadDoc(file);
			
 
				 						} else {
			
 
				-							toast.error(`Unsupported File Type '${file['type']}'.`);
			
 
				+							toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
			
 
				+							uploadDoc(file);
			
 
				 						}
			
 
				 
			
 
				 						inputFiles = null;