hai 7 meses · 92dd173b27
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -726,7 +726,6 @@ def process_file(
 
				         )
			
 
				         docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
			
 
				         text_content = " ".join([doc.page_content for doc in docs])
			
 
				-
			
 
				         log.debug(f"text_content: {text_content}")
			
 
				 
			
 
				         Files.update_files_metadata_by_id(
			
@@ -795,10 +794,17 @@ def process_text(
 
				             metadata={"name": form_data.name, "created_by": user.id},
			
 
				         )
			
 
				     ]
			
 
				+    text_content = form_data.content
			
 
				+    log.debug(f"text_content: {text_content}")
			
 
				+
			
 
				     result = save_docs_to_vector_db(docs, collection_name)
			
 
				 
			
 
				     if result:
			
 
				-        return {"status": True, "collection_name": collection_name}
			
 
				+        return {
			
 
				+            "status": True,
			
 
				+            "collection_name": collection_name,
			
 
				+            "content": text_content,
			
 
				+        }
			
 
				     else:
			
 
				         raise HTTPException(
			
 
				             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
			
@@ -806,68 +812,6 @@ def process_text(
 
				         )
			
 
				 
			
 
				 
			
 
				-@app.get("/process/dir")
			
 
				-def process_docs_dir(user=Depends(get_admin_user)):
			
 
				-    for path in Path(DOCS_DIR).rglob("./**/*"):
			
 
				-        try:
			
 
				-            if path.is_file() and not path.name.startswith("."):
			
 
				-                tags = extract_folders_after_data_docs(path)
			
 
				-                filename = path.name
			
 
				-                file_content_type = mimetypes.guess_type(path)
			
 
				-
			
 
				-                with open(path, "rb") as f:
			
 
				-                    collection_name = calculate_sha256(f)[:63]
			
 
				-
			
 
				-                loader = Loader(
			
 
				-                    engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
			
 
				-                    TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
			
 
				-                    PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
			
 
				-                )
			
 
				-                docs = loader.load(filename, file_content_type[0], str(path))
			
 
				-
			
 
				-                try:
			
 
				-                    result = save_docs_to_vector_db(docs, collection_name)
			
 
				-
			
 
				-                    if result:
			
 
				-                        sanitized_filename = sanitize_filename(filename)
			
 
				-                        doc = Documents.get_doc_by_name(sanitized_filename)
			
 
				-
			
 
				-                        if doc is None:
			
 
				-                            doc = Documents.insert_new_doc(
			
 
				-                                user.id,
			
 
				-                                DocumentForm(
			
 
				-                                    **{
			
 
				-                                        "name": sanitized_filename,
			
 
				-                                        "title": filename,
			
 
				-                                        "collection_name": collection_name,
			
 
				-                                        "filename": filename,
			
 
				-                                        "content": (
			
 
				-                                            json.dumps(
			
 
				-                                                {
			
 
				-                                                    "tags": list(
			
 
				-                                                        map(
			
 
				-                                                            lambda name: {"name": name},
			
 
				-                                                            tags,
			
 
				-                                                        )
			
 
				-                                                    )
			
 
				-                                                }
			
 
				-                                            )
			
 
				-                                            if len(tags)
			
 
				-                                            else "{}"
			
 
				-                                        ),
			
 
				-                                    }
			
 
				-                                ),
			
 
				-                            )
			
 
				-                except Exception as e:
			
 
				-                    log.exception(e)
			
 
				-                    pass
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            log.exception(e)
			
 
				-
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				 @app.post("/process/youtube")
			
 
				 def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
			
 
				     try:
			
@@ -882,12 +826,15 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u
 
				             translation=app.state.YOUTUBE_LOADER_TRANSLATION,
			
 
				         )
			
 
				         docs = loader.load()
			
 
				+        text_content = " ".join([doc.page_content for doc in docs])
			
 
				+        log.debug(f"text_content: {text_content}")
			
 
				         save_docs_to_vector_db(docs, collection_name, overwrite=True)
			
 
				 
			
 
				         return {
			
 
				             "status": True,
			
 
				             "collection_name": collection_name,
			
 
				             "filename": form_data.url,
			
 
				+            "content": text_content,
			
 
				         }
			
 
				     except Exception as e:
			
 
				         log.exception(e)
			
@@ -910,12 +857,15 @@ def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
 
				             requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
			
 
				         )
			
 
				         docs = loader.load()
			
 
				+        text_content = " ".join([doc.page_content for doc in docs])
			
 
				+        log.debug(f"text_content: {text_content}")
			
 
				         save_docs_to_vector_db(docs, collection_name, overwrite=True)
			
 
				 
			
 
				         return {
			
 
				             "status": True,
			
 
				             "collection_name": collection_name,
			
 
				             "filename": form_data.url,
			
 
				+            "content": text_content,
			
 
				         }
			
 
				     except Exception as e:
			
 
				         log.exception(e)
			
@@ -1067,6 +1017,7 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
 
				 
			
 
				         loader = get_web_loader(urls)
			
 
				         docs = loader.load()
			
 
				+
			
 
				         save_docs_to_vector_db(docs, collection_name, overwrite=True)
			
 
				 
			
 
				         return {
			
@@ -1082,6 +1033,68 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
 
				         )
			
 
				 
			
 
				 
			
 
				+@app.get("/process/dir")
			
 
				+def process_docs_dir(user=Depends(get_admin_user)):
			
 
				+    for path in Path(DOCS_DIR).rglob("./**/*"):
			
 
				+        try:
			
 
				+            if path.is_file() and not path.name.startswith("."):
			
 
				+                tags = extract_folders_after_data_docs(path)
			
 
				+                filename = path.name
			
 
				+                file_content_type = mimetypes.guess_type(path)
			
 
				+
			
 
				+                with open(path, "rb") as f:
			
 
				+                    collection_name = calculate_sha256(f)[:63]
			
 
				+
			
 
				+                loader = Loader(
			
 
				+                    engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
			
 
				+                    TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
			
 
				+                    PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
			
 
				+                )
			
 
				+                docs = loader.load(filename, file_content_type[0], str(path))
			
 
				+
			
 
				+                try:
			
 
				+                    result = save_docs_to_vector_db(docs, collection_name)
			
 
				+
			
 
				+                    if result:
			
 
				+                        sanitized_filename = sanitize_filename(filename)
			
 
				+                        doc = Documents.get_doc_by_name(sanitized_filename)
			
 
				+
			
 
				+                        if doc is None:
			
 
				+                            doc = Documents.insert_new_doc(
			
 
				+                                user.id,
			
 
				+                                DocumentForm(
			
 
				+                                    **{
			
 
				+                                        "name": sanitized_filename,
			
 
				+                                        "title": filename,
			
 
				+                                        "collection_name": collection_name,
			
 
				+                                        "filename": filename,
			
 
				+                                        "content": (
			
 
				+                                            json.dumps(
			
 
				+                                                {
			
 
				+                                                    "tags": list(
			
 
				+                                                        map(
			
 
				+                                                            lambda name: {"name": name},
			
 
				+                                                            tags,
			
 
				+                                                        )
			
 
				+                                                    )
			
 
				+                                                }
			
 
				+                                            )
			
 
				+                                            if len(tags)
			
 
				+                                            else "{}"
			
 
				+                                        ),
			
 
				+                                    }
			
 
				+                                ),
			
 
				+                            )
			
 
				+                except Exception as e:
			
 
				+                    log.exception(e)
			
 
				+                    pass
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            log.exception(e)
			
 
				+
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				 class QueryDocForm(BaseModel):
			
 
				     collection_name: str
			
 
				     query: str
			
--- a/src/lib/components/chat/MessageInput/Commands.svelte
+++ b/src/lib/components/chat/MessageInput/Commands.svelte
@@ -30,7 +30,7 @@
 
				 	const uploadWeb = async (url) => {
			
 
				 		console.log(url);
			
 
				 
			
 
				-		const doc = {
			
 
				+		const fileItem = {
			
 
				 			type: 'doc',
			
 
				 			name: url,
			
 
				 			collection_name: '',
			
@@ -40,12 +40,14 @@
 
				 		};
			
 
				 
			
 
				 		try {
			
 
				-			files = [...files, doc];
			
 
				+			files = [...files, fileItem];
			
 
				 			const res = await processWeb(localStorage.token, '', url);
			
 
				 
			
 
				 			if (res) {
			
 
				-				doc.status = 'processed';
			
 
				-				doc.collection_name = res.collection_name;
			
 
				+				fileItem.status = 'processed';
			
 
				+				fileItem.collection_name = res.collection_name;
			
 
				+				fileItem.content = res.content;
			
 
				+
			
 
				 				files = files;
			
 
				 			}
			
 
				 		} catch (e) {
			
@@ -58,7 +60,7 @@
 
				 	const uploadYoutubeTranscription = async (url) => {
			
 
				 		console.log(url);
			
 
				 
			
 
				-		const doc = {
			
 
				+		const fileItem = {
			
 
				 			type: 'doc',
			
 
				 			name: url,
			
 
				 			collection_name: '',
			
@@ -68,12 +70,13 @@
 
				 		};
			
 
				 
			
 
				 		try {
			
 
				-			files = [...files, doc];
			
 
				+			files = [...files, fileItem];
			
 
				 			const res = await processYoutubeVideo(localStorage.token, url);
			
 
				 
			
 
				 			if (res) {
			
 
				-				doc.status = 'processed';
			
 
				-				doc.collection_name = res.collection_name;
			
 
				+				fileItem.status = 'processed';
			
 
				+				fileItem.collection_name = res.collection_name;
			
 
				+				fileItem.content = res.content;
			
 
				 				files = files;
			
 
				 			}
			
 
				 		} catch (e) {
			
--- a/src/lib/components/common/FileItem.svelte
+++ b/src/lib/components/common/FileItem.svelte
@@ -39,6 +39,8 @@
 
				 				if (url) {
			
 
				 					if (type === 'file') {
			
 
				 						window.open(`${url}/content`, '_blank').focus();
			
 
				+					} else {
			
 
				+						window.open(`${url}`, '_blank').focus();
			
 
				 					}
			
 
				 				}
			
 
				 			}
			
--- a/src/lib/components/common/FileItemModal.svelte
+++ b/src/lib/components/common/FileItemModal.svelte
@@ -20,8 +20,14 @@
 
				 	<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
			
 
				 		<div class="flex items-start justify-between pb-2">
			
 
				 			<div>
			
 
				-				<div class=" font-medium text-lg line-clamp-1 dark:text-gray-100">
			
 
				-					{file?.name ?? 'File'}
			
 
				+				<div class=" font-medium text-lg dark:text-gray-100">
			
 
				+					<a
			
 
				+						href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
			
 
				+						target="_blank"
			
 
				+						class="hover:underline line-clamp-1"
			
 
				+					>
			
 
				+						{file?.name ?? 'File'}
			
 
				+					</a>
			
 
				 				</div>
			
 
				 
			
 
				 				<div>