浏览代码

enh: full context web search

Timothy Jaeryang Baek 2 月之前
父节点
当前提交
ca0b7217d2

+ 6 - 0
backend/open_webui/config.py

@@ -1780,6 +1780,12 @@ RAG_WEB_SEARCH_ENGINE = PersistentConfig(
     os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
     os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
 )
 )
 
 
+RAG_WEB_SEARCH_FULL_CONTEXT = PersistentConfig(
+    "RAG_WEB_SEARCH_FULL_CONTEXT",
+    "rag.web.search.full_context",
+    os.getenv("RAG_WEB_SEARCH_FULL_CONTEXT", "False").lower() == "true",
+)
+
 # You can provide a list of your own websites to filter after performing a web search.
 # You can provide a list of your own websites to filter after performing a web search.
 # This ensures the highest level of safety and reliability of the information sources.
 # This ensures the highest level of safety and reliability of the information sources.
 RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
 RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(

+ 2 - 0
backend/open_webui/main.py

@@ -179,6 +179,7 @@ from open_webui.config import (
     YOUTUBE_LOADER_PROXY_URL,
     YOUTUBE_LOADER_PROXY_URL,
     # Retrieval (Web Search)
     # Retrieval (Web Search)
     RAG_WEB_SEARCH_ENGINE,
     RAG_WEB_SEARCH_ENGINE,
+    RAG_WEB_SEARCH_FULL_CONTEXT,
     RAG_WEB_SEARCH_RESULT_COUNT,
     RAG_WEB_SEARCH_RESULT_COUNT,
     RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
     RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
     RAG_WEB_SEARCH_TRUST_ENV,
     RAG_WEB_SEARCH_TRUST_ENV,
@@ -548,6 +549,7 @@ app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL
 
 
 app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
 app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
 app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
 app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
+app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = RAG_WEB_SEARCH_FULL_CONTEXT
 app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
 app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST
 
 
 app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION
 app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION

+ 8 - 1
backend/open_webui/retrieval/utils.py

@@ -304,7 +304,14 @@ def get_sources_from_files(
     relevant_contexts = []
     relevant_contexts = []
 
 
     for file in files:
     for file in files:
-        if file.get("context") == "full":
+        if file.get("docs"):
+            # Documents supplied inline on the file entry (e.g. full-context
+            # web search results) are used as-is as this entry's context.
+            context = {
+                "documents": [[doc.get("content") for doc in file.get("docs")]],
+                "metadatas": [[doc.get("metadata") for doc in file.get("docs")]],
+            }
+        elif file.get("context") == "full":
             context = {
             context = {
                 "documents": [[file.get("file").get("data", {}).get("content")]],
                 "documents": [[file.get("file").get("data", {}).get("content")]],
                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],

+ 41 - 18
backend/open_webui/routers/retrieval.py

@@ -371,7 +371,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
             "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
             "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL,
         },
         },
         "web": {
         "web": {
-            "web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
             "search": {
             "search": {
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
                 "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION,
@@ -457,7 +458,8 @@ class WebSearchConfig(BaseModel):
 
 
 class WebConfig(BaseModel):
 class WebConfig(BaseModel):
     search: WebSearchConfig
     search: WebSearchConfig
-    web_loader_ssl_verification: Optional[bool] = None
+    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
+    RAG_WEB_SEARCH_FULL_CONTEXT: Optional[bool] = None
 
 
 
 
 class ConfigUpdateForm(BaseModel):
 class ConfigUpdateForm(BaseModel):
@@ -512,11 +514,16 @@ async def update_rag_config(
     if form_data.web is not None:
     if form_data.web is not None:
         request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
         request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
             # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
             # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False
-            form_data.web.web_loader_ssl_verification
+            form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
         )
         )
 
 
         request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
         request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled
         request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
         request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
+        # Keep the stored value when the client omits this Optional field.
+        if form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT is not None:
+            request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT = (
+                form_data.web.RAG_WEB_SEARCH_FULL_CONTEXT
+            )
         request.app.state.config.SEARXNG_QUERY_URL = (
         request.app.state.config.SEARXNG_QUERY_URL = (
             form_data.web.search.searxng_query_url
             form_data.web.search.searxng_query_url
         )
         )
@@ -600,7 +607,8 @@ async def update_rag_config(
             "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
             "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION,
         },
         },
         "web": {
         "web": {
-            "web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "RAG_WEB_SEARCH_FULL_CONTEXT": request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT,
             "search": {
             "search": {
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH,
                 "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
                 "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE,
@@ -1349,21 +1357,36 @@ async def process_web_search(
             trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
             trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
         )
         )
         docs = await loader.aload()
         docs = await loader.aload()
-        await run_in_threadpool(
-            save_docs_to_vector_db,
-            request,
-            docs,
-            collection_name,
-            overwrite=True,
-            user=user,
-        )
 
 
-        return {
-            "status": True,
-            "collection_name": collection_name,
-            "filenames": urls,
-            "loaded_count": len(docs),
-        }
+        if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
+            return {
+                "status": True,
+                "docs": [
+                    {
+                        "content": doc.page_content,
+                        "metadata": doc.metadata,
+                    }
+                    for doc in docs
+                ],
+                "filenames": urls,
+                "loaded_count": len(docs),
+            }
+        else:
+            await run_in_threadpool(
+                save_docs_to_vector_db,
+                request,
+                docs,
+                collection_name,
+                overwrite=True,
+                user=user,
+            )
+
+            return {
+                "status": True,
+                "collection_name": collection_name,
+                "filenames": urls,
+                "loaded_count": len(docs),
+            }
     except Exception as e:
     except Exception as e:
         log.exception(e)
         log.exception(e)
         raise HTTPException(
         raise HTTPException(

+ 19 - 8
backend/open_webui/utils/middleware.py

@@ -362,14 +362,25 @@ async def chat_web_search_handler(
             )
             )
 
 
             files = form_data.get("files", [])
             files = form_data.get("files", [])
-            files.append(
-                {
-                    "collection_name": results["collection_name"],
-                    "name": searchQuery,
-                    "type": "web_search_results",
-                    "urls": results["filenames"],
-                }
-            )
+
+            if request.app.state.config.RAG_WEB_SEARCH_FULL_CONTEXT:
+                files.append(
+                    {
+                        "docs": results.get("docs", []),
+                        "name": searchQuery,
+                        "type": "web_search_docs",
+                        "urls": results["filenames"],
+                    }
+                )
+            else:
+                files.append(
+                    {
+                        "collection_name": results["collection_name"],
+                        "name": searchQuery,
+                        "type": "web_search_results",
+                        "urls": results["filenames"],
+                    }
+                )
             form_data["files"] = files
             form_data["files"] = files
         else:
         else:
             await event_emitter(
             await event_emitter(

+ 17 - 2
src/lib/components/admin/Settings/WebSearch.svelte

@@ -6,6 +6,7 @@
 	import { onMount, getContext } from 'svelte';
 	import { onMount, getContext } from 'svelte';
 	import { toast } from 'svelte-sonner';
 	import { toast } from 'svelte-sonner';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
+	import Tooltip from '$lib/components/common/Tooltip.svelte';
 
 
 	const i18n = getContext('i18n');
 	const i18n = getContext('i18n');
 
 
@@ -116,6 +117,19 @@
 					</div>
 					</div>
 				</div>
 				</div>
 
 
+				<div class=" py-0.5 flex w-full justify-between">
+					<div class=" self-center text-xs font-medium">{$i18n.t('Full Context Mode')}</div>
+					<div class="flex items-center relative">
+						<Tooltip
+							content={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT
+								? $i18n.t('Inject the entire web results as context for comprehensive processing, this is recommended for complex queries.')
+								: $i18n.t('Default to segmented retrieval for focused and relevant content extraction, this is recommended for most cases.')}
+						>
+							<Switch bind:state={webConfig.RAG_WEB_SEARCH_FULL_CONTEXT} />
+						</Tooltip>
+					</div>
+				</div>
+
 				{#if webConfig.search.engine !== ''}
 				{#if webConfig.search.engine !== ''}
 					<div class="mt-1.5">
 					<div class="mt-1.5">
 						{#if webConfig.search.engine === 'searxng'}
 						{#if webConfig.search.engine === 'searxng'}
@@ -424,12 +438,13 @@
 						<button
 						<button
 							class="p-1 px-3 text-xs flex rounded-sm transition"
 							class="p-1 px-3 text-xs flex rounded-sm transition"
 							on:click={() => {
 							on:click={() => {
-								webConfig.web_loader_ssl_verification = !webConfig.web_loader_ssl_verification;
+								webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION =
+									!webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION;
 								submitHandler();
 								submitHandler();
 							}}
 							}}
 							type="button"
 							type="button"
 						>
 						>
-							{#if webConfig.web_loader_ssl_verification === false}
+							{#if webConfig.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION === false}
 								<span class="ml-2 self-center">{$i18n.t('On')}</span>
 								<span class="ml-2 self-center">{$i18n.t('On')}</span>
 							{:else}
 							{:else}
 								<span class="ml-2 self-center">{$i18n.t('Off')}</span>
 								<span class="ml-2 self-center">{$i18n.t('Off')}</span>