Timothy Jaeryang Baek 1 月之前
父节点
当前提交
9b20ef4922
共有 3 个文件被更改,包括 13 次插入和 17 次删除
  1. 4 4
      backend/open_webui/config.py
  2. 2 2
      backend/open_webui/main.py
  3. 7 11
      backend/open_webui/retrieval/web/utils.py

+ 4 - 4
backend/open_webui/config.py

@@ -2081,10 +2081,10 @@ PLAYWRIGHT_WS_URI = PersistentConfig(
     os.environ.get("PLAYWRIGHT_WS_URI", None),
     os.environ.get("PLAYWRIGHT_WS_URI", None),
 )
 )
 
 
-PLAYWRIGHT_GOTO_TIMEOUT = PersistentConfig(
-    "PLAYWRIGHT_GOTO_TIMEOUT",
-    "rag.web.loader.engine.playwright.goto.timeout",
-    int(os.environ.get("PLAYWRIGHT_GOTO_TIMEOUT", "10")),
+PLAYWRIGHT_TIMEOUT = PersistentConfig(
+    "PLAYWRIGHT_TIMEOUT",
+    "rag.web.loader.engine.playwright.timeout",
+    int(os.environ.get("PLAYWRIGHT_TIMEOUT", "10")),
 )
 )
 
 
 FIRECRAWL_API_KEY = PersistentConfig(
 FIRECRAWL_API_KEY = PersistentConfig(

+ 2 - 2
backend/open_webui/main.py

@@ -155,7 +155,7 @@ from open_webui.config import (
     AUDIO_TTS_AZURE_SPEECH_REGION,
     AUDIO_TTS_AZURE_SPEECH_REGION,
     AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
     AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
     PLAYWRIGHT_WS_URI,
     PLAYWRIGHT_WS_URI,
-    PLAYWRIGHT_GOTO_TIMEOUT,
+    PLAYWRIGHT_TIMEOUT,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
     FIRECRAWL_API_KEY,
     RAG_WEB_LOADER_ENGINE,
     RAG_WEB_LOADER_ENGINE,
@@ -630,7 +630,7 @@ app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_
 app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
 app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE
 app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
 app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV
 app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
 app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI
-app.state.config.PLAYWRIGHT_GOTO_TIMEOUT = PLAYWRIGHT_GOTO_TIMEOUT
+app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT
 app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
 app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL
 app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
 app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY
 app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH
 app.state.config.TAVILY_EXTRACT_DEPTH = TAVILY_EXTRACT_DEPTH

+ 7 - 11
backend/open_webui/retrieval/web/utils.py

@@ -29,7 +29,7 @@ from open_webui.constants import ERROR_MESSAGES
 from open_webui.config import (
 from open_webui.config import (
     ENABLE_RAG_LOCAL_WEB_FETCH,
     ENABLE_RAG_LOCAL_WEB_FETCH,
     PLAYWRIGHT_WS_URI,
     PLAYWRIGHT_WS_URI,
-    PLAYWRIGHT_GOTO_TIMEOUT,
+    PLAYWRIGHT_TIMEOUT,
     RAG_WEB_LOADER_ENGINE,
     RAG_WEB_LOADER_ENGINE,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
     FIRECRAWL_API_KEY,
@@ -377,7 +377,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
         headless (bool): If True, the browser will run in headless mode.
         headless (bool): If True, the browser will run in headless mode.
         proxy (dict): Proxy override settings for the Playwright session.
         proxy (dict): Proxy override settings for the Playwright session.
         playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
         playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
-        playwright_goto_timeout (Optional[int]): Maximum operation time in milliseconds.
+        playwright_timeout (Optional[int]): Maximum operation time in milliseconds.
     """
     """
 
 
     def __init__(
     def __init__(
@@ -391,7 +391,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
         remove_selectors: Optional[List[str]] = None,
         remove_selectors: Optional[List[str]] = None,
         proxy: Optional[Dict[str, str]] = None,
         proxy: Optional[Dict[str, str]] = None,
         playwright_ws_url: Optional[str] = None,
         playwright_ws_url: Optional[str] = None,
-        playwright_goto_timeout: Optional[int] = 10000,
+        playwright_timeout: Optional[int] = 10000,
     ):
     ):
         """Initialize with additional safety parameters and remote browser support."""
         """Initialize with additional safety parameters and remote browser support."""
 
 
@@ -418,7 +418,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
         self.last_request_time = None
         self.last_request_time = None
         self.playwright_ws_url = playwright_ws_url
         self.playwright_ws_url = playwright_ws_url
         self.trust_env = trust_env
         self.trust_env = trust_env
-        self.playwright_goto_timeout = playwright_goto_timeout
+        self.playwright_timeout = playwright_timeout
 
 
     def lazy_load(self) -> Iterator[Document]:
     def lazy_load(self) -> Iterator[Document]:
         """Safely load URLs synchronously with support for remote browser."""
         """Safely load URLs synchronously with support for remote browser."""
@@ -435,7 +435,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
                 try:
                 try:
                     self._safe_process_url_sync(url)
                     self._safe_process_url_sync(url)
                     page = browser.new_page()
                     page = browser.new_page()
-                    response = page.goto(url, timeout=self.playwright_goto_timeout)
+                    response = page.goto(url, timeout=self.playwright_timeout)
                     if response is None:
                     if response is None:
                         raise ValueError(f"page.goto() returned None for url {url}")
                         raise ValueError(f"page.goto() returned None for url {url}")
 
 
@@ -466,9 +466,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
                 try:
                 try:
                     await self._safe_process_url(url)
                     await self._safe_process_url(url)
                     page = await browser.new_page()
                     page = await browser.new_page()
-                    response = await page.goto(
-                        url, timeout=self.playwright_goto_timeout
-                    )
+                    response = await page.goto(url, timeout=self.playwright_timeout)
                     if response is None:
                     if response is None:
                         raise ValueError(f"page.goto() returned None for url {url}")
                         raise ValueError(f"page.goto() returned None for url {url}")
 
 
@@ -611,9 +609,7 @@ def get_web_loader(
     }
     }
 
 
     if RAG_WEB_LOADER_ENGINE.value == "playwright":
     if RAG_WEB_LOADER_ENGINE.value == "playwright":
-        web_loader_args["playwright_goto_timeout"] = (
-            PLAYWRIGHT_GOTO_TIMEOUT.value * 1000
-        )
+        web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000
         if PLAYWRIGHT_WS_URI.value:
         if PLAYWRIGHT_WS_URI.value:
             web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
             web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value