|
@@ -29,7 +29,7 @@ from open_webui.constants import ERROR_MESSAGES
|
|
|
from open_webui.config import (
|
|
|
ENABLE_RAG_LOCAL_WEB_FETCH,
|
|
|
PLAYWRIGHT_WS_URI,
|
|
|
- PLAYWRIGHT_GOTO_TIMEOUT,
|
|
|
+ PLAYWRIGHT_TIMEOUT,
|
|
|
RAG_WEB_LOADER_ENGINE,
|
|
|
FIRECRAWL_API_BASE_URL,
|
|
|
FIRECRAWL_API_KEY,
|
|
@@ -377,7 +377,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|
|
headless (bool): If True, the browser will run in headless mode.
|
|
|
proxy (dict): Proxy override settings for the Playwright session.
|
|
|
playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection.
|
|
|
- playwright_goto_timeout (Optional[int]): Maximum operation time in milliseconds.
|
|
|
+ playwright_timeout (Optional[int]): Maximum operation time in milliseconds.
|
|
|
"""
|
|
|
|
|
|
def __init__(
|
|
@@ -391,7 +391,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|
|
remove_selectors: Optional[List[str]] = None,
|
|
|
proxy: Optional[Dict[str, str]] = None,
|
|
|
playwright_ws_url: Optional[str] = None,
|
|
|
- playwright_goto_timeout: Optional[int] = 10000,
|
|
|
+ playwright_timeout: Optional[int] = 10000,
|
|
|
):
|
|
|
"""Initialize with additional safety parameters and remote browser support."""
|
|
|
|
|
@@ -418,7 +418,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|
|
self.last_request_time = None
|
|
|
self.playwright_ws_url = playwright_ws_url
|
|
|
self.trust_env = trust_env
|
|
|
- self.playwright_goto_timeout = playwright_goto_timeout
|
|
|
+ self.playwright_timeout = playwright_timeout
|
|
|
|
|
|
def lazy_load(self) -> Iterator[Document]:
|
|
|
"""Safely load URLs synchronously with support for remote browser."""
|
|
@@ -435,7 +435,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|
|
try:
|
|
|
self._safe_process_url_sync(url)
|
|
|
page = browser.new_page()
|
|
|
- response = page.goto(url, timeout=self.playwright_goto_timeout)
|
|
|
+ response = page.goto(url, timeout=self.playwright_timeout)
|
|
|
if response is None:
|
|
|
raise ValueError(f"page.goto() returned None for url {url}")
|
|
|
|
|
@@ -466,9 +466,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader, RateLimitMixin, URLProcessing
|
|
|
try:
|
|
|
await self._safe_process_url(url)
|
|
|
page = await browser.new_page()
|
|
|
- response = await page.goto(
|
|
|
- url, timeout=self.playwright_goto_timeout
|
|
|
- )
|
|
|
+ response = await page.goto(url, timeout=self.playwright_timeout)
|
|
|
if response is None:
|
|
|
raise ValueError(f"page.goto() returned None for url {url}")
|
|
|
|
|
@@ -611,9 +609,7 @@ def get_web_loader(
|
|
|
}
|
|
|
|
|
|
if RAG_WEB_LOADER_ENGINE.value == "playwright":
|
|
|
- web_loader_args["playwright_goto_timeout"] = (
|
|
|
- PLAYWRIGHT_GOTO_TIMEOUT.value * 1000
|
|
|
- )
|
|
|
+ web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000
|
|
|
if PLAYWRIGHT_WS_URI.value:
|
|
|
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value
|
|
|
|