Browse Source

fix: Filter out invalid RAG web URLs (continued)

Rory 2 months ago
parent
commit
3db6b4352f
1 changed file with 13 additions and 4 deletions
  1. +13 -4
      backend/open_webui/retrieval/web/utils.py

+ 13 - 4
backend/open_webui/retrieval/web/utils.py

@@ -42,6 +42,15 @@ def validate_url(url: Union[str, Sequence[str]]):
     else:
         return False
 
+def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
+    valid_urls = []
+    for u in url:
+        try:
+            if validate_url(u):
+                valid_urls.append(u)
+        except ValueError:
+            continue
+    return valid_urls
 
 def resolve_hostname(hostname):
     # Get address information
@@ -86,11 +95,11 @@ def get_web_loader(
     verify_ssl: bool = True,
     requests_per_second: int = 2,
 ):
-    # Check if the URL is valid
-    if not validate_url(urls):
-        raise ValueError(ERROR_MESSAGES.INVALID_URL)
+    # Check if the URLs are valid
+    safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls)
+
     return SafeWebBaseLoader(
-        urls,
+        safe_urls,
         verify_ssl=verify_ssl,
         requests_per_second=requests_per_second,
         continue_on_failure=True,