|
@@ -48,16 +48,6 @@ def validate_url(url: Union[str, Sequence[str]]):
|
|
|
else:
|
|
|
return False
|
|
|
|
|
|
-def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
|
|
|
- valid_urls = []
|
|
|
- for u in url:
|
|
|
- try:
|
|
|
- if validate_url(u):
|
|
|
- valid_urls.append(u)
|
|
|
- except ValueError:
|
|
|
- continue
|
|
|
- return valid_urls
|
|
|
-
|
|
|
def resolve_hostname(hostname):
|
|
|
# Get address information
|
|
|
addr_info = socket.getaddrinfo(hostname, None)
|
|
@@ -253,11 +243,12 @@ def get_web_loader(
|
|
|
verify_ssl: bool = True,
|
|
|
requests_per_second: int = 2,
|
|
|
):
|
|
|
- # Check if the URLs are valid
|
|
|
- safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls)
|
|
|
+ # Check if the URL is valid
|
|
|
+ if not validate_url(urls):
|
|
|
+ raise ValueError(ERROR_MESSAGES.INVALID_URL)
|
|
|
|
|
|
web_loader_args = {
|
|
|
- "urls": safe_urls,
|
|
|
+ "urls": urls,
|
|
|
"verify_ssl": verify_ssl,
|
|
|
"requests_per_second": requests_per_second,
|
|
|
"continue_on_failure": True
|
|
@@ -270,6 +261,6 @@ def get_web_loader(
|
|
|
WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value]
|
|
|
web_loader = WebLoaderClass(**web_loader_args)
|
|
|
|
|
|
- log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls))
|
|
|
+ log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls))
|
|
|
|
|
|
return web_loader
|