searxng.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import logging
  2. import requests
  3. from apps.rag.search.main import SearchResult
  4. from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
  # Module-scoped logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
  log = logging.getLogger(name=__name__)
  log.setLevel(level=SRC_LOG_LEVELS["RAG"])
  def search_searxng(query_url: str, query: str) -> list[SearchResult]:
      """Search a SearXNG instance for a query and return the best-scored results.

      The query is percent-encoded before it is substituted into the URL so that
      spaces and reserved characters (``&``, ``#``, ``+``, ``=``) cannot truncate
      or alter the request.

      Args:
          query_url (str): The URL of the SearXNG instance to search. Must contain
              "<query>" as a placeholder for the search terms.
          query (str): The raw (not yet URL-encoded) query to search for.

      Returns:
          list[SearchResult]: At most RAG_WEB_SEARCH_RESULT_COUNT results, ordered
              by descending "score" as reported in the JSON response.

      Raises:
          requests.HTTPError: If the instance responds with an error status.
          KeyError: If a returned result has no "url" field.
      """
      # Imported locally so this function does not depend on a file-level import.
      from urllib.parse import quote_plus

      url = query_url.replace("<query>", quote_plus(query))

      # Ask for JSON output, choosing the separator that keeps the URL valid
      # whether or not the template already has a query string.
      if "&format=json" not in url and "?format=json" not in url:
          separator = "&" if "?" in url else "?"
          url += f"{separator}format=json"

      log.debug("searching %s", url)

      # NOTE(review): no timeout is passed, so a stalled instance blocks the
      # caller indefinitely; consider adding one once an acceptable value is agreed.
      response = requests.get(
          url,
          headers={
              "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot",
              "Accept": "text/html",
              "Accept-Encoding": "gzip, deflate",
              "Accept-Language": "en-US,en;q=0.5",
              "Connection": "keep-alive",
          },
      )
      response.raise_for_status()

      results = response.json().get("results", [])

      # A missing or null score counts as 0 so the sort never compares None.
      ranked = sorted(results, key=lambda item: item.get("score") or 0, reverse=True)

      return [
          SearchResult(
              link=result["url"],
              title=result.get("title"),
              snippet=result.get("content"),
          )
          for result in ranked[:RAG_WEB_SEARCH_RESULT_COUNT]
      ]