serpstack.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import json
  2. import logging
  3. from typing import List
  4. import requests
  5. from apps.rag.search.main import SearchResult, filter_by_whitelist
  6. from config import SRC_LOG_LEVELS
# Module-level logger named after this module; its level is taken from the
# "RAG" entry of the project's SRC_LOG_LEVELS mapping.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. def search_serpstack(
  10. api_key: str, query: str, count: int, whitelist:List[str], https_enabled: bool = True
  11. ) -> list[SearchResult]:
  12. """Search using serpstack.com's and return the results as a list of SearchResult objects.
  13. Args:
  14. api_key (str): A serpstack.com API key
  15. query (str): The query to search for
  16. https_enabled (bool): Whether to use HTTPS or HTTP for the API request
  17. """
  18. url = f"{'https' if https_enabled else 'http'}://api.serpstack.com/search"
  19. headers = {"Content-Type": "application/json"}
  20. params = {
  21. "access_key": api_key,
  22. "query": query,
  23. }
  24. response = requests.request("POST", url, headers=headers, params=params)
  25. response.raise_for_status()
  26. json_response = response.json()
  27. results = sorted(
  28. json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
  29. )
  30. filtered_results = filter_by_whitelist(results, whitelist)
  31. return [
  32. SearchResult(
  33. link=result["url"], title=result.get("title"), snippet=result.get("snippet")
  34. )
  35. for result in filtered_results[:count]
  36. ]