# serpstack.py (1.4 KB)
  1. import json
  2. import logging
  3. from typing import Optional
  4. import requests
  5. from apps.rag.search.main import SearchResult, get_filtered_results
  6. from config import SRC_LOG_LEVELS
  7. log = logging.getLogger(__name__)
  8. log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. def search_serpstack(
  10. api_key: str,
  11. query: str,
  12. count: int,
  13. filter_list: Optional[list[str]] = None,
  14. https_enabled: bool = True,
  15. ) -> list[SearchResult]:
  16. """Search using serpstack.com's and return the results as a list of SearchResult objects.
  17. Args:
  18. api_key (str): A serpstack.com API key
  19. query (str): The query to search for
  20. https_enabled (bool): Whether to use HTTPS or HTTP for the API request
  21. """
  22. url = f"{'https' if https_enabled else 'http'}://api.serpstack.com/search"
  23. headers = {"Content-Type": "application/json"}
  24. params = {
  25. "access_key": api_key,
  26. "query": query,
  27. }
  28. response = requests.request("POST", url, headers=headers, params=params)
  29. response.raise_for_status()
  30. json_response = response.json()
  31. results = sorted(
  32. json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
  33. )
  34. if filter_list:
  35. results = get_filtered_results(results, filter_list)
  36. return [
  37. SearchResult(
  38. link=result["url"], title=result.get("title"), snippet=result.get("snippet")
  39. )
  40. for result in results[:count]
  41. ]