serpstack.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
  1. import json
  2. import logging
  3. from typing import List, Optional
  4. import requests
  5. from apps.rag.search.main import SearchResult, get_filtered_results
  6. from config import SRC_LOG_LEVELS
# Module-level logger named after this module; its level is taken from the
# "RAG" entry of the project's SRC_LOG_LEVELS mapping.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. def search_serpstack(
  10. api_key: str, query: str, count: int, filter_list: Optional[List[str]] = None, https_enabled: bool = True
  11. ) -> list[SearchResult]:
  12. """Search using serpstack.com's and return the results as a list of SearchResult objects.
  13. Args:
  14. api_key (str): A serpstack.com API key
  15. query (str): The query to search for
  16. https_enabled (bool): Whether to use HTTPS or HTTP for the API request
  17. """
  18. url = f"{'https' if https_enabled else 'http'}://api.serpstack.com/search"
  19. headers = {"Content-Type": "application/json"}
  20. params = {
  21. "access_key": api_key,
  22. "query": query,
  23. }
  24. response = requests.request("POST", url, headers=headers, params=params)
  25. response.raise_for_status()
  26. json_response = response.json()
  27. results = sorted(
  28. json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
  29. )
  30. if filter_list:
  31. results = get_filtered_results(results, filter_list)
  32. return [
  33. SearchResult(
  34. link=result["url"], title=result.get("title"), snippet=result.get("snippet")
  35. )
  36. for result in results[:count]
  37. ]