# google_pse.py (1.4 KB)
  1. import json
  2. import logging
  3. from typing import List, Optional
  4. import requests
  5. from apps.rag.search.main import SearchResult, get_filtered_results
  6. from config import SRC_LOG_LEVELS
  7. log = logging.getLogger(__name__)
  8. log.setLevel(SRC_LOG_LEVELS["RAG"])
  9. def search_google_pse(
  10. api_key: str,
  11. search_engine_id: str,
  12. query: str,
  13. count: int,
  14. filter_list: Optional[List[str]] = None,
  15. ) -> list[SearchResult]:
  16. """Search using Google's Programmable Search Engine API and return the results as a list of SearchResult objects.
  17. Args:
  18. api_key (str): A Programmable Search Engine API key
  19. search_engine_id (str): A Programmable Search Engine ID
  20. query (str): The query to search for
  21. """
  22. url = "https://www.googleapis.com/customsearch/v1"
  23. headers = {"Content-Type": "application/json"}
  24. params = {
  25. "cx": search_engine_id,
  26. "q": query,
  27. "key": api_key,
  28. "num": count,
  29. }
  30. response = requests.request("GET", url, headers=headers, params=params)
  31. response.raise_for_status()
  32. json_response = response.json()
  33. results = json_response.get("items", [])
  34. if filter_list:
  35. results = get_filtered_results(results, filter_list)
  36. return [
  37. SearchResult(
  38. link=result["link"],
  39. title=result.get("title"),
  40. snippet=result.get("snippet"),
  41. )
  42. for result in results
  43. ]