Bläddra i källkod

refac: web search

Timothy J. Baek 11 månader sedan
förälder
incheckning
fbdfb7e4fa

+ 17 - 3
backend/apps/rag/main.py

@@ -739,7 +739,11 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
     # TODO: add playwright to search the web
     if engine == "searxng":
         if app.state.config.SEARXNG_QUERY_URL:
-            return search_searxng(app.state.config.SEARXNG_QUERY_URL, query)
+            return search_searxng(
+                app.state.config.SEARXNG_QUERY_URL,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            )
         else:
             raise Exception("No SEARXNG_QUERY_URL found in environment variables")
     elif engine == "google_pse":
@@ -751,6 +755,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
                 app.state.config.GOOGLE_PSE_API_KEY,
                 app.state.config.GOOGLE_PSE_ENGINE_ID,
                 query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
             )
         else:
             raise Exception(
@@ -758,7 +763,11 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
             )
     elif engine == "brave":
         if app.state.config.BRAVE_SEARCH_API_KEY:
-            return search_brave(app.state.config.BRAVE_SEARCH_API_KEY, query)
+            return search_brave(
+                app.state.config.BRAVE_SEARCH_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            )
         else:
             raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
     elif engine == "serpstack":
@@ -766,13 +775,18 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
             return search_serpstack(
                 app.state.config.SERPSTACK_API_KEY,
                 query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
                 https_enabled=app.state.config.SERPSTACK_HTTPS,
             )
         else:
             raise Exception("No SERPSTACK_API_KEY found in environment variables")
     elif engine == "serper":
         if app.state.config.SERPER_API_KEY:
-            return search_serper(app.state.config.SERPER_API_KEY, query)
+            return search_serper(
+                app.state.config.SERPER_API_KEY,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+            )
         else:
             raise Exception("No SERPER_API_KEY found in environment variables")
     else:

+ 4 - 4
backend/apps/rag/search/brave.py

@@ -3,13 +3,13 @@ import logging
 import requests
 
 from apps.rag.search.main import SearchResult
-from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
+from config import SRC_LOG_LEVELS
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
-def search_brave(api_key: str, query: str) -> list[SearchResult]:
+def search_brave(api_key: str, query: str, count: int) -> list[SearchResult]:
     """Search using Brave's Search API and return the results as a list of SearchResult objects.
 
     Args:
@@ -22,7 +22,7 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
         "Accept-Encoding": "gzip",
         "X-Subscription-Token": api_key,
     }
-    params = {"q": query, "count": RAG_WEB_SEARCH_RESULT_COUNT}
+    params = {"q": query, "count": count}
 
     response = requests.get(url, headers=headers, params=params)
     response.raise_for_status()
@@ -33,5 +33,5 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
         SearchResult(
             link=result["url"], title=result.get("title"), snippet=result.get("snippet")
         )
-        for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
+        for result in results[:count]
     ]

+ 3 - 3
backend/apps/rag/search/google_pse.py

@@ -4,14 +4,14 @@ import logging
 import requests
 
 from apps.rag.search.main import SearchResult
-from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
+from config import SRC_LOG_LEVELS
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
 def search_google_pse(
-    api_key: str, search_engine_id: str, query: str
+    api_key: str, search_engine_id: str, query: str, count: int
 ) -> list[SearchResult]:
     """Search using Google's Programmable Search Engine API and return the results as a list of SearchResult objects.
 
@@ -27,7 +27,7 @@ def search_google_pse(
         "cx": search_engine_id,
         "q": query,
         "key": api_key,
-        "num": RAG_WEB_SEARCH_RESULT_COUNT,
+        "num": count,
     }
 
     response = requests.request("GET", url, headers=headers, params=params)

+ 3 - 3
backend/apps/rag/search/searxng.py

@@ -3,13 +3,13 @@ import logging
 import requests
 
 from apps.rag.search.main import SearchResult
-from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
+from config import SRC_LOG_LEVELS
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
-def search_searxng(query_url: str, query: str) -> list[SearchResult]:
+def search_searxng(query_url: str, query: str, count: int) -> list[SearchResult]:
     """Search a SearXNG instance for a query and return the results as a list of SearchResult objects.
 
     Args:
@@ -40,5 +40,5 @@ def search_searxng(query_url: str, query: str) -> list[SearchResult]:
         SearchResult(
             link=result["url"], title=result.get("title"), snippet=result.get("content")
         )
-        for result in sorted_results[:RAG_WEB_SEARCH_RESULT_COUNT]
+        for result in sorted_results[:count]
     ]

+ 3 - 3
backend/apps/rag/search/serper.py

@@ -4,13 +4,13 @@ import logging
 import requests
 
 from apps.rag.search.main import SearchResult
-from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
+from config import SRC_LOG_LEVELS
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
-def search_serper(api_key: str, query: str) -> list[SearchResult]:
+def search_serper(api_key: str, query: str, count: int) -> list[SearchResult]:
     """Search using serper.dev's API and return the results as a list of SearchResult objects.
 
     Args:
@@ -35,5 +35,5 @@ def search_serper(api_key: str, query: str) -> list[SearchResult]:
             title=result.get("title"),
             snippet=result.get("description"),
         )
-        for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
+        for result in results[:count]
     ]

+ 3 - 3
backend/apps/rag/search/serpstack.py

@@ -4,14 +4,14 @@ import logging
 import requests
 
 from apps.rag.search.main import SearchResult
-from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
+from config import SRC_LOG_LEVELS
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["RAG"])
 
 
 def search_serpstack(
-    api_key: str, query: str, https_enabled: bool = True
+    api_key: str, query: str, count: int, https_enabled: bool = True
 ) -> list[SearchResult]:
     """Search using serpstack.com's API and return the results as a list of SearchResult objects.
 
@@ -39,5 +39,5 @@ def search_serpstack(
         SearchResult(
             link=result["url"], title=result.get("title"), snippet=result.get("snippet")
         )
-        for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
+        for result in results[:count]
     ]

+ 3 - 1
src/lib/components/chat/Messages/ResponseMessage/WebSearchResults.svelte

@@ -35,7 +35,9 @@
 					? ''
 					: 'border-b border-gray-300/30 dark:border-gray-700/50'} group/item justify-between font-normal text-gray-800 dark:text-gray-300"
 			>
-				{url}
+				<div class=" line-clamp-1">
+					{url}
+				</div>
 
 				<div
 					class=" ml-1 text-white dark:text-gray-900 group-hover/item:text-gray-600 dark:group-hover/item:text-white transition"