Browse Source

Add searchapi as an alternative web search

Add config changes for SearchApi api key and engine

Add searchapi results json in testdata
Rajendra Kadam 8 months ago
parent
commit
7e1923fcfe

+ 1 - 1
README.md

@@ -37,7 +37,7 @@ Open WebUI is an [extensible](https://github.com/open-webui/pipelines), feature-
 
 - 📚 **Local RAG Integration**: Dive into the future of chat interactions with groundbreaking Retrieval Augmented Generation (RAG) support. This feature seamlessly integrates document interactions into your chat experience. You can load documents directly into the chat or add files to your document library, effortlessly accessing them using the `#` command before a query.
 
-- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo` and `TavilySearch` and inject the results directly into your chat experience.
+- 🔍 **Web Search for RAG**: Perform web searches using providers like `SearXNG`, `Google PSE`, `Brave Search`, `serpstack`, `serper`, `Serply`, `DuckDuckGo`, `TavilySearch` and `SearchApi` and inject the results directly into your chat experience.
 
 - 🌐 **Web Browsing Capability**: Seamlessly integrate websites into your chat experience using the `#` command followed by a URL. This feature allows you to incorporate web content directly into your conversations, enhancing the richness and depth of your interactions.
 

+ 27 - 0
backend/apps/rag/main.py

@@ -76,6 +76,7 @@ from apps.rag.search.serply import search_serply
 from apps.rag.search.duckduckgo import search_duckduckgo
 from apps.rag.search.tavily import search_tavily
 from apps.rag.search.jina_search import search_jina
+from apps.rag.search.searchapi import search_searchapi
 
 from utils.misc import (
     calculate_sha256,
@@ -128,6 +129,8 @@ from config import (
     SERPER_API_KEY,
     SERPLY_API_KEY,
     TAVILY_API_KEY,
+    SEARCHAPI_API_KEY,
+    SEARCHAPI_ENGINE,
     RAG_WEB_SEARCH_RESULT_COUNT,
     RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
     RAG_EMBEDDING_OPENAI_BATCH_SIZE,
@@ -189,6 +192,8 @@ app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
 app.state.config.SERPER_API_KEY = SERPER_API_KEY
 app.state.config.SERPLY_API_KEY = SERPLY_API_KEY
 app.state.config.TAVILY_API_KEY = TAVILY_API_KEY
+app.state.config.SEARCHAPI_API_KEY = SEARCHAPI_API_KEY
+app.state.config.SEARCHAPI_ENGINE = SEARCHAPI_ENGINE
 app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
 app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
 
@@ -427,6 +432,8 @@ async def get_rag_config(user=Depends(get_admin_user)):
                 "serper_api_key": app.state.config.SERPER_API_KEY,
                 "serply_api_key": app.state.config.SERPLY_API_KEY,
                 "tavily_api_key": app.state.config.TAVILY_API_KEY,
+                "searchapi_api_key": app.state.config.SEARCHAPI_API_KEY,
+                "seaarchapi_engine": app.state.config.SEARCHAPI_ENGINE,
                 "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
                 "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
             },
@@ -466,6 +473,8 @@ class WebSearchConfig(BaseModel):
     serper_api_key: Optional[str] = None
     serply_api_key: Optional[str] = None
     tavily_api_key: Optional[str] = None
+    searchapi_api_key: Optional[str] = None
+    searchapi_engine: Optional[str] = None
     result_count: Optional[int] = None
     concurrent_requests: Optional[int] = None
 
@@ -529,6 +538,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
         app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
         app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key
         app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key
+        app.state.config.SEARCHAPI_API_KEY = form_data.web.search.searchapi_api_key
+        app.state.config.SEARCHAPI_ENGINE = (
+            form_data.web.search.searchapi_engine
+        )
         app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
         app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
             form_data.web.search.concurrent_requests
@@ -566,6 +579,8 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
                 "serpstack_https": app.state.config.SERPSTACK_HTTPS,
                 "serper_api_key": app.state.config.SERPER_API_KEY,
                 "serply_api_key": app.state.config.SERPLY_API_KEY,
+                "serachapi_api_key": app.state.config.SEARCHAPI_API_KEY,
+                "searchapi_engine": app.state.config.SEARCHAPI_ENGINE,
                 "tavily_api_key": app.state.config.TAVILY_API_KEY,
                 "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
                 "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
@@ -817,6 +832,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
     - SERPER_API_KEY
     - SERPLY_API_KEY
     - TAVILY_API_KEY
+    - SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
     Args:
         query (str): The query to search for
     """
@@ -904,6 +920,17 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
             )
         else:
             raise Exception("No TAVILY_API_KEY found in environment variables")
+    elif engine == "searchapi":
+        if app.state.config.SEARCHAPI_API_KEY:
+            return search_searchapi(
+                app.state.config.SEARCHAPI_API_KEY,
+                app.state.config.SEARCHAPI_ENGINE,
+                query,
+                app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
+            )
+        else:
+            raise Exception("No SEARCHAPI_API_KEY found in environment variables")
     elif engine == "jina":
         return search_jina(query, app.state.config.RAG_WEB_SEARCH_RESULT_COUNT)
     else:

+ 2 - 1
backend/apps/rag/search/main.py

@@ -8,7 +8,8 @@ def get_filtered_results(results, filter_list):
         return results
     filtered_results = []
     for result in results:
-        domain = urlparse(result["url"]).netloc
+        url = result.get("url") or result.get("link", "")
+        domain = urlparse(url).netloc
         if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
             filtered_results.append(result)
     return filtered_results

+ 50 - 0
backend/apps/rag/search/searchapi.py

@@ -0,0 +1,50 @@
+import json
+import logging
+from typing import Optional
+import requests
+from urllib.parse import urlencode
+
+from apps.rag.search.main import SearchResult, get_filtered_results
+from config import SRC_LOG_LEVELS
+
+log = logging.getLogger(__name__)
+log.setLevel(SRC_LOG_LEVELS["RAG"])
+
+def search_searchapi(
+    api_key: str, engine: str, query: str, count: int, filter_list: Optional[list[str]] = None
+) -> list[SearchResult]:
+  """Search using searchapi.io's API and return the results as a list of SearchResult objects.
+
+  Args:
+    api_key (str): A searchapi.io API key
+    query (str): The query to search for
+  """
+  url = "https://www.searchapi.io/api/v1/search"
+
+  engine = engine or "google"
+
+  payload = {
+    "engine": engine,
+    "q": query,
+    "api_key": api_key
+  }
+
+  url = f"{url}?{urlencode(payload)}"
+  response = requests.request("GET", url)
+
+  json_response = response.json()
+  log.info(f"results from searchapi search: {json_response}")
+
+  results = sorted(
+    json_response.get("organic_results", []), key=lambda x: x.get("position", 0)
+  )
+  if filter_list:
+    results = get_filtered_results(results, filter_list)
+  return [
+    SearchResult(
+      link=result["link"],
+      title=result["title"],
+      snippet=result["snippet"]
+    )
+    for result in results[:count]
+  ]

File diff suppressed because it is too large
+ 79 - 0
backend/apps/rag/search/testdata/searchapi.json


+ 12 - 0
backend/config.py

@@ -1237,6 +1237,18 @@ TAVILY_API_KEY = PersistentConfig(
     os.getenv("TAVILY_API_KEY", ""),
 )
 
+SEARCHAPI_API_KEY = PersistentConfig(
+    "SEARCHAPI_API_KEY",
+    "rag.web.search.searchapi_api_key",
+    os.getenv("SEARCHAPI_API_KEY", ""),
+)
+
+SEARCHAPI_ENGINE = PersistentConfig(
+    "SEARCHAPI_ENGINE",
+    "rag.web.search.searchapi_engine",
+    os.getenv("SEARCHAPI_ENGINE", ""),
+)
+
 RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
     "RAG_WEB_SEARCH_RESULT_COUNT",
     "rag.web.search.result_count",

+ 29 - 0
src/lib/components/admin/Settings/WebSearch.svelte

@@ -19,6 +19,7 @@
 		'serpstack',
 		'serper',
 		'serply',
+		'searchapi',
 		'duckduckgo',
 		'tavily',
 		'jina'
@@ -182,6 +183,34 @@
 									bind:value={webConfig.search.serply_api_key}
 								/>
 							</div>
+						{:else if webConfig.search.engine === 'searchapi'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('SearchApi API Key')}
+								</div>
+
+								<SensitiveInput
+									placeholder={$i18n.t('Enter SearchApi API Key')}
+									bind:value={webConfig.search.searchapi_api_key}
+								/>
+							</div>
+							<div class="mt-1.5">
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('SearchApi Engine')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter SearchApi Engine')}
+											bind:value={webConfig.search.searchapi_engine}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
 						{:else if webConfig.search.engine === 'tavily'}
 							<div>
 								<div class=" self-center text-xs font-medium mb-1">

Some files were not shown because too many files changed in this diff