Browse Source

refac: web search settings

Timothy J. Baek 11 months ago
parent
commit
912a704fdc

+ 63 - 31
backend/apps/rag/main.py

@@ -97,9 +97,11 @@ from config import (
     ENABLE_RAG_LOCAL_WEB_FETCH,
     YOUTUBE_LOADER_LANGUAGE,
     ENABLE_RAG_WEB_SEARCH,
+    RAG_WEB_SEARCH_ENGINE,
     SEARXNG_QUERY_URL,
     GOOGLE_PSE_API_KEY,
     GOOGLE_PSE_ENGINE_ID,
+    BRAVE_SEARCH_API_KEY,
     SERPSTACK_API_KEY,
     SERPSTACK_HTTPS,
     SERPER_API_KEY,
@@ -145,9 +147,12 @@ app.state.YOUTUBE_LOADER_TRANSLATION = None
 
 
 app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
+app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE
+
 app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL
 app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
 app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
+app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY
 app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY
 app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
 app.state.config.SERPER_API_KEY = SERPER_API_KEY
@@ -351,23 +356,25 @@ async def get_rag_config(user=Depends(get_admin_user)):
             "chunk_size": app.state.config.CHUNK_SIZE,
             "chunk_overlap": app.state.config.CHUNK_OVERLAP,
         },
-        "web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
         "youtube": {
             "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
             "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
         },
         "web": {
+            "ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
             "search": {
                 "enable": app.state.config.ENABLE_RAG_WEB_SEARCH,
+                "engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
                 "searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
                 "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
                 "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
+                "brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
                 "serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
                 "serpstack_https": app.state.config.SERPSTACK_HTTPS,
                 "serper_api_key": app.state.config.SERPER_API_KEY,
                 "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
                 "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
-            }
+            },
         },
     }
 
@@ -384,9 +391,11 @@ class YoutubeLoaderConfig(BaseModel):
 
 class WebSearchConfig(BaseModel):
     enable: bool
+    engine: Optional[str] = None
     searxng_query_url: Optional[str] = None
     google_pse_api_key: Optional[str] = None
     google_pse_engine_id: Optional[str] = None
+    brave_search_api_key: Optional[str] = None
     serpstack_api_key: Optional[str] = None
     serpstack_https: Optional[bool] = None
     serper_api_key: Optional[str] = None
@@ -394,11 +403,16 @@ class WebSearchConfig(BaseModel):
     concurrent_requests: Optional[int] = None
 
 
+class WebConfig(BaseModel):
+    """Payload for the ``web`` section of the RAG config update form.
+
+    The SSL flag is declared as ``ssl_verification`` because that is the key
+    the config endpoints return under ``web``, and the settings UI posts that
+    same object back. Declaring it under a different name leaves the posted
+    value unbound, so the field silently falls back to ``None``.
+    """
+
+    search: WebSearchConfig
+    ssl_verification: Optional[bool] = None
+
+    @property
+    def web_loader_ssl_verification(self) -> Optional[bool]:
+        # Read-only alias: update_rag_config reads the flag under this name.
+        return self.ssl_verification
+
+
 class ConfigUpdateForm(BaseModel):
     pdf_extract_images: Optional[bool] = None
     chunk: Optional[ChunkParamUpdateForm] = None
-    web_loader_ssl_verification: Optional[bool] = None
     youtube: Optional[YoutubeLoaderConfig] = None
+    web: Optional[WebConfig] = None
 
 
 @app.post("/config/update")
@@ -409,35 +423,36 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
         else app.state.config.PDF_EXTRACT_IMAGES
     )
 
-    app.state.config.CHUNK_SIZE = (
-        form_data.chunk.chunk_size
-        if form_data.chunk is not None
-        else app.state.config.CHUNK_SIZE
-    )
+    if form_data.chunk is not None:
+        app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
+        app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
 
-    app.state.config.CHUNK_OVERLAP = (
-        form_data.chunk.chunk_overlap
-        if form_data.chunk is not None
-        else app.state.config.CHUNK_OVERLAP
-    )
+    if form_data.youtube is not None:
+        app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language
+        app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation
 
-    app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
-        form_data.web_loader_ssl_verification
-        if form_data.web_loader_ssl_verification != None
-        else app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
-    )
-
-    app.state.config.YOUTUBE_LOADER_LANGUAGE = (
-        form_data.youtube.language
-        if form_data.youtube is not None
-        else app.state.config.YOUTUBE_LOADER_LANGUAGE
-    )
+    if form_data.web is not None:
+        app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
+            form_data.web.web_loader_ssl_verification
+        )
 
-    app.state.YOUTUBE_LOADER_TRANSLATION = (
-        form_data.youtube.translation
-        if form_data.youtube is not None
-        else app.state.YOUTUBE_LOADER_TRANSLATION
-    )
+        app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enable
+        app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine
+        app.state.config.SEARXNG_QUERY_URL = form_data.web.search.searxng_query_url
+        app.state.config.GOOGLE_PSE_API_KEY = form_data.web.search.google_pse_api_key
+        app.state.config.GOOGLE_PSE_ENGINE_ID = (
+            form_data.web.search.google_pse_engine_id
+        )
+        app.state.config.BRAVE_SEARCH_API_KEY = (
+            form_data.web.search.brave_search_api_key
+        )
+        app.state.config.SERPSTACK_API_KEY = form_data.web.search.serpstack_api_key
+        app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https
+        app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key
+        app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = form_data.web.search.result_count
+        app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = (
+            form_data.web.search.concurrent_requests
+        )
 
     return {
         "status": True,
@@ -446,11 +461,26 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
             "chunk_size": app.state.config.CHUNK_SIZE,
             "chunk_overlap": app.state.config.CHUNK_OVERLAP,
         },
-        "web_loader_ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
         "youtube": {
             "language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
             "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
         },
+        "web": {
+            "ssl_verification": app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
+            "search": {
+                "enable": app.state.config.ENABLE_RAG_WEB_SEARCH,
+                "engine": app.state.config.RAG_WEB_SEARCH_ENGINE,
+                "searxng_query_url": app.state.config.SEARXNG_QUERY_URL,
+                "google_pse_api_key": app.state.config.GOOGLE_PSE_API_KEY,
+                "google_pse_engine_id": app.state.config.GOOGLE_PSE_ENGINE_ID,
+                "brave_search_api_key": app.state.config.BRAVE_SEARCH_API_KEY,
+                "serpstack_api_key": app.state.config.SERPSTACK_API_KEY,
+                "serpstack_https": app.state.config.SERPSTACK_HTTPS,
+                "serper_api_key": app.state.config.SERPER_API_KEY,
+                "result_count": app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
+                "concurrent_requests": app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
+            },
+        },
     }
 
 
@@ -690,7 +720,9 @@ def resolve_hostname(hostname):
 def store_web_search(form_data: SearchForm, user=Depends(get_current_user)):
     try:
         try:
-            web_results = search_web(form_data.query)
+            web_results = search_web(
+                app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query
+            )
         except Exception as e:
             log.exception(e)
             raise HTTPException(

+ 30 - 11
backend/apps/rag/utils.py

@@ -538,7 +538,7 @@ class RerankCompressor(BaseDocumentCompressor):
         return final_results
 
 
-def search_web(query: str) -> list[SearchResult]:
+def search_web(engine: str, query: str) -> list[SearchResult]:
     """Search the web using a search engine and return the results as a list of SearchResult objects.
     The engine is chosen by the `engine` argument; an exception is raised if that engine's settings are missing. Settings consulted:
     - SEARXNG_QUERY_URL
@@ -552,15 +552,34 @@ def search_web(query: str) -> list[SearchResult]:
     """
 
     # TODO: add playwright to search the web
-    if SEARXNG_QUERY_URL:
-        return search_searxng(SEARXNG_QUERY_URL, query)
-    elif GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID:
-        return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query)
-    elif BRAVE_SEARCH_API_KEY:
-        return search_brave(BRAVE_SEARCH_API_KEY, query)
-    elif SERPSTACK_API_KEY:
-        return search_serpstack(SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS)
-    elif SERPER_API_KEY:
-        return search_serper(SERPER_API_KEY, query)
+    if engine == "searxng":
+        if SEARXNG_QUERY_URL:
+            return search_searxng(SEARXNG_QUERY_URL, query)
+        else:
+            raise Exception("No SEARXNG_QUERY_URL found in environment variables")
+    elif engine == "google_pse":
+        if GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID:
+            return search_google_pse(GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, query)
+        else:
+            raise Exception(
+                "No GOOGLE_PSE_API_KEY or GOOGLE_PSE_ENGINE_ID found in environment variables"
+            )
+    elif engine == "brave":
+        if BRAVE_SEARCH_API_KEY:
+            return search_brave(BRAVE_SEARCH_API_KEY, query)
+        else:
+            raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
+    elif engine == "serpstack":
+        if SERPSTACK_API_KEY:
+            return search_serpstack(
+                SERPSTACK_API_KEY, query, https_enabled=SERPSTACK_HTTPS
+            )
+        else:
+            raise Exception("No SERPSTACK_API_KEY found in environment variables")
+    elif engine == "serper":
+        if SERPER_API_KEY:
+            return search_serper(SERPER_API_KEY, query)
+        else:
+            raise Exception("No SERPER_API_KEY found in environment variables")
     else:
         raise Exception("No search engine API key found in environment variables")

+ 5 - 0
backend/config.py

@@ -773,6 +773,11 @@ ENABLE_RAG_WEB_SEARCH = PersistentConfig(
     os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true",
 )
 
+RAG_WEB_SEARCH_ENGINE = PersistentConfig(
+    "RAG_WEB_SEARCH_ENGINE",
+    "rag.web.search.engine",
+    os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
+)
 
 SEARXNG_QUERY_URL = PersistentConfig(
     "SEARXNG_QUERY_URL",

+ 193 - 40
src/lib/components/documents/Settings/WebParams.svelte

@@ -1,5 +1,6 @@
 <script lang="ts">
 	import { getRAGConfig, updateRAGConfig } from '$lib/apis/rag';
+	import Switch from '$lib/components/common/Switch.svelte';
 
 	import { documents, models } from '$lib/stores';
 	import { onMount, getContext } from 'svelte';
@@ -9,14 +10,15 @@
 
 	export let saveHandler: Function;
 
-	let webLoaderSSLVerification = true;
+	let webConfig = null;
+	let webSearchEngines = ['searxng', 'google_pse', 'brave', 'serpstack', 'serper'];
 
 	let youtubeLanguage = 'en';
 	let youtubeTranslation = null;
 
 	const submitHandler = async () => {
 		const res = await updateRAGConfig(localStorage.token, {
-			web_loader_ssl_verification: webLoaderSSLVerification,
+			web: webConfig,
 			youtube: {
 				language: youtubeLanguage.split(',').map((lang) => lang.trim()),
 				translation: youtubeTranslation
@@ -28,7 +30,8 @@
 		const res = await getRAGConfig(localStorage.token);
 
 		if (res) {
-			webLoaderSSLVerification = res.web_loader_ssl_verification;
+			webConfig = res.web;
+
 			youtubeLanguage = res.youtube.language.join(',');
 			youtubeTranslation = res.youtube.translation;
 		}
@@ -37,59 +40,209 @@
 
 <form
 	class="flex flex-col h-full justify-between space-y-3 text-sm"
-	on:submit|preventDefault={() => {
-		submitHandler();
+	on:submit|preventDefault={async () => {
+		await submitHandler();
 		saveHandler();
 	}}
 >
 	<div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]">
-		<div>
-			<div class=" mb-1 text-sm font-medium">
-				{$i18n.t('Web Loader Settings')}
-			</div>
-
+		{#if webConfig}
 			<div>
+				<div class=" mb-1 text-sm font-medium">
+					{$i18n.t('Web Search')}
+				</div>
+
+				<div>
+					<div class=" py-0.5 flex w-full justify-between">
+						<div class=" self-center text-xs font-medium">
+							{$i18n.t('Enable Web Search')}
+						</div>
+
+						<Switch bind:state={webConfig.search.enable} />
+					</div>
+				</div>
+
 				<div class=" py-0.5 flex w-full justify-between">
-					<div class=" self-center text-xs font-medium">
-						{$i18n.t('Bypass SSL verification for Websites')}
+					<div class=" self-center text-xs font-medium">{$i18n.t('Web Search Engine')}</div>
+					<div class="flex items-center relative">
+						<select
+							class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+							bind:value={webConfig.search.engine}
+							placeholder="Select an engine"
+							required
+						>
+							<option disabled selected value="">Select an engine</option>
+							{#each webSearchEngines as engine}
+								<option value={engine}>{engine}</option>
+							{/each}
+						</select>
 					</div>
+				</div>
+
+				{#if webConfig.search.engine !== ''}
+					<div class="mt-1.5">
+						{#if webConfig.search.engine === 'searxng'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Searxng Query URL')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Searxng Query URL')}
+											bind:value={webConfig.search.searxng_query_url}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
+						{:else if webConfig.search.engine === 'google_pse'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Google PSE API Key')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Google PSE API Key')}
+											bind:value={webConfig.search.google_pse_api_key}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
+							<div class="mt-1.5">
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Google PSE Engine Id')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Google PSE Engine Id')}
+											bind:value={webConfig.search.google_pse_engine_id}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
+						{:else if webConfig.search.engine === 'brave'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Brave Search API Key')}
+								</div>
 
-					<button
-						class="p-1 px-3 text-xs flex rounded transition"
-						on:click={() => {
-							webLoaderSSLVerification = !webLoaderSSLVerification;
-							submitHandler();
-						}}
-						type="button"
-					>
-						{#if webLoaderSSLVerification === true}
-							<span class="ml-2 self-center">{$i18n.t('On')}</span>
-						{:else}
-							<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Brave Search API Key')}
+											bind:value={webConfig.search.brave_search_api_key}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
+						{:else if webConfig.search.engine === 'serpstack'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Serpstack API Key')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Serpstack API Key')}
+											bind:value={webConfig.search.serpstack_api_key}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
+						{:else if webConfig.search.engine === 'serper'}
+							<div>
+								<div class=" self-center text-xs font-medium mb-1">
+									{$i18n.t('Serper API Key')}
+								</div>
+
+								<div class="flex w-full">
+									<div class="flex-1">
+										<input
+											class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+											type="text"
+											placeholder={$i18n.t('Enter Serper API Key')}
+											bind:value={webConfig.search.serper_api_key}
+											autocomplete="off"
+										/>
+									</div>
+								</div>
+							</div>
 						{/if}
-					</button>
-				</div>
+					</div>
+				{/if}
 			</div>
 
-			<div class=" mt-2 mb-1 text-sm font-medium">
-				{$i18n.t('Youtube Loader Settings')}
-			</div>
+			<hr class=" dark:border-gray-850 my-2" />
 
 			<div>
-				<div class=" py-0.5 flex w-full justify-between">
-					<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Language')}</div>
-					<div class=" flex-1 self-center">
-						<input
-							class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-							type="text"
-							placeholder={$i18n.t('Enter language codes')}
-							bind:value={youtubeLanguage}
-							autocomplete="off"
-						/>
+				<div class=" mb-1 text-sm font-medium">
+					{$i18n.t('Web Loader Settings')}
+				</div>
+
+				<div>
+					<div class=" py-0.5 flex w-full justify-between">
+						<div class=" self-center text-xs font-medium">
+							{$i18n.t('Bypass SSL verification for Websites')}
+						</div>
+
+						<button
+							class="p-1 px-3 text-xs flex rounded transition"
+							on:click={() => {
+								webConfig.ssl_verification = !webConfig.ssl_verification;
+								submitHandler();
+							}}
+							type="button"
+						>
+							{#if webConfig.ssl_verification === true}
+								<span class="ml-2 self-center">{$i18n.t('On')}</span>
+							{:else}
+								<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+							{/if}
+						</button>
+					</div>
+				</div>
+
+				<div class=" mt-2 mb-1 text-sm font-medium">
+					{$i18n.t('Youtube Loader Settings')}
+				</div>
+
+				<div>
+					<div class=" py-0.5 flex w-full justify-between">
+						<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Language')}</div>
+						<div class=" flex-1 self-center">
+							<input
+								class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+								type="text"
+								placeholder={$i18n.t('Enter language codes')}
+								bind:value={youtubeLanguage}
+								autocomplete="off"
+							/>
+						</div>
 					</div>
 				</div>
 			</div>
-		</div>
+		{/if}
 	</div>
 	<div class="flex justify-end pt-3 text-sm font-medium">
 		<button

+ 7 - 2
src/lib/components/documents/SettingsModal.svelte

@@ -1,11 +1,13 @@
 <script>
-	import { getContext } from 'svelte';
+	import { getContext, tick } from 'svelte';
 	import Modal from '../common/Modal.svelte';
 	import General from './Settings/General.svelte';
 	import ChunkParams from './Settings/ChunkParams.svelte';
 	import QueryParams from './Settings/QueryParams.svelte';
 	import WebParams from './Settings/WebParams.svelte';
 	import { toast } from 'svelte-sonner';
+	import { config } from '$lib/stores';
+	import { getBackendConfig } from '$lib/apis';
 
 	const i18n = getContext('i18n');
 
@@ -171,8 +173,11 @@
 					/>
 				{:else if selectedTab === 'web'}
 					<WebParams
-						saveHandler={() => {
+						saveHandler={async () => {
 							toast.success($i18n.t('Settings saved successfully!'));
+
+							await tick();
+							await config.set(await getBackendConfig());
 						}}
 					/>
 				{/if}