Kaynağa Gözat

Merge pull request #1756 from buroa/buroa/toggle-hybrid

feat: toggle hybrid search
Timothy Jaeryang Baek 1 yıl önce
ebeveyn
işleme
543707eefd

+ 26 - 1
backend/apps/rag/main.py

@@ -70,6 +70,7 @@ from config import (
     RAG_EMBEDDING_MODEL,
     RAG_EMBEDDING_MODEL_AUTO_UPDATE,
     RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
+    ENABLE_RAG_HYBRID_SEARCH,
     RAG_RERANKING_MODEL,
     RAG_RERANKING_MODEL_AUTO_UPDATE,
     RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
@@ -91,6 +92,9 @@ app = FastAPI()
 
 app.state.TOP_K = RAG_TOP_K
 app.state.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
+
+app.state.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
+
 app.state.CHUNK_SIZE = CHUNK_SIZE
 app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
 
@@ -321,6 +325,7 @@ async def get_query_settings(user=Depends(get_admin_user)):
         "template": app.state.RAG_TEMPLATE,
         "k": app.state.TOP_K,
         "r": app.state.RELEVANCE_THRESHOLD,
+        "hybrid": app.state.ENABLE_RAG_HYBRID_SEARCH,
     }
 
 
@@ -328,6 +333,7 @@ class QuerySettingsForm(BaseModel):
     k: Optional[int] = None
     r: Optional[float] = None
     template: Optional[str] = None
+    hybrid: Optional[bool] = None
 
 
 @app.post("/query/settings/update")
@@ -337,7 +343,14 @@ async def update_query_settings(
     app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE
     app.state.TOP_K = form_data.k if form_data.k else 4
     app.state.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
-    return {"status": True, "template": app.state.RAG_TEMPLATE}
+    app.state.ENABLE_RAG_HYBRID_SEARCH = form_data.hybrid if form_data.hybrid else False
+    return {
+        "status": True,
+        "template": app.state.RAG_TEMPLATE,
+        "k": app.state.TOP_K,
+        "r": app.state.RELEVANCE_THRESHOLD,
+        "hybrid": app.state.ENABLE_RAG_HYBRID_SEARCH,
+    }
 
 
 class QueryDocForm(BaseModel):
@@ -345,6 +358,7 @@ class QueryDocForm(BaseModel):
     query: str
     k: Optional[int] = None
     r: Optional[float] = None
+    hybrid: Optional[bool] = None
 
 
 @app.post("/query/doc")
@@ -368,6 +382,11 @@ def query_doc_handler(
             r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
             embeddings_function=embeddings_function,
             reranking_function=app.state.sentence_transformer_rf,
+            hybrid_search=(
+                form_data.hybrid
+                if form_data.hybrid
+                else app.state.ENABLE_RAG_HYBRID_SEARCH
+            ),
         )
     except Exception as e:
         log.exception(e)
@@ -382,6 +401,7 @@ class QueryCollectionsForm(BaseModel):
     query: str
     k: Optional[int] = None
     r: Optional[float] = None
+    hybrid: Optional[bool] = None
 
 
 @app.post("/query/collection")
@@ -405,6 +425,11 @@ def query_collection_handler(
             r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
             embeddings_function=embeddings_function,
             reranking_function=app.state.sentence_transformer_rf,
+            hybrid_search=(
+                form_data.hybrid
+                if form_data.hybrid
+                else app.state.ENABLE_RAG_HYBRID_SEARCH
+            ),
         )
     except Exception as e:
         log.exception(e)

+ 18 - 18
backend/apps/rag/utils.py

@@ -18,8 +18,6 @@ from langchain.retrievers import (
     EnsembleRetriever,
 )
 
-from sentence_transformers import CrossEncoder
-
 from typing import Optional
 from config import SRC_LOG_LEVELS, CHROMA_CLIENT
 
@@ -32,16 +30,15 @@ def query_embeddings_doc(
     collection_name: str,
     query: str,
     embeddings_function,
+    reranking_function,
     k: int,
-    reranking_function: Optional[CrossEncoder] = None,
-    r: Optional[float] = None,
+    r: float,
+    hybrid_search: bool,
 ):
     try:
+        collection = CHROMA_CLIENT.get_collection(name=collection_name)
 
-        if reranking_function:
-            # if you use docker use the model from the environment variable
-            collection = CHROMA_CLIENT.get_collection(name=collection_name)
-
+        if hybrid_search:
             documents = collection.get()  # get all documents
             bm25_retriever = BM25Retriever.from_texts(
                 texts=documents.get("documents"),
@@ -77,24 +74,19 @@ def query_embeddings_doc(
                 "metadatas": [[d.metadata for d in result]],
             }
         else:
-            # if you use docker use the model from the environment variable
             query_embeddings = embeddings_function(query)
-
-            log.info(f"query_embeddings_doc {query_embeddings}")
-            collection = CHROMA_CLIENT.get_collection(name=collection_name)
-
             result = collection.query(
                 query_embeddings=[query_embeddings],
                 n_results=k,
             )
 
-            log.info(f"query_embeddings_doc:result {result}")
+        log.info(f"query_embeddings_doc:result {result}")
         return result
     except Exception as e:
         raise e
 
 
-def merge_and_sort_query_results(query_results, k):
+def merge_and_sort_query_results(query_results, k, reverse=False):
     # Initialize lists to store combined data
     combined_distances = []
     combined_documents = []
@@ -109,7 +101,7 @@ def merge_and_sort_query_results(query_results, k):
     combined = list(zip(combined_distances, combined_documents, combined_metadatas))
 
     # Sort the list based on distances
-    combined.sort(key=lambda x: x[0])
+    combined.sort(key=lambda x: x[0], reverse=reverse)
 
     # We don't have anything :-(
     if not combined:
@@ -142,6 +134,7 @@ def query_embeddings_collection(
     r: float,
     embeddings_function,
     reranking_function,
+    hybrid_search: bool,
 ):
 
     results = []
@@ -155,12 +148,14 @@ def query_embeddings_collection(
                 r=r,
                 embeddings_function=embeddings_function,
                 reranking_function=reranking_function,
+                hybrid_search=hybrid_search,
             )
             results.append(result)
         except:
             pass
 
-    return merge_and_sort_query_results(results, k)
+    reverse = hybrid_search and reranking_function is not None
+    return merge_and_sort_query_results(results, k=k, reverse=reverse)
 
 
 def rag_template(template: str, context: str, query: str):
@@ -211,6 +206,7 @@ def rag_messages(
     template,
     k,
     r,
+    hybrid_search,
     embedding_engine,
     embedding_model,
     embedding_function,
@@ -283,6 +279,7 @@ def rag_messages(
                     r=r,
                     embeddings_function=embeddings_function,
                     reranking_function=reranking_function,
+                    hybrid_search=hybrid_search,
                 )
             else:
                 context = query_embeddings_doc(
@@ -292,6 +289,7 @@ def rag_messages(
                     r=r,
                     embeddings_function=embeddings_function,
                     reranking_function=reranking_function,
+                    hybrid_search=hybrid_search,
                 )
         except Exception as e:
             log.exception(e)
@@ -479,7 +477,9 @@ class RerankCompressor(BaseDocumentCompressor):
                 (d, s) for d, s in docs_with_scores if s >= self.r_score
             ]
 
-        result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
+        reverse = self.reranking_function is not None
+        result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=reverse)
+
         final_results = []
         for doc, doc_score in result[: self.top_n]:
             metadata = doc.metadata

+ 4 - 0
backend/config.py

@@ -423,6 +423,10 @@ CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
 RAG_TOP_K = int(os.environ.get("RAG_TOP_K", "5"))
 RAG_RELEVANCE_THRESHOLD = float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0"))
 
+ENABLE_RAG_HYBRID_SEARCH = (
+    os.environ.get("ENABLE_RAG_HYBRID_SEARCH", "").lower() == "true"
+)
+
 RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "")
 
 RAG_EMBEDDING_MODEL = os.environ.get(

+ 1 - 0
backend/main.py

@@ -121,6 +121,7 @@ class RAGMiddleware(BaseHTTPMiddleware):
                     rag_app.state.RAG_TEMPLATE,
                     rag_app.state.TOP_K,
                     rag_app.state.RELEVANCE_THRESHOLD,
+                    rag_app.state.ENABLE_RAG_HYBRID_SEARCH,
                     rag_app.state.RAG_EMBEDDING_ENGINE,
                     rag_app.state.RAG_EMBEDDING_MODEL,
                     rag_app.state.sentence_transformer_ef,

+ 106 - 83
src/lib/components/documents/Settings/General.svelte

@@ -43,7 +43,8 @@
 	let querySettings = {
 		template: '',
 		r: 0.0,
-		k: 4
+		k: 4,
+		hybrid: false
 	};
 
 	const scanHandler = async () => {
@@ -174,6 +175,12 @@
 		}
 	};
 
+	const toggleHybridSearch = async () => {
+		querySettings.hybrid = !querySettings.hybrid;
+
+		querySettings = await updateQuerySettings(localStorage.token, querySettings);
+	};
+
 	onMount(async () => {
 		const res = await getRAGConfig(localStorage.token);
 
@@ -202,6 +209,24 @@
 		<div>
 			<div class=" mb-2 text-sm font-medium">{$i18n.t('General Settings')}</div>
 
+			<div class=" flex w-full justify-between">
+				<div class=" self-center text-xs font-medium">{$i18n.t('Hybrid Search')}</div>
+
+				<button
+					class="p-1 px-3 text-xs flex rounded transition"
+					on:click={() => {
+						toggleHybridSearch();
+					}}
+					type="button"
+				>
+					{#if querySettings.hybrid === true}
+						<span class="ml-2 self-center">{$i18n.t('On')}</span>
+					{:else}
+						<span class="ml-2 self-center">{$i18n.t('Off')}</span>
+					{/if}
+				</button>
+			</div>
+
 			<div class=" flex w-full justify-between">
 				<div class=" self-center text-xs font-medium">{$i18n.t('Embedding Model Engine')}</div>
 				<div class="flex items-center relative">
@@ -386,78 +411,74 @@
 
 				<hr class=" dark:border-gray-700 my-3" />
 
-				<div class=" ">
-					<div class=" mb-2 text-sm font-medium">{$i18n.t('Update Reranking Model')}</div>
+				{#if querySettings.hybrid === true}
+					<div class=" ">
+						<div class=" mb-2 text-sm font-medium">{$i18n.t('Update Reranking Model')}</div>
 
-					<div class="flex w-full">
-						<div class="flex-1 mr-2">
-							<input
-								class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-								placeholder={$i18n.t('Update reranking model (e.g. {{model}})', {
-									model: rerankingModel.slice(-40)
-								})}
-								bind:value={rerankingModel}
-							/>
-						</div>
-						<button
-							class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
-							on:click={() => {
-								rerankingModelUpdateHandler();
-							}}
-							disabled={updateRerankingModelLoading}
-						>
-							{#if updateRerankingModelLoading}
-								<div class="self-center">
+						<div class="flex w-full">
+							<div class="flex-1 mr-2">
+								<input
+									class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									placeholder={$i18n.t('Update reranking model (e.g. {{model}})', {
+										model: rerankingModel.slice(-40)
+									})}
+									bind:value={rerankingModel}
+								/>
+							</div>
+							<button
+								class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
+								on:click={() => {
+									rerankingModelUpdateHandler();
+								}}
+								disabled={updateRerankingModelLoading}
+							>
+								{#if updateRerankingModelLoading}
+									<div class="self-center">
+										<svg
+											class=" w-4 h-4"
+											viewBox="0 0 24 24"
+											fill="currentColor"
+											xmlns="http://www.w3.org/2000/svg"
+											><style>
+												.spinner_ajPY {
+													transform-origin: center;
+													animation: spinner_AtaB 0.75s infinite linear;
+												}
+												@keyframes spinner_AtaB {
+													100% {
+														transform: rotate(360deg);
+													}
+												}
+											</style><path
+												d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
+												opacity=".25"
+											/><path
+												d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
+												class="spinner_ajPY"
+											/></svg
+										>
+									</div>
+								{:else}
 									<svg
-										class=" w-4 h-4"
-										viewBox="0 0 24 24"
-										fill="currentColor"
 										xmlns="http://www.w3.org/2000/svg"
-										><style>
-											.spinner_ajPY {
-												transform-origin: center;
-												animation: spinner_AtaB 0.75s infinite linear;
-											}
-											@keyframes spinner_AtaB {
-												100% {
-													transform: rotate(360deg);
-												}
-											}
-										</style><path
-											d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
-											opacity=".25"
-										/><path
-											d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
-											class="spinner_ajPY"
-										/></svg
+										viewBox="0 0 16 16"
+										fill="currentColor"
+										class="w-4 h-4"
 									>
-								</div>
-							{:else}
-								<svg
-									xmlns="http://www.w3.org/2000/svg"
-									viewBox="0 0 16 16"
-									fill="currentColor"
-									class="w-4 h-4"
-								>
-									<path
-										d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
-									/>
-									<path
-										d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
-									/>
-								</svg>
-							{/if}
-						</button>
+										<path
+											d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
+										/>
+										<path
+											d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
+										/>
+									</svg>
+								{/if}
+							</button>
+						</div>
 					</div>
-				</div>
-
-				<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
-					{$i18n.t(
-						'Note: If you choose a reranking model, it will use that to score and rerank instead of the embedding model.'
-					)}
-				</div>
 
-				<hr class=" dark:border-gray-700 my-3" />
+					<hr class=" dark:border-gray-700 my-3" />
+				{/if}
 
 				<div class="  flex w-full justify-between">
 					<div class=" self-center text-xs font-medium">
@@ -583,25 +604,27 @@
 						</div>
 					</div>
 
-					<div class=" flex">
-						<div class="  flex w-full justify-between">
-							<div class="self-center text-xs font-medium flex-1">
-								{$i18n.t('Relevance Threshold')}
-							</div>
+					{#if querySettings.hybrid === true}
+						<div class=" flex">
+							<div class="  flex w-full justify-between">
+								<div class="self-center text-xs font-medium flex-1">
+									{$i18n.t('Relevance Threshold')}
+								</div>
 
-							<div class="self-center p-3">
-								<input
-									class=" w-full rounded-lg py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
-									type="number"
-									step="0.01"
-									placeholder={$i18n.t('Enter Relevance Threshold')}
-									bind:value={querySettings.r}
-									autocomplete="off"
-									min="0.0"
-								/>
+								<div class="self-center p-3">
+									<input
+										class=" w-full rounded-lg py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+										type="number"
+										step="0.01"
+										placeholder={$i18n.t('Enter Relevance Threshold')}
+										bind:value={querySettings.r}
+										autocomplete="off"
+										min="0.0"
+									/>
+								</div>
 							</div>
 						</div>
-					</div>
+					{/if}
 
 					<div>
 						<div class=" mb-2.5 text-sm font-medium">{$i18n.t('RAG Template')}</div>