Просмотр исходного кода

feat: retrieval whole document mode

Timothy J. Baek 7 месяцев назад
Родитель
Коммит
6d764ee55e

+ 44 - 39
backend/open_webui/apps/retrieval/utils.py

@@ -317,58 +317,63 @@ def get_rag_context(
     relevant_contexts = []
     relevant_contexts = []
 
 
     for file in files:
     for file in files:
-        context = None
+        if file.get("context") == "full":
+            context = {
+                "documents": [[file["content"]]],
+                "metadatas": [[{"file_id": file["id"], "name": file["name"]}]],
+            }
+        else:
+            context = None
 
 
-        collection_names = (
-            file["collection_names"]
-            if file["type"] == "collection"
-            else [file["collection_name"]] if file["collection_name"] else []
-        )
+            collection_names = (
+                file["collection_names"]
+                if file["type"] == "collection"
+                else [file["collection_name"]] if file["collection_name"] else []
+            )
 
 
-        collection_names = set(collection_names).difference(extracted_collections)
-        if not collection_names:
-            log.debug(f"skipping {file} as it has already been extracted")
-            continue
+            collection_names = set(collection_names).difference(extracted_collections)
+            if not collection_names:
+                log.debug(f"skipping {file} as it has already been extracted")
+                continue
 
 
-        try:
-            context = None
-            if file["type"] == "text":
-                context = file["content"]
-            else:
-                if hybrid_search:
-                    try:
-                        context = query_collection_with_hybrid_search(
+            try:
+                context = None
+                if file["type"] == "text":
+                    context = file["content"]
+                else:
+                    if hybrid_search:
+                        try:
+                            context = query_collection_with_hybrid_search(
+                                collection_names=collection_names,
+                                query=query,
+                                embedding_function=embedding_function,
+                                k=k,
+                                reranking_function=reranking_function,
+                                r=r,
+                            )
+                        except Exception as e:
+                            log.debug(
+                                "Error when using hybrid search, using"
+                                " non hybrid search as fallback."
+                            )
+
+                    if (not hybrid_search) or (context is None):
+                        context = query_collection(
                             collection_names=collection_names,
                             collection_names=collection_names,
                             query=query,
                             query=query,
                             embedding_function=embedding_function,
                             embedding_function=embedding_function,
                             k=k,
                             k=k,
-                            reranking_function=reranking_function,
-                            r=r,
-                        )
-                    except Exception as e:
-                        log.debug(
-                            "Error when using hybrid search, using"
-                            " non hybrid search as fallback."
                         )
                         )
+            except Exception as e:
+                log.exception(e)
 
 
-                if (not hybrid_search) or (context is None):
-                    context = query_collection(
-                        collection_names=collection_names,
-                        query=query,
-                        embedding_function=embedding_function,
-                        k=k,
-                    )
-        except Exception as e:
-            log.exception(e)
+            extracted_collections.extend(collection_names)
 
 
         if context:
         if context:
-            relevant_contexts.append({**context, "source": file})
-
-        extracted_collections.extend(collection_names)
+            relevant_contexts.append({**context, "file": file})
 
 
     contexts = []
     contexts = []
     citations = []
     citations = []
-
     for context in relevant_contexts:
     for context in relevant_contexts:
         try:
         try:
             if "documents" in context:
             if "documents" in context:
@@ -381,7 +386,7 @@ def get_rag_context(
                 if "metadatas" in context:
                 if "metadatas" in context:
                     citations.append(
                     citations.append(
                         {
                         {
-                            "source": context["source"],
+                            "source": context["file"],
                             "document": context["documents"][0],
                             "document": context["documents"][0],
                             "metadata": context["metadatas"][0],
                             "metadata": context["metadatas"][0],
                         }
                         }

+ 1 - 0
src/lib/components/chat/Controls/Controls.svelte

@@ -36,6 +36,7 @@
 						<FileItem
 						<FileItem
 							className="w-full"
 							className="w-full"
 							{file}
 							{file}
+							edit={true}
 							url={`${file?.url}`}
 							url={`${file?.url}`}
 							name={file.name}
 							name={file.name}
 							type={file.type}
 							type={file.type}

+ 1 - 0
src/lib/components/chat/MessageInput.svelte

@@ -459,6 +459,7 @@
 												size={file?.size}
 												size={file?.size}
 												status={file.status}
 												status={file.status}
 												dismissible={true}
 												dismissible={true}
+												edit={true}
 												on:dismiss={() => {
 												on:dismiss={() => {
 													files.splice(fileIdx, 1);
 													files.splice(fileIdx, 1);
 													files = files;
 													files = files;

+ 2 - 2
src/lib/components/common/FileItem.svelte

@@ -15,7 +15,7 @@
 	export let status = 'processed';
 	export let status = 'processed';
 
 
 	export let file = null;
 	export let file = null;
-	export let enableModal = true;
+	export let edit = false;
 
 
 	export let name: string;
 	export let name: string;
 	export let type: string;
 	export let type: string;
@@ -25,7 +25,7 @@
 </script>
 </script>
 
 
 {#if file}
 {#if file}
-	<FileItemModal bind:show={showModal} bind:file />
+	<FileItemModal bind:show={showModal} bind:file {edit} />
 {/if}
 {/if}
 
 
 <div class="relative group">
 <div class="relative group">

+ 62 - 24
src/lib/components/common/FileItemModal.svelte

@@ -7,57 +7,95 @@
 	import Modal from './Modal.svelte';
 	import Modal from './Modal.svelte';
 	import XMark from '../icons/XMark.svelte';
 	import XMark from '../icons/XMark.svelte';
 	import Info from '../icons/Info.svelte';
 	import Info from '../icons/Info.svelte';
+	import Switch from './Switch.svelte';
+	import Tooltip from './Tooltip.svelte';
 
 
 	export let file;
 	export let file;
 	export let show = false;
 	export let show = false;
 
 
+	export let edit = false;
+
+	let enableFullContent = false;
+
 	onMount(() => {
 	onMount(() => {
 		console.log(file);
 		console.log(file);
+
+		if (file?.context === 'full') {
+			enableFullContent = true;
+		}
 	});
 	});
 </script>
 </script>
 
 
 <Modal bind:show size="md">
 <Modal bind:show size="md">
 	<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
 	<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
-		<div class="flex items-start justify-between pb-2">
-			<div>
-				<div class=" font-medium text-lg dark:text-gray-100">
-					<a
-						href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
-						target="_blank"
-						class="hover:underline line-clamp-1"
-					>
-						{file?.name ?? 'File'}
-					</a>
+		<div class=" pb-2">
+			<div class="flex items-start justify-between">
+				<div>
+					<div class=" font-medium text-lg dark:text-gray-100">
+						<a
+							href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
+							target="_blank"
+							class="hover:underline line-clamp-1"
+						>
+							{file?.name ?? 'File'}
+						</a>
+					</div>
 				</div>
 				</div>
 
 
 				<div>
 				<div>
-					<div class=" flex text-sm gap-1 text-gray-500">
+					<button
+						on:click={() => {
+							show = false;
+						}}
+					>
+						<XMark />
+					</button>
+				</div>
+			</div>
+
+			<div>
+				<div class="flex flex-col md:flex-row gap-1 justify-between w-full">
+					<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
 						{#if file.size}
 						{#if file.size}
-							<div class="capitalize">{formatFileSize(file.size)}</div>
+							<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
 						{/if}
 						{/if}
 
 
 						{#if file.content}
 						{#if file.content}
-							<div class="capitalize">{getLineCount(file.content)} extracted lines</div>
+							<div class="capitalize shrink-0">{getLineCount(file.content)} extracted lines</div>
 
 
-							<div class="flex items-center gap-1">
+							<div class="flex items-center gap-1 shrink-0">
 								<Info />
 								<Info />
 
 
 								Formatting may be inconsistent from source.
 								Formatting may be inconsistent from source.
 							</div>
 							</div>
 						{/if}
 						{/if}
 					</div>
 					</div>
-				</div>
-			</div>
 
 
-			<div>
-				<button
-					on:click={() => {
-						show = false;
-					}}
-				>
-					<XMark />
-				</button>
+					{#if edit}
+						<div>
+							<Tooltip
+								content={enableFullContent
+									? 'Inject the entire document as context for comprehensive processing.'
+									: 'Default to segmented retrieval for focused and relevant content extraction.'}
+							>
+								<div class="flex items-center gap-1.5 text-xs">
+									{#if enableFullContent}
+										Use Entire Document
+									{:else}
+										Use Focused Retrieval
+									{/if}
+									<Switch
+										bind:state={enableFullContent}
+										on:change={(e) => {
+											file.context = e.detail ? 'full' : undefined;
+										}}
+									/>
+								</div>
+							</Tooltip>
+						</div>
+					{/if}
+				</div>
 			</div>
 			</div>
 		</div>
 		</div>