Browse Source

enh: show extracted file content

Timothy J. Baek 7 months ago
parent
commit
90ec458c4c

+ 5 - 2
backend/open_webui/apps/retrieval/main.py

@@ -725,13 +725,15 @@ def process_file(
             PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
         )
         docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
-        raw_text_content = " ".join([doc.page_content for doc in docs])
+        text_content = " ".join([doc.page_content for doc in docs])
+
+        log.debug(f"text_content: {text_content}")
 
         Files.update_files_metadata_by_id(
             form_data.file_id,
             {
                 "content": {
-                    "text": raw_text_content,
+                    "text": text_content,
                 }
             },
         )
@@ -751,6 +753,7 @@ def process_file(
                     "status": True,
                     "collection_name": collection_name,
                     "filename": file.meta.get("name", file.filename),
+                    "content": text_content,
                 }
         except Exception as e:
             raise HTTPException(

+ 6 - 0
src/lib/components/chat/MessageInput.svelte

@@ -163,6 +163,8 @@
 			if (res) {
 				fileItem.status = 'processed';
 				fileItem.collection_name = res.collection_name;
+				fileItem.content = res.content;
+
 				files = files;
 			}
 		} catch (e) {
@@ -464,6 +466,7 @@
 											</div>
 										{:else}
 											<FileItem
+												{file}
 												name={file.name}
 												type={file.type}
 												size={file?.size}
@@ -473,6 +476,9 @@
 													files.splice(fileIdx, 1);
 													files = files;
 												}}
+												on:click={() => {
+													console.log(file);
+												}}
 											/>
 										{/if}
 									{/each}

+ 13 - 14
src/lib/components/common/FileItem.svelte

@@ -1,5 +1,8 @@
 <script lang="ts">
 	import { createEventDispatcher, getContext } from 'svelte';
+	import { formatFileSize } from '$lib/utils';
+
+	import FileItemModal from './FileItemModal.svelte';
 
 	const i18n = getContext('i18n');
 	const dispatch = createEventDispatcher();
@@ -11,30 +14,26 @@
 	export let dismissible = false;
 	export let status = 'processed';
 
+	export let file = null;
+	export let enableModal = true;
+
 	export let name: string;
 	export let type: string;
 	export let size: number;
 
-	const formatSize = (size) => {
-		if (size == null) return 'Unknown size';
-		if (typeof size !== 'number' || size < 0) return 'Invalid size';
-		if (size === 0) return '0 B';
-		const units = ['B', 'KB', 'MB', 'GB', 'TB'];
-		let unitIndex = 0;
-
-		while (size >= 1024 && unitIndex < units.length - 1) {
-			size /= 1024;
-			unitIndex++;
-		}
-		return `${size.toFixed(1)} ${units[unitIndex]}`;
-	};
+	let showModal = false;
 </script>
 
+{#if file}
+	<FileItemModal bind:show={showModal} {file} />
+{/if}
+
 <div class="relative group">
 	<button
 		class="h-14 {className} flex items-center space-x-3 {colorClassName} rounded-xl border border-gray-100 dark:border-gray-800 text-left"
 		type="button"
 		on:click={async () => {
+			showModal = !showModal;
 			dispatch('click');
 		}}
 	>
@@ -111,7 +110,7 @@
 					<span class=" capitalize">{type}</span>
 				{/if}
 				{#if size}
-					<span class="capitalize">{formatSize(size)}</span>
+					<span class="capitalize">{formatFileSize(size)}</span>
 				{/if}
 			</div>
 		</div>

+ 62 - 0
src/lib/components/common/FileItemModal.svelte

@@ -0,0 +1,62 @@
+<script lang="ts">
+	// Modal that previews a file attachment: its name, formatted size, extracted line count
+	// and the extracted text content.
+	import { getContext } from 'svelte';
+	import { formatFileSize, getLineCount } from '$lib/utils';
+
+	const i18n = getContext('i18n');
+
+	import Modal from './Modal.svelte';
+	import XMark from '../icons/XMark.svelte';
+	import Info from '../icons/Info.svelte';
+
+	// File object to preview; only `name`, `size` and `content` are read below.
+	export let file;
+	// Visibility flag, two-way bound to the underlying Modal.
+	export let show = false;
+
+	// NOTE: the previous onMount(() => console.log(file)) debug hook was removed on purpose;
+	// it wrote the whole file object, including the extracted text, to the browser console.
+</script>
+
+<Modal bind:show size="md">
+	<div class="font-primary px-6 py-6 w-full flex flex-col justify-center dark:text-gray-400">
+		<div class="flex items-start justify-between pb-2">
+			<div>
+				<div class=" font-medium text-lg line-clamp-1 dark:text-gray-100">
+					{file?.name ?? 'File'}
+				</div>
+
+				<div>
+					<div class=" flex text-sm gap-1 text-gray-500">
+						<!-- Optional chaining keeps these guards consistent with the `file?.name` / `file?.content` reads. -->
+						{#if file?.size}
+							<div class="capitalize">{formatFileSize(file.size)}</div>
+							•
+						{/if}
+
+						{#if file?.content}
+							<div class="capitalize">{getLineCount(file.content)} extracted lines</div>
+
+							<div class="flex items-center gap-1">
+								<Info />
+
+								Formatting may be inconsistent from source.
+							</div>
+						{/if}
+					</div>
+				</div>
+			</div>
+
+			<div>
+				<!-- type="button" prevents an accidental form submit; aria-label names the icon-only control. -->
+				<button
+					type="button"
+					aria-label="Close"
+					on:click={() => {
+						show = false;
+					}}
+				>
+					<XMark />
+				</button>
+			</div>
+		</div>
+
+		<!-- Extracted text is shown verbatim; whitespace-pre-wrap preserves its line breaks. -->
+		<div class="max-h-96 overflow-scroll scrollbar-hidden text-xs whitespace-pre-wrap">
+			{file?.content ?? 'No content'}
+		</div>
+	</div>
+</Modal>

+ 19 - 0
src/lib/components/icons/Info.svelte

@@ -0,0 +1,19 @@
+<script lang="ts">
+	// Inline SVG icon component. The stroke colour is `currentColor`, so it follows the
+	// surrounding text colour.
+
+	// Class string applied to the <svg> element; defaults to 'size-4'.
+	export let className = 'size-4';
+	// Value passed to the SVG `stroke-width` attribute; defaults to '1.5'.
+	export let strokeWidth = '1.5';
+</script>
+
+<!--
+	Outline-only glyph (fill="none") drawn on a 24x24 viewBox.
+	NOTE(review): the path appears to be a circled "i" (Heroicons "information-circle" outline) — confirm the source.
+-->
+<svg
+	xmlns="http://www.w3.org/2000/svg"
+	fill="none"
+	viewBox="0 0 24 24"
+	stroke-width={strokeWidth}
+	stroke="currentColor"
+	class={className}
+>
+	<path
+		stroke-linecap="round"
+		stroke-linejoin="round"
+		d="m11.25 11.25.041-.02a.75.75 0 0 1 1.063.852l-.708 2.836a.75.75 0 0 0 1.063.853l.041-.021M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Zm-9-3.75h.008v.008H12V8.25Z"
+	/>
+</svg>

+ 18 - 0
src/lib/utils/index.ts

@@ -873,3 +873,21 @@ export const createMessagesList = (history, messageId) => {
 		return [message];
 	}
 };
+
+/**
+ * Format a byte count as a human-readable string using 1024-based units (B, KB, MB, GB, TB).
+ *
+ * @param size - Size in bytes.
+ * @returns 'Unknown size' for null/undefined; 'Invalid size' for non-numbers, NaN, ±Infinity
+ *   and negative values; '0 B' for zero; otherwise the value with one decimal place and its
+ *   unit (e.g. '1.5 KB'). Values of 1024 TB or more stay expressed in TB.
+ */
+export const formatFileSize = (size) => {
+	if (size == null) return 'Unknown size';
+	// Number.isFinite does not coerce, so it rejects non-numbers as well as NaN/Infinity,
+	// which previously slipped through and rendered as 'NaN B' / 'Infinity TB'.
+	if (!Number.isFinite(size) || size < 0) return 'Invalid size';
+	if (size === 0) return '0 B';
+
+	const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+	// Scale a local copy instead of reassigning the parameter.
+	let value = size;
+	let unitIndex = 0;
+
+	while (value >= 1024 && unitIndex < units.length - 1) {
+		value /= 1024;
+		unitIndex++;
+	}
+	return `${value.toFixed(1)} ${units[unitIndex]}`;
+};
+
+/**
+ * Count the lines in a piece of text by splitting on '\n'.
+ *
+ * @param text - Text to measure.
+ * @returns The number of '\n'-separated segments (so '' counts as 1 and a trailing newline
+ *   adds an empty final line), or 0 when `text` is not a string.
+ */
+export const getLineCount = (text) => {
+	// null/undefined/non-string input would otherwise throw a TypeError on `.split`.
+	if (typeof text !== 'string') return 0;
+	return text.split('\n').length;
+};