Timothy J. Baek 7 月之前
父節點
當前提交
351b1dbf31

+ 5 - 1
backend/open_webui/apps/retrieval/main.py

@@ -1,3 +1,5 @@
+# TODO: Merge this with the webui_app and make it a single app
+
 import json
 import logging
 import mimetypes
@@ -728,11 +730,13 @@ def process_file(
         docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
         text_content = " ".join([doc.page_content for doc in docs])
         log.debug(f"text_content: {text_content}")
+        hash = calculate_sha256_string(text_content)
 
-        Files.update_files_data_by_id(
+        Files.update_file_data_by_id(
             form_data.file_id,
             {"content": text_content},
         )
+        Files.update_file_hash_by_id(form_data.file_id, hash)
 
         try:
             result = save_docs_to_vector_db(

+ 17 - 5
backend/open_webui/apps/webui/models/files.py

@@ -20,7 +20,7 @@ class File(Base):
 
     id = Column(String, primary_key=True)
     user_id = Column(String)
-    hash = Column(String)
+    hash = Column(Text, nullable=True)
 
     filename = Column(Text)
     data = Column(JSON)
@@ -35,7 +35,7 @@ class FileModel(BaseModel):
 
     id: str
     user_id: str
-    hash: str
+    hash: Optional[str] = None
 
     filename: str
     data: dict
@@ -53,7 +53,7 @@ class FileModel(BaseModel):
 class FileModelResponse(BaseModel):
     id: str
     user_id: str
-    hash: str
+    hash: Optional[str] = None
 
     filename: str
     data: dict
@@ -65,6 +65,7 @@ class FileModelResponse(BaseModel):
 
 class FileForm(BaseModel):
     id: str
+    hash: Optional[str] = None
     filename: str
     meta: dict = {}
 
@@ -120,7 +121,18 @@ class FilesTable:
                 for file in db.query(File).filter_by(user_id=user_id).all()
             ]
 
-    def update_files_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
+    def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]:
+        """Store ``hash`` on the file row identified by ``id``.
+
+        Returns the validated ``FileModel`` for the updated row, or ``None``
+        when anything in the lookup/assign/commit/validate sequence raises.
+        """
+        with get_db() as db:
+            try:
+                record = db.query(File).filter_by(id=id).first()
+                # NOTE(review): presumably `first()` yields None for an unknown
+                # id, in which case the assignment raises and is handled below.
+                record.hash = hash
+                db.commit()
+                updated = FileModel.model_validate(record)
+            except Exception:
+                updated = None
+            return updated
+
+    def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
         with get_db() as db:
             try:
                 file = db.query(File).filter_by(id=id).first()
@@ -131,7 +143,7 @@ class FilesTable:
             except Exception:
                 return None
 
-    def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
+    def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
         with get_db() as db:
             try:
                 file = db.query(File).filter_by(id=id).first()

+ 1 - 1
backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py

@@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None
 
 def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column("file", sa.Column("hash", sa.String(), nullable=True))
+    op.add_column("file", sa.Column("hash", sa.Text(), nullable=True))
     op.add_column("file", sa.Column("data", sa.JSON(), nullable=True))
     op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True))
 

+ 7 - 2
src/lib/apis/retrieval/index.ts

@@ -306,7 +306,11 @@ export interface SearchDocument {
 	filenames: string[];
 }
 
-export const processFile = async (token: string, file_id: string) => {
+export const processFile = async (
+	token: string,
+	file_id: string,
+	collection_name: string | null = null
+) => {
 	let error = null;
 
 	const res = await fetch(`${RAG_API_BASE_URL}/process/file`, {
@@ -317,7 +321,8 @@ export const processFile = async (token: string, file_id: string) => {
 			authorization: `Bearer ${token}`
 		},
 		body: JSON.stringify({
-			file_id: file_id
+			file_id: file_id,
+			collection_name: collection_name ? collection_name : undefined
 		})
 	})
 		.then(async (res) => {

+ 128 - 0
src/lib/components/workspace/Knowledge/AddContentModal.svelte

@@ -0,0 +1,128 @@
+<script lang="ts">
+	import { toast } from 'svelte-sonner';
+	import dayjs from 'dayjs';
+	import { onMount, getContext, createEventDispatcher } from 'svelte';
+	const i18n = getContext('i18n');
+	const dispatch = createEventDispatcher();
+
+	import { knowledge } from '$lib/stores';
+	import Modal from '$lib/components/common/Modal.svelte';
+	import { uploadFile } from '$lib/apis/files';
+
+	export let show = false;
+
+	let fileInputElement: HTMLInputElement;
+	let inputFiles;
+
+	// Upload every selected file and emit an `add` event for each successful
+	// upload, then reset the file input and close the modal.
+	const submitHandler = async () => {
+		if (inputFiles && inputFiles.length > 0) {
+			for (const file of inputFiles) {
+				console.log(file, file.name.split('.').at(-1));
+
+				// uploadFile is asynchronous: without `await` the truthiness check
+				// below always passed and the event payload was a pending promise.
+				// A failed upload is reported and skipped instead of left unhandled.
+				const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
+					toast.error(e);
+					return null;
+				});
+
+				if (uploadedFile) {
+					dispatch('add', uploadedFile);
+				}
+			}
+
+			inputFiles = null;
+			fileInputElement.value = '';
+
+			show = false;
+		} else {
+			toast.error($i18n.t(`File not found.`));
+		}
+	};
+</script>
+
+<Modal size="sm" bind:show>
+	<div>
+		<div class=" flex justify-between dark:text-gray-300 px-5 pt-4">
+			<div class=" text-lg font-medium self-center">{$i18n.t('Add Content')}</div>
+			<button
+				class="self-center"
+				on:click={() => {
+					show = false;
+				}}
+			>
+				<svg
+					xmlns="http://www.w3.org/2000/svg"
+					viewBox="0 0 20 20"
+					fill="currentColor"
+					class="w-5 h-5"
+				>
+					<path
+						d="M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z"
+					/>
+				</svg>
+			</button>
+		</div>
+		<div class="flex flex-col md:flex-row w-full px-5 py-4 md:space-x-4 dark:text-gray-200">
+			<div class=" flex flex-col w-full sm:flex-row sm:justify-center sm:space-x-6">
+				<form
+					class="flex flex-col w-full"
+					on:submit|preventDefault={() => {
+						submitHandler();
+					}}
+				>
+					<div class="mb-3 w-full">
+						<input
+							id="upload-doc-input"
+							bind:this={fileInputElement}
+							bind:files={inputFiles}
+							type="file"
+							multiple
+							hidden
+						/>
+
+						<button
+							class="w-full text-sm font-medium py-3 bg-gray-100 hover:bg-gray-200 dark:bg-gray-850 dark:hover:bg-gray-800 text-center rounded-xl"
+							type="button"
+							on:click={() => {
+								fileInputElement.click();
+							}}
+						>
+							{#if inputFiles}
+								{inputFiles.length > 0 ? `${inputFiles.length}` : ''} document(s) selected.
+							{:else}
+								{$i18n.t('Click here to select files.')}
+							{/if}
+						</button>
+					</div>
+
+					<div class="flex justify-end text-sm font-medium">
+						<button
+							class=" px-4 py-2 bg-emerald-700 hover:bg-emerald-800 text-gray-100 transition rounded-lg"
+							type="submit"
+						>
+							{$i18n.t('Save')}
+						</button>
+					</div>
+				</form>
+			</div>
+		</div>
+	</div>
+</Modal>
+
+<style>
+	/* NOTE(review): none of the selectors below appear to match this component's
+	   markup: the only input is type="file" and no element uses the `tabs`
+	   class. Presumably carried over from another component; confirm before
+	   removing. */
+	input::-webkit-outer-spin-button,
+	input::-webkit-inner-spin-button {
+		/* display: none; <- Crashes Chrome on hover */
+		-webkit-appearance: none;
+		margin: 0; /* <-- Apparently some margin are still there even though it's hidden */
+	}
+
+	.tabs::-webkit-scrollbar {
+		display: none; /* for Chrome, Safari and Opera */
+	}
+
+	.tabs {
+		-ms-overflow-style: none; /* IE and Edge */
+		scrollbar-width: none; /* Firefox */
+	}
+
+	input[type='number'] {
+		-moz-appearance: textfield; /* Firefox */
+	}
+</style>

+ 67 - 10
src/lib/components/workspace/Knowledge/Item.svelte

@@ -16,6 +16,10 @@
 	import Badge from '$lib/components/common/Badge.svelte';
 	import Files from './Files.svelte';
 	import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte';
+	import AddContentModal from './AddContentModal.svelte';
+	import { transcribeAudio } from '$lib/apis/audio';
+	import { blobToFile } from '$lib/utils';
+	import { processFile } from '$lib/apis/retrieval';
 
 	let largeScreen = true;
 
@@ -60,6 +64,59 @@
 		}, 1000);
 	};
 
+	// Upload one file, process it with the current knowledge id as the collection
+	// name, and append its id to `knowledge.data.file_ids`.
+	// Supported audio files are transcribed first and uploaded as a `.txt` file;
+	// if transcription fails the original file is uploaded unchanged.
+	const uploadFileHandler = async (file) => {
+		console.log(file);
+
+		// Check if the file is an audio file and transcribe/convert it to text file
+		if (['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/x-m4a'].includes(file['type'])) {
+			const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+				toast.error(error);
+				return null;
+			});
+
+			if (res) {
+				console.log(res);
+				const blob = new Blob([res.text], { type: 'text/plain' });
+				file = blobToFile(blob, `${file.name}.txt`);
+			}
+		}
+
+		try {
+			const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
+				toast.error(e);
+			});
+
+			if (uploadedFile) {
+				const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch(
+					(e) => {
+						toast.error(e);
+					}
+				);
+
+				// The catch above resolves to undefined on failure, so guard the
+				// property access; otherwise a TypeError is thrown here and shown
+				// to the user instead of the "Failed to process file." message.
+				if (processedFile?.status) {
+					knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];
+
+					const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
+						data: knowledge.data
+					}).catch((e) => {
+						toast.error(e);
+					});
+
+					if (updatedKnowledge) {
+						knowledge = updatedKnowledge;
+						toast.success($i18n.t('File added successfully.'));
+					}
+				} else {
+					toast.error($i18n.t('Failed to process file.'));
+				}
+			} else {
+				toast.error($i18n.t('Failed to upload file.'));
+			}
+		} catch (e) {
+			toast.error(e);
+		}
+	};
+
 	onMount(async () => {
 		// listen to resize 1024px
 		const mediaQuery = window.matchMedia('(min-width: 1024px)');
@@ -78,7 +135,8 @@
 		id = $page.params.id;
 
 		const res = await getKnowledgeById(localStorage.token, id).catch((e) => {
-			console.error(e);
+			toast.error(e);
+			return null;
 		});
 
 		if (res) {
@@ -102,19 +160,11 @@
 			e.preventDefault();
 
 			if (e.dataTransfer?.files) {
-				let reader = new FileReader();
 				const inputFiles = e.dataTransfer?.files;
 
 				if (inputFiles && inputFiles.length > 0) {
 					for (const file of inputFiles) {
-						console.log(file, file.name.split('.').at(-1));
-						const uploadedFile = await uploadFile(localStorage.token, file).catch((e) => {
-							toast.error(e);
-						});
-
-						if (uploadedFile) {
-							knowledge.data.file_ids = [...(knowledge.data.file_ids ?? []), uploadedFile.id];
-						}
+						await uploadFileHandler(file);
 					}
 				} else {
 					toast.error($i18n.t(`File not found.`));
@@ -161,6 +211,13 @@
 	</div>
 {/if}
 
+<!-- NOTE(review): the `add` handler below only logs the event; the dispatched
+     payload is not passed on to any knowledge update logic here. Confirm
+     whether this is intentional. -->
+<AddContentModal
+	bind:show={showAddContentModal}
+	on:add={(e) => {
+		console.log(e);
+	}}
+/>
+
 <div class="flex flex-col w-full max-h-[100dvh] h-full">
 	<button
 		class="flex space-x-1"