Timothy J. Baek hai 7 meses
pai
achega
b291271df3

+ 10 - 4
backend/open_webui/apps/retrieval/main.py

@@ -731,7 +731,7 @@ def process_file(
 
         collection_name = form_data.collection_name
         if collection_name is None:
-            collection_name = file.id
+            collection_name = f"file-{file.id}"
 
         loader = Loader(
             engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
@@ -758,12 +758,11 @@ def process_file(
         log.debug(f"text_content: {text_content}")
         hash = calculate_sha256_string(text_content)
 
-        res = Files.update_file_data_by_id(
+        Files.update_file_data_by_id(
             file.id,
             {"content": text_content},
         )
-        print(res)
-        Files.update_file_hash_by_id(form_data.file_id, hash)
+        Files.update_file_hash_by_id(file.id, hash)
 
         try:
             result = save_docs_to_vector_db(
@@ -778,6 +777,13 @@ def process_file(
             )
 
             if result:
+                Files.update_file_metadata_by_id(
+                    file.id,
+                    {
+                        "collection_name": collection_name,
+                    },
+                )
+
                 return {
                     "status": True,
                     "collection_name": collection_name,

+ 1 - 1
backend/open_webui/apps/retrieval/utils.py

@@ -319,7 +319,7 @@ def get_rag_context(
     for file in files:
         if file.get("context") == "full":
             context = {
-                "documents": [[file.get("file").get("content")]],
+                "documents": [[file.get("file").get("data", {}).get("content")]],
                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
             }
         else:

+ 9 - 1
backend/open_webui/apps/webui/routers/files.py

@@ -6,7 +6,8 @@ from pathlib import Path
 from typing import Optional
 
 from open_webui.apps.webui.models.files import FileForm, FileModel, Files
-from open_webui.apps.webui.models.knowledge import Knowledges
+from open_webui.apps.retrieval.main import process_file, ProcessFileForm
+
 from open_webui.config import UPLOAD_DIR
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.env import SRC_LOG_LEVELS
@@ -61,6 +62,13 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
             ),
         )
 
+        try:
+            process_file(ProcessFileForm(file_id=id))
+            file = Files.get_file_by_id(id=id)
+        except Exception as e:
+            log.exception(e)
+            log.error(f"Error processing file: {file.id}")
+
         if file:
             return file
         else:

+ 7 - 3
backend/open_webui/apps/webui/routers/knowledge.py

@@ -17,7 +17,6 @@ from open_webui.utils.utils import get_admin_user, get_verified_user
 
 from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
 
-
 router = APIRouter()
 
 ############################
@@ -132,7 +131,7 @@ class KnowledgeFileIdForm(BaseModel):
 
 
 @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
-async def add_file_to_knowledge_by_id(
+def add_file_to_knowledge_by_id(
     id: str,
     form_data: KnowledgeFileIdForm,
     user=Depends(get_admin_user),
@@ -144,6 +143,11 @@ async def add_file_to_knowledge_by_id(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.NOT_FOUND,
         )
+    if not file.data:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
+        )
 
     if knowledge:
         data = knowledge.data or {}
@@ -191,7 +195,7 @@ class KnowledgeFileIdForm(BaseModel):
 
 
 @router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
-async def remove_file_from_knowledge_by_id(
+def remove_file_from_knowledge_by_id(
     id: str,
     form_data: KnowledgeFileIdForm,
     user=Depends(get_admin_user),

+ 1 - 0
backend/open_webui/constants.py

@@ -95,6 +95,7 @@ class ERROR_MESSAGES(str, Enum):
     )
 
     DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
+    FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."
 
 
 class TASKS(str, Enum):

+ 70 - 0
src/lib/apis/knowledge/index.ts

@@ -138,6 +138,76 @@ export const updateKnowledgeById = async (token: string, id: string, form: Knowl
 	return res;
 };
 
+/**
+ * Attach an already-uploaded file to a knowledge base.
+ *
+ * Sends `POST {WEBUI_API_BASE_URL}/knowledge/{id}/file/add` with a JSON body
+ * of `{ file_id }`.
+ *
+ * @param token  Bearer token sent in the `authorization` header.
+ * @param id     ID of the knowledge base.
+ * @param fileId ID of the file to add.
+ * @returns The parsed JSON response body.
+ * @throws The `detail` field of the error response when the server answers
+ *         with a non-OK status, or the raw error when no `detail` is
+ *         available (for example a network failure or an unparsable body).
+ */
+export const addFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/add`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			file_id: fileId
+		})
+	})
+		.then(async (res) => {
+			// Non-OK responses carry a JSON error payload; surface it via the catch below.
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			// Fall back to the raw error so that failures without a `detail`
+			// field are not silently turned into a `null` result.
+			error = err?.detail ?? err;
+
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
+/**
+ * Detach a file from a knowledge base.
+ *
+ * Sends `POST {WEBUI_API_BASE_URL}/knowledge/{id}/file/remove` with a JSON
+ * body of `{ file_id }`.
+ *
+ * @param token  Bearer token sent in the `authorization` header.
+ * @param id     ID of the knowledge base.
+ * @param fileId ID of the file to remove.
+ * @returns The parsed JSON response body.
+ * @throws The `detail` field of the error response when the server answers
+ *         with a non-OK status, or the raw error when no `detail` is
+ *         available (for example a network failure or an unparsable body).
+ */
+export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			file_id: fileId
+		})
+	})
+		.then(async (res) => {
+			// Non-OK responses carry a JSON error payload; surface it via the catch below.
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			// Fall back to the raw error so that failures without a `detail`
+			// field are not silently turned into a `null` result.
+			error = err?.detail ?? err;
+
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const deleteKnowledgeById = async (token: string, id: string) => {
 	let error = null;
 

+ 1 - 1
src/lib/components/chat/Controls/Controls.svelte

@@ -35,7 +35,7 @@
 					{#each chatFiles as file, fileIdx}
 						<FileItem
 							className="w-full"
-							{file}
+							item={file}
 							edit={true}
 							url={`${file?.url}`}
 							name={file.name}

+ 5 - 25
src/lib/components/chat/MessageInput.svelte

@@ -125,16 +125,17 @@
 		}
 
 		try {
+			// During the file upload, file content is automatically extracted.
 			const uploadedFile = await uploadFile(localStorage.token, file);
 
 			if (uploadedFile) {
-				fileItem.status = 'uploaded';
+				fileItem.status = 'processed';
 				fileItem.file = uploadedFile;
 				fileItem.id = uploadedFile.id;
+				fileItem.collection_name = uploadedFile?.meta?.collection_name;
 				fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`;
 
-				// Try to extract content of the file for retrieval, even non-supported file types
-				processFileItem(fileItem);
+				files = files;
 			} else {
 				files = files.filter((item) => item.status !== null);
 			}
@@ -143,27 +144,6 @@
 			files = files.filter((item) => item.status !== null);
 		}
 	};
-
-	const processFileItem = async (fileItem) => {
-		try {
-			const res = await processFile(localStorage.token, fileItem.id);
-			if (res) {
-				fileItem.status = 'processed';
-				fileItem.collection_name = res.collection_name;
-				fileItem.file = {
-					...fileItem.file,
-					content: res.content
-				};
-
-				files = files;
-			}
-		} catch (e) {
-			// We keep the file in the files list even if it fails to process
-			fileItem.status = 'processed';
-			files = files;
-		}
-	};
-
 	const inputFilesHandler = async (inputFiles) => {
 		inputFiles.forEach((file) => {
 			console.log(file, file.name.split('.').at(-1));
@@ -456,7 +436,7 @@
 											</div>
 										{:else}
 											<FileItem
-												{file}
+												item={file}
 												name={file.name}
 												type={file.type}
 												size={file?.size}

+ 1 - 1
src/lib/components/chat/Messages/UserMessage.svelte

@@ -127,7 +127,7 @@
 								<img src={file.url} alt="input" class=" max-h-96 rounded-lg" draggable="false" />
 							{:else}
 								<FileItem
-									{file}
+									item={file}
 									url={file.url}
 									name={file.name}
 									type={file.type}

+ 4 - 4
src/lib/components/common/FileItem.svelte

@@ -15,7 +15,7 @@
 	export let dismissible = false;
 	export let status = 'processed';
 
-	export let file = null;
+	export let item = null;
 	export let edit = false;
 
 	export let name: string;
@@ -25,15 +25,15 @@
 	let showModal = false;
 </script>
 
-{#if file}
-	<FileItemModal bind:show={showModal} bind:file {edit} />
+{#if item}
+	<FileItemModal bind:show={showModal} bind:item {edit} />
 {/if}
 
 <button
 	class="relative group p-1.5 {className} flex items-center {colorClassName} rounded-2xl text-left"
 	type="button"
 	on:click={async () => {
-		if (file?.file?.content) {
+		if (item?.file?.data?.content) {
 			showModal = !showModal;
 		} else {
 			if (url) {

+ 11 - 11
src/lib/components/common/FileItemModal.svelte

@@ -10,7 +10,7 @@
 	import Switch from './Switch.svelte';
 	import Tooltip from './Tooltip.svelte';
 
-	export let file;
+	export let item;
 	export let show = false;
 
 	export let edit = false;
@@ -18,9 +18,9 @@
 	let enableFullContent = false;
 
 	onMount(() => {
-		console.log(file);
+		console.log(item);
 
-		if (file?.context === 'full') {
+		if (item?.context === 'full') {
 			enableFullContent = true;
 		}
 	});
@@ -33,11 +33,11 @@
 				<div>
 					<div class=" font-medium text-lg dark:text-gray-100">
 						<a
-							href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
+							href={item.url ? (item.type === 'file' ? `${item.url}/content` : `${item.url}`) : '#'}
 							target="_blank"
 							class="hover:underline line-clamp-1"
 						>
-							{file?.name ?? 'File'}
+							{item?.name ?? 'File'}
 						</a>
 					</div>
 				</div>
@@ -56,14 +56,14 @@
 			<div>
 				<div class="flex flex-col items-center md:flex-row gap-1 justify-between w-full">
 					<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
-						{#if file.size}
-							<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
+						{#if item.size}
+							<div class="capitalize shrink-0">{formatFileSize(item.size)}</div>
 						{/if}
 
-						{#if file?.file?.content}
+						{#if item?.file?.data?.content}
 							<div class="capitalize shrink-0">
-								{getLineCount(file?.file?.content ?? '')} extracted lines
+								{getLineCount(item?.file?.data?.content ?? '')} extracted lines
 							</div>
 
 							<div class="flex items-center gap-1 shrink-0">
@@ -90,7 +90,7 @@
 									<Switch
 										bind:state={enableFullContent}
 										on:change={(e) => {
-											file.context = e.detail ? 'full' : undefined;
+											item.context = e.detail ? 'full' : undefined;
 										}}
 									/>
 								</div>
@@ -102,7 +102,7 @@
 		</div>
 
 		<div class="max-h-96 overflow-scroll scrollbar-hidden text-xs whitespace-pre-wrap">
-			{file?.file?.content ?? 'No content'}
+			{item?.file?.data?.content ?? 'No content'}
 		</div>
 	</div>
 </Modal>

+ 30 - 26
src/lib/components/workspace/Knowledge/Collection.svelte

@@ -9,7 +9,12 @@
 	import { mobile, showSidebar } from '$lib/stores';
 
 	import { uploadFile } from '$lib/apis/files';
-	import { getKnowledgeById, updateKnowledgeById } from '$lib/apis/knowledge';
+	import {
+		addFileToKnowledgeById,
+		getKnowledgeById,
+		removeFileFromKnowledgeById,
+		updateKnowledgeById
+	} from '$lib/apis/knowledge';
 
 	import Spinner from '$lib/components/common/Spinner.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
@@ -77,7 +82,7 @@
 
 			if (uploadedFile) {
 				console.log(uploadedFile);
-				processFileHandler(uploadedFile);
+				addFileHandler(uploadedFile.id);
 			} else {
 				toast.error($i18n.t('Failed to upload file.'));
 			}
@@ -86,34 +91,31 @@
 		}
 	};
 
-	const processFileHandler = async (uploadedFile) => {
-		const processedFile = await processFile(localStorage.token, uploadedFile.id, id).catch((e) => {
-			toast.error(e);
-		});
-
-		if (processedFile.status) {
-			console.log(processedFile);
-
-			if (!knowledge.data) {
-				knowledge.data = {};
+	const addFileHandler = async (fileId) => {
+		const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
+			(e) => {
+				console.error(e);
 			}
+		);
 
-			knowledge.data.file_ids = [...(knowledge?.data?.file_ids ?? []), uploadedFile.id];
-
-			console.log(knowledge);
+		if (updatedKnowledge) {
+			knowledge = updatedKnowledge;
+			toast.success($i18n.t('File added successfully.'));
+		}
+	};
 
-			const updatedKnowledge = await updateKnowledgeById(localStorage.token, id, {
-				data: knowledge?.data ?? {}
-			}).catch((e) => {
-				console.error(e);
-			});
+	const deleteFileHandler = async (fileId) => {
+		const updatedKnowledge = await removeFileFromKnowledgeById(
+			localStorage.token,
+			id,
+			fileId
+		).catch((e) => {
+			console.error(e);
+		});
 
-			if (updatedKnowledge) {
-				knowledge = updatedKnowledge;
-				toast.success($i18n.t('File added successfully.'));
-			}
-		} else {
-			toast.error($i18n.t('Failed to process file.'));
+		if (updatedKnowledge) {
+			knowledge = updatedKnowledge;
+			toast.success($i18n.t('File removed successfully.'));
 		}
 	};
 
@@ -338,6 +340,8 @@
 										}}
 										on:delete={(e) => {
 											console.log(e.detail);
+
+											deleteFileHandler(e.detail);
 										}}
 									/>
 								</div>