Quellcode durchsuchen

Merge pull request #212 from ollama-webui/multimodal

feat: multimodal support
Timothy Jaeryang Baek vor 1 Jahr
Ursprung
Commit
346b0df811

+ 2 - 1
README.md

@@ -35,6 +35,8 @@ Also check our sibling project, [OllamaHub](https://ollamahub.com/), where you c
 
 - 🤖 **Multiple Model Support**: Seamlessly switch between different chat models for diverse interactions.
 
+- 🔄 **Multi-Modal Support**: Seamlessly engage with models that support multimodal interactions, including images (e.g., LLaVA).
+
 - 🧩 **Modelfile Builder**: Easily create Ollama modelfiles via the web UI. Create and add characters/agents, customize chat elements, and import modelfiles effortlessly through [OllamaHub](https://ollamahub.com/) integration.
 
 - ⚙️ **Many Models Conversations**: Effortlessly engage with various models simultaneously, harnessing their unique strengths for optimal responses. Enhance your experience by leveraging a diverse set of models in parallel.
@@ -234,7 +236,6 @@ See [TROUBLESHOOTING.md](/TROUBLESHOOTING.md) for information on how to troubles
 
 Here are some exciting tasks on our roadmap:
 
-- 🔄 **Multi-Modal Support**: Seamlessly engage with models that support multimodal interactions, including images (e.g., LLaVA).
 - 📚 **RAG Integration**: Experience first-class retrieval augmented generation support, enabling chat with your documents.
 - 🔐 **Access Control**: Securely manage requests to Ollama by utilizing the backend as a reverse proxy gateway, ensuring only authenticated users can send specific requests.
 - 🧪 **Research-Centric Features**: Empower researchers in the fields of LLM and HCI with a comprehensive web UI for conducting user studies. Stay tuned for ongoing feature enhancements (e.g., surveys, analytics, and participant tracking) to facilitate their research.

BIN
demo.gif


+ 32 - 31
src/lib/components/chat/MessageInput.svelte

@@ -14,7 +14,7 @@
 
 	export let files = [];
 
-	export let fileUploadEnabled = false;
+	export let fileUploadEnabled = true;
 	export let speechRecognitionEnabled = true;
 	export let speechRecognitionListening = false;
 
@@ -84,40 +84,40 @@
 	};
 </script>
 
-<div class="fixed bottom-0 w-full bg-white dark:bg-gray-800">
-	<div class=" absolute right-0 left-0 bottom-0 mb-20">
-		<div class="max-w-3xl px-2.5 pt-2.5 -mb-0.5 mx-auto inset-x-0">
-			{#if messages.length == 0 && suggestionPrompts.length !== 0}
+<div class="fixed bottom-0 w-full">
+	<div class="px-2.5 pt-2.5 -mb-0.5 mx-auto inset-x-0 bg-transparent flex justify-center">
+		{#if messages.length == 0 && suggestionPrompts.length !== 0}
+			<div class="max-w-3xl">
 				<Suggestions {suggestionPrompts} {submitPrompt} />
-			{/if}
+			</div>
+		{/if}
 
-			{#if autoScroll === false && messages.length > 0}
-				<div class=" flex justify-center mb-4">
-					<button
-						class=" bg-white border border-gray-100 dark:border-none dark:bg-white/20 p-1.5 rounded-full"
-						on:click={() => {
-							autoScroll = true;
-							window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
-						}}
+		{#if autoScroll === false && messages.length > 0}
+			<div class=" flex justify-center mb-4">
+				<button
+					class=" bg-white border border-gray-100 dark:border-none dark:bg-white/20 p-1.5 rounded-full"
+					on:click={() => {
+						autoScroll = true;
+						window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
+					}}
+				>
+					<svg
+						xmlns="http://www.w3.org/2000/svg"
+						viewBox="0 0 20 20"
+						fill="currentColor"
+						class="w-5 h-5"
 					>
-						<svg
-							xmlns="http://www.w3.org/2000/svg"
-							viewBox="0 0 20 20"
-							fill="currentColor"
-							class="w-5 h-5"
-						>
-							<path
-								fill-rule="evenodd"
-								d="M10 3a.75.75 0 01.75.75v10.638l3.96-4.158a.75.75 0 111.08 1.04l-5.25 5.5a.75.75 0 01-1.08 0l-5.25-5.5a.75.75 0 111.08-1.04l3.96 4.158V3.75A.75.75 0 0110 3z"
-								clip-rule="evenodd"
-							/>
-						</svg>
-					</button>
-				</div>
-			{/if}
-		</div>
+						<path
+							fill-rule="evenodd"
+							d="M10 3a.75.75 0 01.75.75v10.638l3.96-4.158a.75.75 0 111.08 1.04l-5.25 5.5a.75.75 0 01-1.08 0l-5.25-5.5a.75.75 0 111.08-1.04l3.96 4.158V3.75A.75.75 0 0110 3z"
+							clip-rule="evenodd"
+						/>
+					</svg>
+				</button>
+			</div>
+		{/if}
 	</div>
-	<div>
+	<div class="bg-white dark:bg-gray-800">
 		<div class="max-w-3xl px-2.5 -mb-0.5 mx-auto inset-x-0">
 			<div class="bg-gradient-to-t from-white dark:from-gray-800 from-40% pb-2">
 				<input
@@ -136,6 +136,7 @@
 								}
 							];
 							inputFiles = null;
+							filesInputElement.value = '';
 						};
 
 						if (

+ 26 - 12
src/lib/components/chat/Messages.svelte

@@ -15,6 +15,7 @@
 	export let sendPrompt: Function;
 	export let regenerateResponse: Function;
 
+	export let bottomPadding = false;
 	export let autoScroll;
 	export let selectedModels;
 	export let history = {};
@@ -31,6 +32,13 @@
 		})();
 	}
 
+	$: if (autoScroll && bottomPadding) {
+		(async () => {
+			await tick();
+			window.scrollTo({ top: document.body.scrollHeight, behavior: 'smooth' });
+		})();
+	}
+
 	const speakMessage = (message) => {
 		const speak = new SpeechSynthesisUtterance(message);
 		speechSynthesis.speak(speak);
@@ -184,7 +192,8 @@
 			parentId: history.messages[messageId].parentId,
 			childrenIds: [],
 			role: 'user',
-			content: userPrompt
+			content: userPrompt,
+			...(history.messages[messageId].files && { files: history.messages[messageId].files })
 		};
 
 		let messageParentId = history.messages[messageId].parentId;
@@ -425,6 +434,18 @@
 								class="prose chat-{message.role} w-full max-w-full dark:prose-invert prose-headings:my-0 prose-p:my-0 prose-p:-mb-4 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-img:my-0 prose-ul:-my-4 prose-ol:-my-4 prose-li:-my-3 prose-ul:-mb-6 prose-ol:-mb-6 prose-li:-mb-4 whitespace-pre-line"
 							>
 								{#if message.role == 'user'}
+									{#if message.files}
+										<div class="my-3 w-full flex overflow-x-auto space-x-2">
+											{#each message.files as file}
+												<div>
+													{#if file.type === 'image'}
+														<img src={file.url} alt="input" class=" max-h-96 rounded-lg" />
+													{/if}
+												</div>
+											{/each}
+										</div>
+									{/if}
+
 									{#if message?.edit === true}
 										<div class=" w-full">
 											<textarea
@@ -458,17 +479,6 @@
 										</div>
 									{:else}
 										<div class="w-full">
-											{#if message.files}
-												<div class="my-3">
-													{#each message.files as file}
-														<div>
-															{#if file.type === 'image'}
-																<img src={file.url} alt="input" class=" max-h-96" />
-															{/if}
-														</div>
-													{/each}
-												</div>
-											{/if}
 											<pre id="user-message">{message.content}</pre>
 
 											<div class=" flex justify-start space-x-1">
@@ -889,4 +899,8 @@
 			</div>
 		</div>
 	{/each}
+
+	{#if bottomPadding}
+		<div class=" mb-10" />
+	{/if}
 {/if}

+ 36 - 34
src/routes/(app)/+page.svelte

@@ -50,6 +50,10 @@
 		messages = [];
 	}
 
+	$: if (files) {
+		console.log(files);
+	}
+
 	onMount(async () => {
 		await chatId.set(uuidv4());
 
@@ -106,7 +110,6 @@
 	const sendPromptOllama = async (model, userPrompt, parentId, _chatId) => {
 		console.log('sendPromptOllama');
 		let responseMessageId = uuidv4();
-
 		let responseMessage = {
 			parentId: parentId,
 			id: responseMessageId,
@@ -126,38 +129,8 @@
 		}
 
 		await tick();
-
 		window.scrollTo({ top: document.body.scrollHeight });
 
-		// const res = await fetch(`${$settings?.API_BASE_URL ?? OLLAMA_API_BASE_URL}/generate`, {
-		// 	method: 'POST',
-		// 	headers: {
-		// 		'Content-Type': 'text/event-stream',
-		// 		...($settings.authHeader && { Authorization: $settings.authHeader }),
-		// 		...($user && { Authorization: `Bearer ${localStorage.token}` })
-		// 	},
-		// 	body: JSON.stringify({
-		// 		model: model,
-		// 		prompt: userPrompt,
-		// 		system: $settings.system ?? undefined,
-		// 		options: {
-		// 			seed: $settings.seed ?? undefined,
-		// 			temperature: $settings.temperature ?? undefined,
-		// 			repeat_penalty: $settings.repeat_penalty ?? undefined,
-		// 			top_k: $settings.top_k ?? undefined,
-		// 			top_p: $settings.top_p ?? undefined,
-		// 			num_ctx: $settings.num_ctx ?? undefined,
-		// 			...($settings.options ?? {})
-		// 		},
-		// 		format: $settings.requestFormat ?? undefined,
-		// 		context:
-		// 			history.messages[parentId] !== null &&
-		// 			history.messages[parentId].parentId in history.messages
-		// 				? history.messages[history.messages[parentId].parentId]?.context ?? undefined
-		// 				: undefined
-		// 	})
-		// });
-
 		const res = await fetch(`${$settings?.API_BASE_URL ?? OLLAMA_API_BASE_URL}/chat`, {
 			method: 'POST',
 			headers: {
@@ -177,7 +150,15 @@
 					...messages
 				]
 					.filter((message) => message)
-					.map((message) => ({ role: message.role, content: message.content })),
+					.map((message) => ({
+						role: message.role,
+						content: message.content,
+						...(message.files && {
+							images: message.files
+								.filter((file) => file.type === 'image')
+								.map((file) => file.url.slice(file.url.indexOf(',') + 1))
+						})
+					})),
 				options: {
 					seed: $settings.seed ?? undefined,
 					temperature: $settings.temperature ?? undefined,
@@ -350,7 +331,27 @@
 							...messages
 						]
 							.filter((message) => message)
-							.map((message) => ({ role: message.role, content: message.content })),
+							.map((message) => ({
+								role: message.role,
+								...(message.files
+									? {
+											content: [
+												{
+													type: 'text',
+													text: message.content
+												},
+												...message.files
+													.filter((file) => file.type === 'image')
+													.map((file) => ({
+														type: 'image_url',
+														image_url: {
+															url: file.url
+														}
+													}))
+											]
+									  }
+									: { content: message.content })
+							})),
 						temperature: $settings.temperature ?? undefined,
 						top_p: $settings.top_p ?? undefined,
 						num_ctx: $settings.num_ctx ?? undefined,
@@ -579,6 +580,7 @@
 				bind:history
 				bind:messages
 				bind:autoScroll
+				bottomPadding={files.length > 0}
 				{sendPrompt}
 				{regenerateResponse}
 			/>
@@ -586,8 +588,8 @@
 	</div>
 
 	<MessageInput
-		bind:prompt
 		bind:files
+		bind:prompt
 		bind:autoScroll
 		suggestionPrompts={selectedModelfile?.suggestionPrompts ?? [
 			{

+ 35 - 50
src/routes/(app)/c/[id]/+page.svelte

@@ -51,17 +51,6 @@
 		messages = [];
 	}
 
-	// onMount(async () => {
-	// 	let chat = await loadChat();
-
-	// 	await tick();
-	// 	if (chat) {
-	// 		loaded = true;
-	// 	} else {
-	// 		await goto('/');
-	// 	}
-	// });
-
 	$: if ($page.params.id) {
 		(async () => {
 			let chat = await loadChat();
@@ -133,7 +122,6 @@
 	const sendPromptOllama = async (model, userPrompt, parentId, _chatId) => {
 		console.log('sendPromptOllama');
 		let responseMessageId = uuidv4();
-
 		let responseMessage = {
 			parentId: parentId,
 			id: responseMessageId,
@@ -153,38 +141,8 @@
 		}
 
 		await tick();
-
 		window.scrollTo({ top: document.body.scrollHeight });
 
-		// const res = await fetch(`${$settings?.API_BASE_URL ?? OLLAMA_API_BASE_URL}/generate`, {
-		// 	method: 'POST',
-		// 	headers: {
-		// 		'Content-Type': 'text/event-stream',
-		// 		...($settings.authHeader && { Authorization: $settings.authHeader }),
-		// 		...($user && { Authorization: `Bearer ${localStorage.token}` })
-		// 	},
-		// 	body: JSON.stringify({
-		// 		model: model,
-		// 		prompt: userPrompt,
-		// 		system: $settings.system ?? undefined,
-		// 		options: {
-		// 			seed: $settings.seed ?? undefined,
-		// 			temperature: $settings.temperature ?? undefined,
-		// 			repeat_penalty: $settings.repeat_penalty ?? undefined,
-		// 			top_k: $settings.top_k ?? undefined,
-		// 			top_p: $settings.top_p ?? undefined,
-		// 			num_ctx: $settings.num_ctx ?? undefined,
-		// 			...($settings.options ?? {})
-		// 		},
-		// 		format: $settings.requestFormat ?? undefined,
-		// 		context:
-		// 			history.messages[parentId] !== null &&
-		// 			history.messages[parentId].parentId in history.messages
-		// 				? history.messages[history.messages[parentId].parentId]?.context ?? undefined
-		// 				: undefined
-		// 	})
-		// });
-
 		const res = await fetch(`${$settings?.API_BASE_URL ?? OLLAMA_API_BASE_URL}/chat`, {
 			method: 'POST',
 			headers: {
@@ -204,7 +162,15 @@
 					...messages
 				]
 					.filter((message) => message)
-					.map((message) => ({ role: message.role, content: message.content })),
+					.map((message) => ({
+						role: message.role,
+						content: message.content,
+						...(message.files && {
+							images: message.files
+								.filter((file) => file.type === 'image')
+								.map((file) => file.url.slice(file.url.indexOf(',') + 1))
+						})
+					})),
 				options: {
 					seed: $settings.seed ?? undefined,
 					temperature: $settings.temperature ?? undefined,
@@ -377,7 +343,27 @@
 							...messages
 						]
 							.filter((message) => message)
-							.map((message) => ({ role: message.role, content: message.content })),
+							.map((message) => ({
+								role: message.role,
+								...(message.files
+									? {
+											content: [
+												{
+													type: 'text',
+													text: message.content
+												},
+												...message.files
+													.filter((file) => file.type === 'image')
+													.map((file) => ({
+														type: 'image_url',
+														image_url: {
+															url: file.url
+														}
+													}))
+											]
+									  }
+									: { content: message.content })
+							})),
 						temperature: $settings.temperature ?? undefined,
 						top_p: $settings.top_p ?? undefined,
 						num_ctx: $settings.num_ctx ?? undefined,
@@ -392,12 +378,9 @@
 
 				while (true) {
 					const { value, done } = await reader.read();
-					if (done || stopResponseFlag) {
-						if (stopResponseFlag) {
-							responseMessage.done = true;
-							messages = messages;
-						}
-
+					if (done || stopResponseFlag || _chatId !== $chatId) {
+						responseMessage.done = true;
+						messages = messages;
 						break;
 					}
 
@@ -610,6 +593,7 @@
 					bind:history
 					bind:messages
 					bind:autoScroll
+					bottomPadding={files.length > 0}
 					{sendPrompt}
 					{regenerateResponse}
 				/>
@@ -617,6 +601,7 @@
 		</div>
 
 		<MessageInput
+			bind:files
 			bind:prompt
 			bind:autoScroll
 			suggestionPrompts={selectedModelfile?.suggestionPrompts ?? [