Timothy Jaeryang Baek 4 tháng trước
mục cha
commit
f9a05dd1e1

+ 1 - 1
backend/open_webui/utils/chat.py

@@ -136,7 +136,7 @@ async def generate_chat_completion(
         response = await generate_ollama_chat_completion(
             request=request, form_data=form_data, user=user, bypass_filter=bypass_filter
         )
-        if form_data.stream:
+        if form_data.get("stream"):
             response.headers["content-type"] = "text/event-stream"
             return StreamingResponse(
                 convert_streaming_response_ollama_to_openai(response),

+ 8 - 2
backend/open_webui/utils/misc.py

@@ -106,7 +106,7 @@ def openai_chat_message_template(model: str):
 
 
 def openai_chat_chunk_message_template(
-    model: str, message: Optional[str] = None
+    model: str, message: Optional[str] = None, usage: Optional[dict] = None
 ) -> dict:
     template = openai_chat_message_template(model)
     template["object"] = "chat.completion.chunk"
@@ -114,17 +114,23 @@ def openai_chat_chunk_message_template(
         template["choices"][0]["delta"] = {"content": message}
     else:
         template["choices"][0]["finish_reason"] = "stop"
+
+    if usage:
+        template["usage"] = usage
     return template
 
 
 def openai_chat_completion_message_template(
-    model: str, message: Optional[str] = None
+    model: str, message: Optional[str] = None, usage: Optional[dict] = None
 ) -> dict:
     template = openai_chat_message_template(model)
     template["object"] = "chat.completion"
     if message is not None:
         template["choices"][0]["message"] = {"content": message, "role": "assistant"}
     template["choices"][0]["finish_reason"] = "stop"
+
+    if usage:
+        template["usage"] = usage
     return template
 
 

+ 56 - 1
backend/open_webui/utils/response.py

@@ -21,8 +21,63 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response)
         message_content = data.get("message", {}).get("content", "")
         done = data.get("done", False)
 
+        usage = None
+        if done:
+            # NOTE: all Ollama durations are in nanoseconds; tokens/s = count / seconds,
+            # durations reported in ms. round(x, 2) replaces the JS idiom
+            # Math.round(x * 100) / 100 — do NOT keep the * 100 factor, or every
+            # value is inflated 100x.
+            usage = {
+                "response_token/s": (
+                    round(
+                        data.get("eval_count", 0)
+                        / (data.get("eval_duration", 0) / 1_000_000_000),
+                        2,
+                    )
+                    if data.get("eval_duration", 0) > 0
+                    else "N/A"
+                ),
+                "prompt_token/s": (
+                    round(
+                        data.get("prompt_eval_count", 0)
+                        / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
+                        2,
+                    )
+                    if data.get("prompt_eval_duration", 0) > 0
+                    else "N/A"
+                ),
+                "total_duration": round(
+                    data.get("total_duration", 0) / 1_000_000, 2
+                ),
+                "load_duration": round(
+                    data.get("load_duration", 0) / 1_000_000, 2
+                ),
+                "prompt_eval_count": data.get("prompt_eval_count", 0),
+                "prompt_eval_duration": round(
+                    data.get("prompt_eval_duration", 0) / 1_000_000, 2
+                ),
+                "eval_count": data.get("eval_count", 0),
+                "eval_duration": round(
+                    data.get("eval_duration", 0) / 1_000_000, 2
+                ),
+                "approximate_total": (
+                    lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
+                )((data.get("total_duration", 0) or 0) // 1_000_000_000),
+            }
+
         data = openai_chat_chunk_message_template(
-            model, message_content if not done else None
+            model, message_content if not done else None, usage
         )
 
         line = f"data: {json.dumps(data)}\n\n"

+ 21 - 1
src/lib/apis/streaming/index.ts

@@ -77,10 +77,14 @@ async function* openAIStreamToIterator(
 				continue;
 			}
 
+			if (parsedData.usage) {
+				yield { done: false, value: '', usage: parsedData.usage };
+				continue;
+			}
+
 			yield {
 				done: false,
 				value: parsedData.choices?.[0]?.delta?.content ?? '',
-				usage: parsedData.usage
 			};
 		} catch (e) {
 			console.error('Error extracting delta from SSE event:', e);
@@ -98,10 +102,26 @@ async function* streamLargeDeltasAsRandomChunks(
 			yield textStreamUpdate;
 			return;
 		}
+
+		if (textStreamUpdate.error) {
+			yield textStreamUpdate;
+			continue;
+		}
 		if (textStreamUpdate.sources) {
 			yield textStreamUpdate;
 			continue;
 		}
+		if (textStreamUpdate.selectedModelId) {
+			yield textStreamUpdate;
+			continue;
+		}
+		if (textStreamUpdate.usage) {
+			yield textStreamUpdate;
+			continue;
+		}

		let content = textStreamUpdate.value;
 		if (content.length < 5) {
 			yield { done: false, value: content };

+ 70 - 68
src/lib/components/chat/Chat.svelte

@@ -455,41 +455,43 @@
 	//////////////////////////
 
 	const initNewChat = async () => {
-		if (sessionStorage.selectedModels) {
-			selectedModels = JSON.parse(sessionStorage.selectedModels);
-			sessionStorage.removeItem('selectedModels');
-		} else {
-			if ($page.url.searchParams.get('models')) {
-				selectedModels = $page.url.searchParams.get('models')?.split(',');
-			} else if ($page.url.searchParams.get('model')) {
-				const urlModels = $page.url.searchParams.get('model')?.split(',');
-
-				if (urlModels.length === 1) {
-					const m = $models.find((m) => m.id === urlModels[0]);
-					if (!m) {
-						const modelSelectorButton = document.getElementById('model-selector-0-button');
-						if (modelSelectorButton) {
-							modelSelectorButton.click();
-							await tick();
-
-							const modelSelectorInput = document.getElementById('model-search-input');
-							if (modelSelectorInput) {
-								modelSelectorInput.focus();
-								modelSelectorInput.value = urlModels[0];
-								modelSelectorInput.dispatchEvent(new Event('input'));
-							}
+		if ($page.url.searchParams.get('models')) {
+			selectedModels = $page.url.searchParams.get('models')?.split(',');
+		} else if ($page.url.searchParams.get('model')) {
+			const urlModels = $page.url.searchParams.get('model')?.split(',');
+
+			if (urlModels.length === 1) {
+				const m = $models.find((m) => m.id === urlModels[0]);
+				if (!m) {
+					const modelSelectorButton = document.getElementById('model-selector-0-button');
+					if (modelSelectorButton) {
+						modelSelectorButton.click();
+						await tick();
+
+						const modelSelectorInput = document.getElementById('model-search-input');
+						if (modelSelectorInput) {
+							modelSelectorInput.focus();
+							modelSelectorInput.value = urlModels[0];
+							modelSelectorInput.dispatchEvent(new Event('input'));
 						}
-					} else {
-						selectedModels = urlModels;
 					}
 				} else {
 					selectedModels = urlModels;
 				}
-			} else if ($settings?.models) {
-				selectedModels = $settings?.models;
-			} else if ($config?.default_models) {
-				console.log($config?.default_models.split(',') ?? '');
-				selectedModels = $config?.default_models.split(',');
+			} else {
+				selectedModels = urlModels;
+			}
+		} else {
+			if (sessionStorage.selectedModels) {
+				selectedModels = JSON.parse(sessionStorage.selectedModels);
+				sessionStorage.removeItem('selectedModels');
+			} else {
+				if ($settings?.models) {
+					selectedModels = $settings?.models;
+				} else if ($config?.default_models) {
+					console.log($config?.default_models.split(',') ?? '');
+					selectedModels = $config?.default_models.split(',');
+				}
 			}
 		}
 
@@ -1056,11 +1058,14 @@
 					}
 
 					let _response = null;
-					if (model?.owned_by === 'ollama') {
-						_response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
-					} else if (model) {
-						_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
-					}
+
+					// if (model?.owned_by === 'ollama') {
+					// 	_response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
+					// } else if (model) {
+					// }
+
+					_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
+
 					_responses.push(_response);
 
 					if (chatEventEmitter) clearInterval(chatEventEmitter);
@@ -1207,24 +1212,14 @@
 			$settings?.params?.stream_response ??
 			params?.stream_response ??
 			true;
+
 		const [res, controller] = await generateChatCompletion(localStorage.token, {
 			stream: stream,
 			model: model.id,
 			messages: messagesBody,
-			options: {
-				...{ ...($settings?.params ?? {}), ...params },
-				stop:
-					(params?.stop ?? $settings?.params?.stop ?? undefined)
-						? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
-								(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
-							)
-						: undefined,
-				num_predict: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
-				repeat_penalty:
-					params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined
-			},
 			format: $settings.requestFormat ?? undefined,
 			keep_alive: $settings.keepAlive ?? undefined,
+
 			tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
 			files: files.length > 0 ? files : undefined,
 			session_id: $socket?.id,
@@ -1542,13 +1537,6 @@
 				{
 					stream: stream,
 					model: model.id,
-					...(stream && (model.info?.meta?.capabilities?.usage ?? false)
-						? {
-								stream_options: {
-									include_usage: true
-								}
-							}
-						: {}),
 					messages: [
 						params?.system || $settings.system || (responseMessage?.userContext ?? null)
 							? {
@@ -1593,23 +1581,36 @@
 										content: message?.merged?.content ?? message.content
 									})
 						})),
-					seed: params?.seed ?? $settings?.params?.seed ?? undefined,
-					stop:
-						(params?.stop ?? $settings?.params?.stop ?? undefined)
-							? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
-									(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
-								)
-							: undefined,
-					temperature: params?.temperature ?? $settings?.params?.temperature ?? undefined,
-					top_p: params?.top_p ?? $settings?.params?.top_p ?? undefined,
-					frequency_penalty:
-						params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined,
-					max_tokens: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
+
+					// params: {
+					// 	...$settings?.params,
+					// 	...params,
+
+					// 	format: $settings.requestFormat ?? undefined,
+					// 	keep_alive: $settings.keepAlive ?? undefined,
+					// 	stop:
+					// 		(params?.stop ?? $settings?.params?.stop ?? undefined)
+					// 			? (
+					// 					params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop
+					// 				).map((str) =>
+					// 					decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
+					// 				)
+					// 			: undefined
+					// },
+
 					tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
 					files: files.length > 0 ? files : undefined,
 					session_id: $socket?.id,
 					chat_id: $chatId,
-					id: responseMessageId
+					id: responseMessageId,
+
+					...(stream && (model.info?.meta?.capabilities?.usage ?? false)
+						? {
+								stream_options: {
+									include_usage: true
+								}
+							}
+						: {})
 				},
 				`${WEBUI_BASE_URL}/api`
 			);
@@ -1636,6 +1637,7 @@
 							await handleOpenAIError(error, null, model, responseMessage);
 							break;
 						}
+
 						if (done || stopResponseFlag || _chatId !== $chatId) {
 							responseMessage.done = true;
 							history.messages[responseMessageId] = responseMessage;
@@ -1648,7 +1650,7 @@
 						}
 
 						if (usage) {
-							responseMessage.info = { ...usage, openai: true, usage };
+							responseMessage.usage = usage;
 						}
 
 						if (selectedModelId) {

+ 36 - 73
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -932,82 +932,45 @@
 									</Tooltip>
 								{/if}
 
-								{#if message.info}
+								{#if message.usage}
 									<Tooltip
-										content={message.info.openai
-											? message.info.usage
-												? `<pre>${sanitizeResponseContent(
-														JSON.stringify(message.info.usage, null, 2)
-															.replace(/"([^(")"]+)":/g, '$1:')
-															.slice(1, -1)
-															.split('\n')
-															.map((line) => line.slice(2))
-															.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
-															.join('\n')
-													)}</pre>`
-												: `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}<br/>
-													completion_tokens: ${message.info.completion_tokens ?? 'N/A'}<br/>
-													total_tokens: ${message.info.total_tokens ?? 'N/A'}`
-											: `response_token/s: ${
-													`${
-														Math.round(
-															((message.info.eval_count ?? 0) /
-																((message.info.eval_duration ?? 0) / 1000000000)) *
-																100
-														) / 100
-													} tokens` ?? 'N/A'
-												}<br/>
-					prompt_token/s: ${
-						Math.round(
-							((message.info.prompt_eval_count ?? 0) /
-								((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
-								100
-						) / 100 ?? 'N/A'
-					} tokens<br/>
-		            total_duration: ${
-									Math.round(((message.info.total_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-								}ms<br/>
-		            load_duration: ${
-									Math.round(((message.info.load_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-								}ms<br/>
-		            prompt_eval_count: ${message.info.prompt_eval_count ?? 'N/A'}<br/>
-		            prompt_eval_duration: ${
-									Math.round(((message.info.prompt_eval_duration ?? 0) / 1000000) * 100) / 100 ??
-									'N/A'
-								}ms<br/>
-		            eval_count: ${message.info.eval_count ?? 'N/A'}<br/>
-		            eval_duration: ${
-									Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-								}ms<br/>
-		            approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
-										placement="top"
+										content={message.usage
+											? `<pre>${sanitizeResponseContent(
+													JSON.stringify(message.usage, null, 2)
+														.replace(/"([^(")"]+)":/g, '$1:')
+														.slice(1, -1)
+														.split('\n')
+														.map((line) => line.slice(2))
+														.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
+														.join('\n')
+												)}</pre>`
+											: ''}
+										placement="bottom"
 									>
-										<Tooltip content={$i18n.t('Generation Info')} placement="bottom">
-											<button
-												class=" {isLastMessage
-													? 'visible'
-													: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition whitespace-pre-wrap"
-												on:click={() => {
-													console.log(message);
-												}}
-												id="info-{message.id}"
+										<button
+											class=" {isLastMessage
+												? 'visible'
+												: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition whitespace-pre-wrap"
+											on:click={() => {
+												console.log(message);
+											}}
+											id="info-{message.id}"
+										>
+											<svg
+												xmlns="http://www.w3.org/2000/svg"
+												fill="none"
+												viewBox="0 0 24 24"
+												stroke-width="2.3"
+												stroke="currentColor"
+												class="w-4 h-4"
 											>
-												<svg
-													xmlns="http://www.w3.org/2000/svg"
-													fill="none"
-													viewBox="0 0 24 24"
-													stroke-width="2.3"
-													stroke="currentColor"
-													class="w-4 h-4"
-												>
-													<path
-														stroke-linecap="round"
-														stroke-linejoin="round"
-														d="M11.25 11.25l.041-.02a.75.75 0 011.063.852l-.708 2.836a.75.75 0 001.063.853l.041-.021M21 12a9 9 0 11-18 0 9 9 0 0118 0zm-9-3.75h.008v.008H12V8.25z"
-													/>
-												</svg>
-											</button>
-										</Tooltip>
+												<path
+													stroke-linecap="round"
+													stroke-linejoin="round"
+													d="M11.25 11.25l.041-.02a.75.75 0 011.063.852l-.708 2.836a.75.75 0 001.063.853l.041-.021M21 12a9 9 0 11-18 0 9 9 0 0118 0zm-9-3.75h.008v.008H12V8.25z"
+												/>
+											</svg>
+										</button>
 									</Tooltip>
 								{/if}