refac: voice call

Timothy J. Baek 10 months ago
commit 7f70de99d3

+ 98 - 16
src/lib/components/chat/Chat.svelte

@@ -30,6 +30,7 @@
 	import {
 		convertMessagesToHistory,
 		copyToClipboard,
+		extractSentencesForAudio,
 		promptTemplate,
 		splitStream
 	} from '$lib/utils';
@@ -593,7 +594,15 @@
 				array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index
 		);
 
-		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:start', {
+				detail: {
+					id: responseMessageId
+				}
+			})
+		);
+
+		await tick();
 
 		const [res, controller] = await generateChatCompletion(localStorage.token, {
 			model: model.id,
@@ -664,9 +673,23 @@
 									continue;
 								} else {
 									responseMessage.content += data.message.content;
-									eventTarget.dispatchEvent(
-										new CustomEvent('chat', { detail: { content: data.message.content } })
-									);
+
+									const sentences = extractSentencesForAudio(responseMessage.content);
+									sentences.pop();
+
+									// dispatch only last sentence and make sure it hasn't been dispatched before
+									if (
+										sentences.length > 0 &&
+										sentences[sentences.length - 1] !== responseMessage.lastSentence
+									) {
+										responseMessage.lastSentence = sentences[sentences.length - 1];
+										eventTarget.dispatchEvent(
+											new CustomEvent('chat', {
+												detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+											})
+										);
+									}
+
 									messages = messages;
 								}
 							} else {
@@ -760,7 +783,23 @@
 
 		stopResponseFlag = false;
 		await tick();
-		eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+
+		let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
+		if (lastSentence) {
+			eventTarget.dispatchEvent(
+				new CustomEvent('chat', {
+					detail: { id: responseMessageId, content: lastSentence }
+				})
+			);
+		}
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:finish', {
+				detail: {
+					id: responseMessageId,
+					content: responseMessage.content
+				}
+			})
+		);
 
 		if (autoScroll) {
 			scrollToBottom();
@@ -802,7 +841,14 @@
 
 		scrollToBottom();
 
-		eventTarget.dispatchEvent(new CustomEvent('chat:start'));
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:start', {
+				detail: {
+					id: responseMessageId
+				}
+			})
+		);
+		await tick();
 
 		try {
 			const [res, controller] = await generateOpenAIChatCompletion(
@@ -924,7 +970,23 @@
 						continue;
 					} else {
 						responseMessage.content += value;
-						eventTarget.dispatchEvent(new CustomEvent('chat', { detail: { content: value } }));
+
+						const sentences = extractSentencesForAudio(responseMessage.content);
+						sentences.pop();
+
+						// dispatch only last sentence and make sure it hasn't been dispatched before
+						if (
+							sentences.length > 0 &&
+							sentences[sentences.length - 1] !== responseMessage.lastSentence
+						) {
+							responseMessage.lastSentence = sentences[sentences.length - 1];
+							eventTarget.dispatchEvent(
+								new CustomEvent('chat', {
+									detail: { id: responseMessageId, content: sentences[sentences.length - 1] }
+								})
+							);
+						}
+
 						messages = messages;
 					}
 
@@ -975,7 +1037,23 @@
 		stopResponseFlag = false;
 		await tick();
 
-		eventTarget.dispatchEvent(new CustomEvent('chat:finish'));
+		let lastSentence = extractSentencesForAudio(responseMessage.content)?.at(-1) ?? '';
+		if (lastSentence) {
+			eventTarget.dispatchEvent(
+				new CustomEvent('chat', {
+					detail: { id: responseMessageId, content: lastSentence }
+				})
+			);
+		}
+
+		eventTarget.dispatchEvent(
+			new CustomEvent('chat:finish', {
+				detail: {
+					id: responseMessageId,
+					content: responseMessage.content
+				}
+			})
+		);
 
 		if (autoScroll) {
 			scrollToBottom();
@@ -1207,14 +1285,18 @@
 	</title>
 </svelte:head>
 
-<CallOverlay
-	{submitPrompt}
-	{stopResponse}
-	bind:files
-	modelId={selectedModelIds?.at(0) ?? null}
-	chatId={$chatId}
-	{eventTarget}
-/>
+<audio id="audioElement" src="" style="display: none;" />
+
+{#if $showCallOverlay}
+	<CallOverlay
+		{submitPrompt}
+		{stopResponse}
+		bind:files
+		modelId={selectedModelIds?.at(0) ?? null}
+		chatId={$chatId}
+		{eventTarget}
+	/>
+{/if}
 
 {#if !chatIdProp || (loaded && chatIdProp)}
 	<div
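
For reference, a minimal sketch of the event contract Chat.svelte now shares with CallOverlay.svelte: every event carries the response message id, `chat` fires once per completed sentence, and `chat:finish` carries the full response text. The type names below are illustrative only, not identifiers from the codebase.

type ChatStartDetail = { id: string };
type ChatDetail = { id: string; content: string }; // one completed sentence per event
type ChatFinishDetail = { id: string; content: string }; // full response text

// Producer side: dispatch a sentence only after it has fully formed,
// and only once per message id (the diff tracks this via lastSentence).
const emitSentence = (eventTarget: EventTarget, id: string, sentence: string) => {
	eventTarget.dispatchEvent(
		new CustomEvent<ChatDetail>('chat', { detail: { id, content: sentence } })
	);
};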

+ 231 - 282
src/lib/components/chat/MessageInput/CallOverlay.svelte

@@ -2,7 +2,12 @@
 	import { config, settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';
 
-	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import {
+		blobToFile,
+		calculateSHA256,
+		extractSentencesForAudio,
+		findWordIndices
+	} from '$lib/utils';
 	import { generateEmoji } from '$lib/apis';
 	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 
@@ -32,34 +37,7 @@
 	let camera = false;
 	let cameraStream = null;
 
-	let assistantSpeaking = false;
-
 	let chatStreaming = false;
-	let assistantMessage = '';
-	let assistantSentences = [];
-	let assistantSentenceAudios = {};
-	let assistantSentenceIdx = -1;
-
-	let audioQueue = [];
-	let emojiQueue = [];
-
-	$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
-		const lastIndex = mergedTexts.length - 1;
-		if (lastIndex >= 0) {
-			const previousText = mergedTexts[lastIndex];
-			const wordCount = previousText.split(/\s+/).length;
-			if (wordCount < 2) {
-				mergedTexts[lastIndex] = previousText + ' ' + currentText;
-			} else {
-				mergedTexts.push(currentText);
-			}
-		} else {
-			mergedTexts.push(currentText);
-		}
-		return mergedTexts;
-	}, []);
-
-	let currentUtterance = null;
 
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -170,6 +148,88 @@
 	const MIN_DECIBELS = -45;
 	const VISUALIZER_BUFFER_LENGTH = 300;
 
+	const transcribeHandler = async (audioBlob) => {
+		// Create a blob from the audio chunks
+
+		await tick();
+		const file = blobToFile(audioBlob, 'recording.wav');
+
+		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+			toast.error(error);
+			return null;
+		});
+
+		if (res) {
+			console.log(res.text);
+
+			if (res.text !== '') {
+				const _responses = await submitPrompt(res.text, { _raw: true });
+				console.log(_responses);
+			}
+		}
+	};
+
+	const stopRecordingCallback = async (_continue = true) => {
+		if ($showCallOverlay) {
+			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
+
+			// deep copy the audioChunks array
+			const _audioChunks = audioChunks.slice(0);
+
+			audioChunks = [];
+			mediaRecorder = false;
+
+			if (_continue) {
+				startRecording();
+			}
+
+			if (confirmed) {
+				loading = true;
+				emoji = null;
+
+				if (cameraStream) {
+					const imageUrl = takeScreenshot();
+
+					files = [
+						{
+							type: 'image',
+							url: imageUrl
+						}
+					];
+				}
+
+				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
+				await transcribeHandler(audioBlob);
+
+				confirmed = false;
+				loading = false;
+			}
+		} else {
+			audioChunks = [];
+			mediaRecorder = false;
+		}
+	};
+
+	const startRecording = async () => {
+		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+		mediaRecorder = new MediaRecorder(stream);
+		mediaRecorder.onstart = () => {
+			console.log('Recording started');
+			audioChunks = [];
+			analyseAudio(stream);
+		};
+		mediaRecorder.ondataavailable = (event) => {
+			if (hasStartedSpeaking) {
+				audioChunks.push(event.data);
+			}
+		};
+		mediaRecorder.onstop = async () => {
+			console.log('Recording stopped');
+			await stopRecordingCallback();
+		};
+		mediaRecorder.start();
+	};
+
 	// Function to calculate the RMS level from time domain data
 	const calculateRMS = (data: Uint8Array) => {
 		let sumSquares = 0;
@@ -211,12 +271,15 @@
 				// Check if initial speech/noise has started
 				const hasSound = domainData.some((value) => value > 0);
 				if (hasSound) {
-					hasStartedSpeaking = true;
-					lastSoundTime = Date.now();
-
 					// BIG RED TEXT
 					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
-					stopAllAudio();
+
+					if (!hasStartedSpeaking) {
+						hasStartedSpeaking = true;
+						stopAllAudio();
+					}
+
+					lastSoundTime = Date.now();
 				}
 
 				// Start silence detection only after initial speech/noise has been detected
@@ -239,52 +302,9 @@
 		detectSound();
 	};
 
-	const transcribeHandler = async (audioBlob) => {
-		// Create a blob from the audio chunks
-
-		await tick();
-		const file = blobToFile(audioBlob, 'recording.wav');
-
-		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
-			toast.error(error);
-			return null;
-		});
-
-		if (res) {
-			console.log(res.text);
-
-			if (res.text !== '') {
-				const _responses = await submitPrompt(res.text, { _raw: true });
-				console.log(_responses);
-			}
-		}
-	};
-
-	const stopAllAudio = async () => {
-		interrupted = true;
-
-		if (chatStreaming) {
-			stopResponse();
-		}
-
-		if (currentUtterance) {
-			speechSynthesis.cancel();
-			currentUtterance = null;
-		}
-
-		await tick();
-		emojiQueue = [];
-		audioQueue = [];
-		await tick();
-
-		const audioElement = document.getElementById('audioElement');
-		if (audioElement) {
-			audioElement.pause();
-			audioElement.currentTime = 0;
-		}
-
-		assistantSpeaking = false;
-	};
+	let finishedMessages = {};
+	let currentMessageId = null;
+	let currentUtterance = null;
 
 	const speakSpeechSynthesisHandler = (content) => {
 		if ($showCallOverlay) {
@@ -350,246 +370,175 @@
 		}
 	};
 
-	const playAudioHandler = async () => {
-		console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
-		if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
-			assistantSpeaking = true;
-
-			if ($settings?.showEmojiInCall ?? false) {
-				if (emojiQueue.length > 0) {
-					emoji = emojiQueue.shift();
-					emojiQueue = emojiQueue;
-				}
-			}
+	const stopAllAudio = async () => {
+		interrupted = true;
 
-			const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
-			audioQueue = audioQueue;
-			await playAudio(audioToPlay);
-			assistantSpeaking = false;
+		if (chatStreaming) {
+			stopResponse();
 		}
-	};
 
-	const setContentAudio = async (content, idx) => {
-		if (assistantSentenceAudios[idx] === undefined) {
-			// Wait for the previous audio to be loaded
-			if (idx > 0) {
-				await new Promise((resolve) => {
-					const check = setInterval(() => {
-						if (
-							assistantSentenceAudios[idx - 1] !== undefined &&
-							assistantSentenceAudios[idx - 1] !== null
-						) {
-							clearInterval(check);
-							resolve();
-						}
-					}, 100);
-				});
-			}
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
 
-			assistantSentenceAudios[idx] = null;
+		const audioElement = document.getElementById('audioElement');
+		if (audioElement) {
+			audioElement.pause();
+			audioElement.currentTime = 0;
+		}
+	};
 
-			if ($settings?.showEmojiInCall ?? false) {
-				const sentenceEmoji = await generateEmoji(localStorage.token, modelId, content);
+	let audioAbortController = new AbortController();
 
-				if (sentenceEmoji) {
-					// Big red text with content and emoji
-					console.log('%c%s', 'color: blue; font-size: 10px;', `${sentenceEmoji}: ${content}`);
+	// Audio cache map where key is the content and value is the Audio object.
+	const audioCache = new Map();
+	const fetchAudio = async (content) => {
+		if (!audioCache.has(content)) {
+			try {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+					content
+				).catch((error) => {
+					console.error(error);
+					return null;
+				});
 
-					if (/\p{Extended_Pictographic}/u.test(sentenceEmoji)) {
-						emojiQueue.push(sentenceEmoji.match(/\p{Extended_Pictographic}/gu)[0]);
-						emojiQueue = emojiQueue;
-					}
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					audioCache.set(content, new Audio(blobUrl));
 				}
-
-				await tick();
-			}
-
-			const res = await synthesizeOpenAISpeech(
-				localStorage.token,
-				$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-				content
-			).catch((error) => {
-				toast.error(error);
-				assistantSpeaking = false;
-				return null;
-			});
-
-			if (res) {
-				const blob = await res.blob();
-				const blobUrl = URL.createObjectURL(blob);
-				const audio = new Audio(blobUrl);
-				assistantSentenceAudios[idx] = audio;
-
-				console.log('%c%s', 'color: red; font-size: 20px;', content);
-
-				audioQueue.push(audio);
-				audioQueue = audioQueue;
+			} catch (error) {
+				console.error('Error synthesizing speech:', error);
 			}
 		}
+		return audioCache.get(content);
 	};
 
-	const stopRecordingCallback = async (_continue = true) => {
-		if ($showCallOverlay) {
-			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');
-
-			// deep copy the audioChunks array
-			const _audioChunks = audioChunks.slice(0);
-
-			audioChunks = [];
-			mediaRecorder = false;
-
-			if (_continue) {
-				startRecording();
-			}
-
-			if (confirmed) {
-				loading = true;
-				emoji = null;
-
-				if (cameraStream) {
-					const imageUrl = takeScreenshot();
-
-					files = [
-						{
-							type: 'image',
-							url: imageUrl
-						}
-					];
+	let messages = {};
+
+	const monitorAndPlayAudio = async (id, signal) => {
+		while (!signal.aborted) {
+			if (messages[id] && messages[id].length > 0) {
+				// Retrieve the next content string from the queue
+				const content = messages[id].shift(); // Dequeues the content for playing
+
+				if (audioCache.has(content)) {
+					// If content is available in the cache, play it
+					try {
+						console.log(
+							'%c%s',
+							'color: red; font-size: 20px;',
+							`Playing audio for content: ${content}`
+						);
+
+						const audio = audioCache.get(content);
+						await playAudio(audio); // Here ensure that playAudio is indeed correct method to execute
+						console.log(`Played audio for content: ${content}`);
+						await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to reduce tight loop
+					} catch (error) {
+						console.error('Error playing audio:', error);
+					}
+				} else {
+					// If not available in the cache, push it back to the queue and delay
+					messages[id].unshift(content); // Re-queue the content at the start
+					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
+					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to reduce tight loop
 				}
-
-				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
-				await transcribeHandler(audioBlob);
-
-				confirmed = false;
-				loading = false;
+			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
+				// If the message is finished and there are no more messages to process, break the loop
+				break;
+			} else {
+				// No messages to process, sleep for a bit
+				await new Promise((resolve) => setTimeout(resolve, 200));
 			}
-		} else {
-			audioChunks = [];
-			mediaRecorder = false;
 		}
+		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
 	};
 
-	const startRecording = async () => {
-		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-		mediaRecorder = new MediaRecorder(stream);
-		mediaRecorder.onstart = () => {
-			console.log('Recording started');
-			audioChunks = [];
-			analyseAudio(stream);
-		};
-		mediaRecorder.ondataavailable = (event) => {
-			if (hasStartedSpeaking) {
-				audioChunks.push(event.data);
-			}
-		};
-		mediaRecorder.onstop = async () => {
-			console.log('Recording stopped');
-			await stopRecordingCallback();
-		};
-		mediaRecorder.start();
-	};
+	onMount(async () => {
+		startRecording();
 
-	const resetAssistantMessage = async () => {
-		interrupted = false;
+		const chatStartHandler = async (e) => {
+			const { id } = e.detail;
 
-		assistantMessage = '';
-		assistantSentenceIdx = -1;
-		assistantSentenceAudios = {}; // Reset audio tracking
-		audioQueue = []; // Clear the audio queue
-		audioQueue = audioQueue;
+			chatStreaming = true;
 
-		emoji = null;
-		emojiQueue = [];
-		emojiQueue = emojiQueue;
-	};
+			if ($config.audio.tts.engine !== '') {
+				// set currentMessageId to id
+				if (currentMessageId !== id) {
+					console.log(`Received chat start event for message ID ${id}`);
 
-	$: (async () => {
-		if ($showCallOverlay) {
-			await resetAssistantMessage();
-			await tick();
-			startRecording();
-		} else {
-			stopCamera();
-			stopAllAudio();
-			stopRecordingCallback(false);
-		}
-	})();
+					currentMessageId = id;
+					if (audioAbortController) {
+						audioAbortController.abort();
+					}
+					audioAbortController = new AbortController();
 
-	$: {
-		if (audioQueue.length > 0 && !assistantSpeaking) {
-			playAudioHandler();
-		}
-	}
-
-	onMount(() => {
-		eventTarget.addEventListener('chat:start', async (e) => {
-			if ($showCallOverlay) {
-				console.log('Chat start event:', e);
-				await resetAssistantMessage();
-				await tick();
-				chatStreaming = true;
+					// Start monitoring and playing audio for the message ID
+					monitorAndPlayAudio(id, audioAbortController.signal);
+				}
 			}
-		});
+		};
 
-		eventTarget.addEventListener('chat', async (e) => {
-			if ($showCallOverlay) {
-				const { content } = e.detail;
-				assistantMessage += content;
-				await tick();
+		const chatEventHandler = async (e) => {
+			const { id, content } = e.detail;
+			// "id" here is message id
+			// if "id" is not the same as "currentMessageId" then do not process
+			// "content" here is a sentence from the assistant,
+			// there will be many sentences for the same "id"
+
+			if ($config.audio.tts.engine !== '') {
+				if (currentMessageId === id) {
+					console.log(`Received chat event for message ID ${id}: ${content}`);
+
+					try {
+						if (messages[id] === undefined) {
+							messages[id] = [content];
+						} else {
+							messages[id].push(content);
+						}
 
-				if (!interrupted) {
-					if ($config.audio.tts.engine !== '') {
-						assistantSentenceIdx = assistantSentences.length - 2;
+						console.log(content);
 
-						if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
-							await tick();
-							setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
-						}
+						fetchAudio(content);
+					} catch (error) {
+						console.error('Failed to fetch or play audio:', error);
 					}
 				}
-
-				chatStreaming = true;
 			}
-		});
-
-		eventTarget.addEventListener('chat:finish', async (e) => {
-			if ($showCallOverlay) {
-				chatStreaming = false;
-				loading = false;
+		};
 
-				console.log('Chat finish event:', e);
-				await tick();
+		const chatFinishHandler = async (e) => {
+			const { id, content } = e.detail;
+			// "content" here is the entire message from the assistant
 
-				if (!interrupted) {
-					if ($config.audio.tts.engine !== '') {
-						for (const [idx, sentence] of assistantSentences.entries()) {
-							if (!assistantSentenceAudios[idx]) {
-								await tick();
-								setContentAudio(sentence, idx);
-							}
-						}
-					} else {
-						if ($settings?.showEmojiInCall ?? false) {
-							const res = await generateEmoji(localStorage.token, modelId, assistantMessage);
-
-							if (res) {
-								console.log(res);
-								if (/\p{Extended_Pictographic}/u.test(res)) {
-									emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
-								}
-							}
-						}
+			chatStreaming = false;
 
-						speakSpeechSynthesisHandler(assistantMessage);
-					}
-				}
+			if ($config.audio.tts.engine !== '') {
+				finishedMessages[id] = true;
+			} else {
+				speakSpeechSynthesisHandler(content);
 			}
-		});
+		};
+
+		eventTarget.addEventListener('chat:start', chatStartHandler);
+		eventTarget.addEventListener('chat', chatEventHandler);
+		eventTarget.addEventListener('chat:finish', chatFinishHandler);
+
+		return async () => {
+			eventTarget.removeEventListener('chat:start', chatStartHandler);
+			eventTarget.removeEventListener('chat', chatEventHandler);
+			eventTarget.removeEventListener('chat:finish', chatFinishHandler);
+
+			await stopRecordingCallback(false);
+			await stopCamera();
+		};
 	});
 </script>
 
-<audio id="audioElement" src="" style="display: none;" />
-
 {#if $showCallOverlay}
 	<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
 		<div
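
The overlay's playback side boils down to a per-message sentence queue, a TTS cache keyed by sentence text, and a polling loop that is cancelled through an AbortController whenever a new message starts. A compact standalone sketch of the playback half of that pattern (synthesis and caching omitted); the names here are illustrative, not taken from the component:

const queues: Record<string, string[]> = {};
const finished: Record<string, boolean> = {};
const audioCache = new Map<string, HTMLAudioElement>();

// Resolve only when the clip has finished playing, so sentences stay in order.
const playToEnd = (audio: HTMLAudioElement) =>
	new Promise<void>((resolve) => {
		audio.onended = () => resolve();
		audio.play();
	});

const playInOrder = async (id: string, signal: AbortSignal) => {
	while (!signal.aborted) {
		const sentence = queues[id]?.shift();
		if (sentence && audioCache.has(sentence)) {
			await playToEnd(audioCache.get(sentence)!); // cached audio is ready, play it
		} else if (sentence) {
			queues[id].unshift(sentence); // synthesis not done yet, re-queue and wait
			await new Promise((r) => setTimeout(r, 200));
		} else if (finished[id]) {
			break; // message complete and queue drained
		} else {
			await new Promise((r) => setTimeout(r, 200)); // nothing queued yet
		}
	}
};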

+ 18 - 0
src/lib/utils/index.ts

@@ -443,6 +443,24 @@ export const extractSentences = (text) => {
 		.filter((sentence) => sentence !== '');
 };
 
+export const extractSentencesForAudio = (text) => {
+	return extractSentences(text).reduce((mergedTexts, currentText) => {
+		const lastIndex = mergedTexts.length - 1;
+		if (lastIndex >= 0) {
+			const previousText = mergedTexts[lastIndex];
+			const wordCount = previousText.split(/\s+/).length;
+			if (wordCount < 2) {
+				mergedTexts[lastIndex] = previousText + ' ' + currentText;
+			} else {
+				mergedTexts.push(currentText);
+			}
+		} else {
+			mergedTexts.push(currentText);
+		}
+		return mergedTexts;
+	}, []);
+};
+
 export const blobToFile = (blob, fileName) => {
 	// Create a new File object from the Blob
 	const file = new File([blob], fileName, { type: blob.type });
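
extractSentencesForAudio exists so the voice call never sends a one-word clip to TTS: whenever a merged chunk is still shorter than two words, the next sentence is folded into it. Assuming extractSentences splits the input into ['Hi.', 'How are you today?', 'I am fine.'] (its exact splitting rules are not shown in this diff), the helper behaves like this:

extractSentencesForAudio('Hi. How are you today? I am fine.');
// => ['Hi. How are you today?', 'I am fine.']
// 'Hi.' is only one word, so it is merged with the sentence that follows it.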