@@ -2,8 +2,8 @@
 	import { settings, showCallOverlay } from '$lib/stores';
 	import { onMount, tick, getContext } from 'svelte';
 
-	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
-	import { transcribeAudio } from '$lib/apis/audio';
+	import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
+	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
 	import { toast } from 'svelte-sonner';
 
 	const i18n = getContext('i18n');
@@ -14,7 +14,8 @@
 	let confirmed = false;
 
 	let assistantSpeaking = false;
-	let assistantAudio = null;
+	let assistantAudio = {};
+	let assistantAudioIdx = null;
 
 	let rmsLevel = 0;
 	let hasStartedSpeaking = false;
@@ -26,6 +27,7 @@
 	let animationFrameId;
 
 	let speechRecognition;
+	let currentUtterance = null;
 
 	let mediaRecorder;
 	let audioChunks = [];
@@ -108,14 +110,7 @@
 		// Check if initial speech/noise has started
 		const hasSound = domainData.some((value) => value > 0);
 		if (hasSound) {
-			if (assistantSpeaking) {
-				speechSynthesis.cancel();
-
-				if (assistantAudio) {
-					assistantAudio.pause();
-					assistantAudio.currentTime = 0;
-				}
-			}
+			stopAllAudio();
 			hasStartedSpeaking = true;
 			lastSoundTime = Date.now();
 		}
@@ -140,6 +135,55 @@
 		detectSound();
 	};
 
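+	// Cut off any assistant speech that is still playing: cancel the pending
+	// speechSynthesis utterance and pause the currently selected OpenAI audio clip.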
+	const stopAllAudio = () => {
+		if (currentUtterance) {
+			speechSynthesis.cancel();
+			currentUtterance = null;
+		}
+		if (assistantAudio[assistantAudioIdx]) {
+			assistantAudio[assistantAudioIdx].pause();
+			assistantAudio[assistantAudioIdx].currentTime = 0;
+		}
+		assistantSpeaking = false;
+	};
+
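+	// Play the synthesized clip for sentence `idx` and resolve once it has ended,
+	// so clips can be chained back-to-back; after the last clip the assistant is
+	// marked as no longer speaking.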
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			assistantAudioIdx = idx;
+			const audio = assistantAudio[idx];
+			audio.play();
+			audio.onended = async (e) => {
+				await new Promise((r) => setTimeout(r, 300));
+
+				if (Object.keys(assistantAudio).length - 1 === idx) {
+					assistantSpeaking = false;
+				}
+
+				res(e);
+			};
+		});
+	};
+
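+	// Request a single TTS clip for `text` from the configured OpenAI speech endpoint
+	// and keep the resulting Audio element as the assistant audio.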
+	const getOpenAISpeech = async (text) => {
+		const res = await synthesizeOpenAISpeech(
+			localStorage.token,
+			$settings?.audio?.speaker ?? 'alloy',
+			text,
+			$settings?.audio?.model ?? 'tts-1'
+		).catch((error) => {
+			toast.error(error);
+			assistantSpeaking = false;
+			return null;
+		});
+
+		if (res) {
+			const blob = await res.blob();
+			const blobUrl = URL.createObjectURL(blob);
+			const audio = new Audio(blobUrl);
+			assistantAudio = audio;
+		}
+	};
+
 	const transcribeHandler = async (audioBlob) => {
 		// Create a blob from the audio chunks
 
@@ -152,21 +196,68 @@
 		});
 
 		if (res) {
-			toast.success(res.text);
+			console.log(res.text);
 
 			const _responses = await submitPrompt(res.text);
 			console.log(_responses);
 
 			if (_responses.at(0)) {
-				const response = _responses[0];
-				if (response) {
-					assistantSpeaking = true;
+				const content = _responses[0];
+				if (content) {
+					assistantSpeakingHandler(content);
+				}
+			}
+		}
+	};
 
-					if ($settings?.audio?.TTSEngine ?? '') {
-						speechSynthesis.speak(new SpeechSynthesisUtterance(response));
+	const assistantSpeakingHandler = async (content) => {
+		assistantSpeaking = true;
+
+		if (($settings?.audio?.TTSEngine ?? '') == '') {
+			currentUtterance = new SpeechSynthesisUtterance(content);
+			speechSynthesis.speak(currentUtterance);
+		} else if ($settings?.audio?.TTSEngine === 'openai') {
+			console.log('openai');
+
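+			// Split the response into sentences, merging very short ones into their
+			// predecessor so each TTS request carries a reasonable chunk of text.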
+			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
+				const lastIndex = mergedTexts.length - 1;
+				if (lastIndex >= 0) {
+					const previousText = mergedTexts[lastIndex];
+					const wordCount = previousText.split(/\s+/).length;
+					if (wordCount < 2) {
+						mergedTexts[lastIndex] = previousText + ' ' + currentText;
 					} else {
-						console.log('openai');
+						mergedTexts.push(currentText);
 					}
+				} else {
+					mergedTexts.push(currentText);
+				}
+				return mergedTexts;
+			}, []);
+
+			console.log(sentences);
+
+			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
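+			// Synthesize each sentence in order and chain playback onto the previous
+			// clip's promise, so audio plays sequentially while later requests are fetched.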
+			for (const [idx, sentence] of sentences.entries()) {
+				const res = await synthesizeOpenAISpeech(
+					localStorage.token,
+					$settings?.audio?.speaker,
+					sentence,
+					$settings?.audio?.model
+				).catch((error) => {
+					toast.error(error);
+
+					assistantSpeaking = false;
+					return null;
+				});
+
+				if (res) {
+					const blob = await res.blob();
+					const blobUrl = URL.createObjectURL(blob);
+					const audio = new Audio(blobUrl);
+					assistantAudio[idx] = audio;
+					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 				}
 			}
 		}
@@ -311,7 +402,7 @@
 					{#if loading}
 						Thinking...
 					{:else}
-						Listening... {Math.round(rmsLevel * 100)}
+						Listening...
 					{/if}
 				</div>
 			</button>