Browse Source

feat: whisper voice input

Timothy J. Baek 1 year ago
parent
commit
f245c62692

+ 164 - 53
src/lib/components/chat/MessageInput.svelte

@@ -35,7 +35,6 @@

 	export let fileUploadEnabled = true;
 	export let speechRecognitionEnabled = true;
-	export let speechRecognitionListening = false;

 	export let prompt = '';
 	export let messages = [];
@@ -51,62 +50,170 @@
 		}
 	}

+	let mediaRecorder;
+	let audioChunks = [];
+	let isRecording = false;
+	const MIN_DECIBELS = -45;
+
+	const startRecording = async () => {
+		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+		mediaRecorder = new MediaRecorder(stream);
+		mediaRecorder.onstart = () => {
+			isRecording = true;
+			console.log('Recording started');
+		};
+		mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
+		mediaRecorder.onstop = async () => {
+			isRecording = false;
+			console.log('Recording stopped');
+
+			// Create a blob from the audio chunks
+			const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+
+			const file = blobToFile(audioBlob, 'recording.wav');
+
+			const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+				toast.error(error);
+				return null;
+			});
+
+			if (res) {
+				prompt = res.text;
+				await tick();
+
+				const inputElement = document.getElementById('chat-textarea');
+				inputElement?.focus();
+
+				if (prompt !== '' && $settings?.speechAutoSend === true) {
+					submitPrompt(prompt, user);
+				}
+			}
+
+			// saveRecording(audioBlob);
+			audioChunks = [];
+		};
+
+		// Start recording
+		mediaRecorder.start();
+
+		// Monitor silence
+		monitorSilence(stream);
+	};
+
+	const monitorSilence = (stream) => {
+		const audioContext = new AudioContext();
+		const audioStreamSource = audioContext.createMediaStreamSource(stream);
+		const analyser = audioContext.createAnalyser();
+		analyser.minDecibels = MIN_DECIBELS;
+		audioStreamSource.connect(analyser);
+
+		const bufferLength = analyser.frequencyBinCount;
+		const domainData = new Uint8Array(bufferLength);
+
+		let lastSoundTime = Date.now();
+
+		const detectSound = () => {
+			analyser.getByteFrequencyData(domainData);
+
+			if (domainData.some((value) => value > 0)) {
+				lastSoundTime = Date.now();
+			}
+
+			if (isRecording && Date.now() - lastSoundTime > 3000) {
+				mediaRecorder.stop();
+				audioContext.close();
+				return;
+			}
+
+			window.requestAnimationFrame(detectSound);
+		};
+
+		window.requestAnimationFrame(detectSound);
+	};
+
+	const saveRecording = (blob) => {
+		const url = URL.createObjectURL(blob);
+		const a = document.createElement('a');
+		document.body.appendChild(a);
+		a.style = 'display: none';
+		a.href = url;
+		a.download = 'recording.wav';
+		a.click();
+		window.URL.revokeObjectURL(url);
+	};
+
 	const speechRecognitionHandler = () => {
 		// Check if SpeechRecognition is supported

-		if (speechRecognitionListening) {
-			speechRecognition.stop();
-		} else {
-			if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
-				// Create a SpeechRecognition object
-				speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
-
-				// Set continuous to true for continuous recognition
-				speechRecognition.continuous = true;
-
-				// Set the timeout for turning off the recognition after inactivity (in milliseconds)
-				const inactivityTimeout = 3000; // 3 seconds
-
-				let timeoutId;
-				// Start recognition
-				speechRecognition.start();
-				speechRecognitionListening = true;
-
-				// Event triggered when speech is recognized
-				speechRecognition.onresult = function (event) {
-					// Clear the inactivity timeout
-					clearTimeout(timeoutId);
-
-					// Handle recognized speech
-					console.log(event);
-					const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;
-					prompt = `${prompt}${transcript}`;
-
-					// Restart the inactivity timeout
-					timeoutId = setTimeout(() => {
-						console.log('Speech recognition turned off due to inactivity.');
-						speechRecognition.stop();
-					}, inactivityTimeout);
-				};
+		if (isRecording) {
+			if (speechRecognition) {
+				speechRecognition.stop();
+			}

-				// Event triggered when recognition is ended
-				speechRecognition.onend = function () {
-					// Restart recognition after it ends
-					console.log('recognition ended');
-					speechRecognitionListening = false;
-					if (prompt !== '' && $settings?.speechAutoSend === true) {
-						submitPrompt(prompt, user);
-					}
-				};
+			if (mediaRecorder) {
+				mediaRecorder.stop();
+			}
+		} else {
+			isRecording = true;

-				// Event triggered when an error occurs
-				speechRecognition.onerror = function (event) {
-					console.log(event);
-					toast.error(`Speech recognition error: ${event.error}`);
-					speechRecognitionListening = false;
-				};
+			if (($settings?.voice?.STTEngine ?? '') !== '') {
+				startRecording();
 			} else {
-				toast.error('SpeechRecognition API is not supported in this browser.');
+				if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
+					// Create a SpeechRecognition object
+					speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
+
+					// Set continuous to true for continuous recognition
+					speechRecognition.continuous = true;
+
+					// Set the timeout for turning off the recognition after inactivity (in milliseconds)
+					const inactivityTimeout = 3000; // 3 seconds
+
+					let timeoutId;
+					// Start recognition
+					speechRecognition.start();
+
+					// Event triggered when speech is recognized
+					speechRecognition.onresult = async (event) => {
+						// Clear the inactivity timeout
+						clearTimeout(timeoutId);
+
+						// Handle recognized speech
+						console.log(event);
+						const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;
+
+						prompt = `${prompt}${transcript}`;
+
+						await tick();
+						const inputElement = document.getElementById('chat-textarea');
+						inputElement?.focus();
+
+						// Restart the inactivity timeout
+						timeoutId = setTimeout(() => {
+							console.log('Speech recognition turned off due to inactivity.');
+							speechRecognition.stop();
+						}, inactivityTimeout);
+					};
+
+					// Event triggered when recognition is ended
+					speechRecognition.onend = function () {
+						// Restart recognition after it ends
+						console.log('recognition ended');
+						isRecording = false;
+						if (prompt !== '' && $settings?.speechAutoSend === true) {
+							submitPrompt(prompt, user);
+						}
+					};
+
+					// Event triggered when an error occurs
+					speechRecognition.onerror = function (event) {
+						console.log(event);
+						toast.error(`Speech recognition error: ${event.error}`);
+						isRecording = false;
+					};
+				} else {
+					toast.error('SpeechRecognition API is not supported in this browser.');
+				}
 			}
 		}
 	};
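
The recording path above ends by handing the captured WAV file to transcribeAudio, which is imported from elsewhere in the component and is not part of this diff. A minimal sketch of what such a helper might look like, assuming an authenticated Whisper transcription endpoint (the URL, header, and error handling here are assumptions, not taken from this commit):

// Hypothetical client helper; endpoint path and response handling are assumed.
export const transcribeAudio = async (token, file) => {
	const formData = new FormData();
	formData.append('file', file); // the 'recording.wav' File built in mediaRecorder.onstop

	const res = await fetch('/audio/api/v1/transcriptions', {
		method: 'POST',
		headers: { Authorization: `Bearer ${token}` },
		body: formData
	});

	if (!res.ok) {
		throw await res.text(); // surfaced via toast.error in the caller's .catch
	}

	return res.json(); // expected to include { text }, consumed as res.text above
};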
@@ -550,7 +657,7 @@
 								: ' pl-4'} rounded-xl resize-none h-[48px]"
 							placeholder={chatInputPlaceholder !== ''
 								? chatInputPlaceholder
-								: speechRecognitionListening
+								: isRecording
 								? 'Listening...'
 								: 'Send a message'}
 							bind:value={prompt}
@@ -659,6 +766,10 @@
 								e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
 								user = null;
 							}}
+							on:focus={(e) => {
+								e.target.style.height = '';
+								e.target.style.height = Math.min(e.target.scrollHeight, 200) + 'px';
+							}}
 							on:paste={(e) => {
 								const clipboardData = e.clipboardData || window.clipboardData;

@@ -696,7 +807,7 @@
 											speechRecognitionHandler();
 										}}
 									>
-										{#if speechRecognitionListening}
+										{#if isRecording}
 											<svg
 												class=" w-5 h-5 translate-y-[0.5px]"
 												fill="currentColor"

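Across all three files, the commit renames the persisted settings namespace from speech to voice and splits the single engine field into separate STT and TTS engines. A sketch of the resulting settings object, using field names from this diff (the example values are illustrative, not defaults):

// Illustrative settings shape; values are examples only.
const settings = {
	voice: {
		STTEngine: 'whisper-local', // '' uses the browser SpeechRecognition API; 'whisper-local' records audio for Whisper
		TTSEngine: 'openai',        // '' uses the browser speechSynthesis API; 'openai' calls synthesizeOpenAISpeech
		speaker: 'alloy'            // voice name for the chosen TTS engine
	},
	speechAutoSend: true            // auto-submit the transcribed prompt; honored by both STT paths
};
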
+ 3 - 3
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -148,7 +148,7 @@
 		} else {
 			speaking = true;

-			if ($settings?.speech?.engine === 'openai') {
+			if ($settings?.voice?.TTSEngine === 'openai') {
 				loadingSpeech = true;

 				const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
@@ -179,7 +179,7 @@
 				for (const [idx, sentence] of sentences.entries()) {
 					const res = await synthesizeOpenAISpeech(
 						localStorage.token,
-						$settings?.speech?.speaker,
+						$settings?.voice?.speaker,
 						sentence
 					).catch((error) => {
 						toast.error(error);
@@ -204,7 +204,7 @@
 						clearInterval(getVoicesLoop);

 						const voice =
-							voices?.filter((v) => v.name === $settings?.speech?.speaker)?.at(0) ?? undefined;
+							voices?.filter((v) => v.name === $settings?.voice?.speaker)?.at(0) ?? undefined;

 						const speak = new SpeechSynthesisUtterance(message.content);

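When TTSEngine is unset, playback still goes through the standard Web Speech API; the rename only changes where the speaker name is read from. A condensed sketch of that fallback, mirroring the branch above (standard speechSynthesis calls, not new code from this commit):

// Resolve the configured voice by name, then speak the message content.
const voices = speechSynthesis.getVoices();
const voice = voices?.filter((v) => v.name === $settings?.voice?.speaker)?.at(0) ?? undefined;

const speak = new SpeechSynthesisUtterance(message.content);
if (voice) speak.voice = voice; // otherwise the platform default voice is used
speechSynthesis.speak(speak);
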
+ 52 - 20
src/lib/components/chat/Settings/Voice.svelte

@@ -1,17 +1,21 @@
 <script lang="ts">
 	import { createEventDispatcher, onMount } from 'svelte';
+	import toast from 'svelte-french-toast';
 	const dispatch = createEventDispatcher();

 	export let saveSettings: Function;

 	// Voice

+	let STTEngines = ['', 'openai'];
+	let STTEngine = '';
+
 	let conversationMode = false;
 	let speechAutoSend = false;
 	let responseAutoPlayback = false;

-	let engines = ['', 'openai'];
-	let engine = '';
+	let TTSEngines = ['', 'openai'];
+	let TTSEngine = '';

 	let voices = [];
 	let speaker = '';
@@ -70,10 +74,11 @@
 		speechAutoSend = settings.speechAutoSend ?? false;
 		responseAutoPlayback = settings.responseAutoPlayback ?? false;

-		engine = settings?.speech?.engine ?? '';
-		speaker = settings?.speech?.speaker ?? '';
+		STTEngine = settings?.voice?.STTEngine ?? '';
+		TTSEngine = settings?.voice?.TTSEngine ?? '';
+		speaker = settings?.voice?.speaker ?? '';

-		if (engine === 'openai') {
+		if (TTSEngine === 'openai') {
 			getOpenAIVoices();
 		} else {
 			getWebAPIVoices();
@@ -85,37 +90,37 @@
 	class="flex flex-col h-full justify-between space-y-3 text-sm"
 	on:submit|preventDefault={() => {
 		saveSettings({
-			speech: {
-				engine: engine !== '' ? engine : undefined,
+			voice: {
+				STTEngine: STTEngine !== '' ? STTEngine : undefined,
+				TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
 				speaker: speaker !== '' ? speaker : undefined
 			}
 		});
 		dispatch('save');
 	}}
 >
-	<div class=" space-y-3">
+	<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80">
 		<div>
-			<div class=" mb-1 text-sm font-medium">TTS Settings</div>
+			<div class=" mb-1 text-sm font-medium">STT Settings</div>

 			<div class=" py-0.5 flex w-full justify-between">
-				<div class=" self-center text-xs font-medium">Speech Engine</div>
+				<div class=" self-center text-xs font-medium">Speech-to-Text Engine</div>
 				<div class="flex items-center relative">
 					<select
 						class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
-						bind:value={engine}
+						bind:value={STTEngine}
 						placeholder="Select a mode"
 						on:change={(e) => {
-							if (e.target.value === 'openai') {
-								getOpenAIVoices();
-								speaker = 'alloy';
-							} else {
-								getWebAPIVoices();
-								speaker = '';
+							if (e.target.value !== '') {
+								navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
+									toast.error(`Permission denied when accessing microphone: ${err}`);
+									STTEngine = '';
+								});
 							}
 						}}
 					>
 						<option value="">Default (Web API)</option>
-						<option value="openai">Open AI</option>
+						<option value="whisper-local">Whisper (Local)</option>
 					</select>
 				</div>
 			</div>
@@ -155,6 +160,33 @@
 					{/if}
 				</button>
 			</div>
+		</div>
+
+		<div>
+			<div class=" mb-1 text-sm font-medium">TTS Settings</div>
+
+			<div class=" py-0.5 flex w-full justify-between">
+				<div class=" self-center text-xs font-medium">Text-to-Speech Engine</div>
+				<div class="flex items-center relative">
+					<select
+						class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+						bind:value={TTSEngine}
+						placeholder="Select a mode"
+						on:change={(e) => {
+							if (e.target.value === 'openai') {
+								getOpenAIVoices();
+								speaker = 'alloy';
+							} else {
+								getWebAPIVoices();
+								speaker = '';
+							}
+						}}
+					>
+						<option value="">Default (Web API)</option>
+						<option value="openai">Open AI</option>
+					</select>
+				</div>
+			</div>

 			<div class=" py-0.5 flex w-full justify-between">
 				<div class=" self-center text-xs font-medium">Auto-playback response</div>
@@ -177,7 +209,7 @@

 		<hr class=" dark:border-gray-700" />

-		{#if engine === ''}
+		{#if TTSEngine === ''}
 			<div>
 				<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
 				<div class="flex w-full">
@@ -196,7 +228,7 @@
 					</div>
 				</div>
 			</div>
-		{:else if engine === 'openai'}
+		{:else if TTSEngine === 'openai'}
 			<div>
 				<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
 				<div class="flex w-full">
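
One subtlety in the Speech-to-Text select handler above: the getUserMedia call exists only to trigger the browser's microphone permission prompt when Whisper (Local) is selected, but the acquired stream is never stopped, so the microphone stays live until the page releases it. A variant that releases the device after the probe (a suggested refinement, not code from this commit):

// Probe microphone permission, then stop the track so the mic indicator clears.
navigator.mediaDevices
	.getUserMedia({ audio: true })
	.then((stream) => stream.getTracks().forEach((track) => track.stop()))
	.catch((err) => {
		toast.error(`Permission denied when accessing microphone: ${err}`);
		STTEngine = '';
	});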