瀏覽代碼

Merge pull request #704 from ollama-webui/tts

feat: tts optimisation
Timothy Jaeryang Baek 1 年之前
父節點
當前提交
182ab8b8a2
共有 2 個文件被更改，包括 87 次插入和 29 次刪除
  1. 70 29
      src/lib/components/chat/Messages/ResponseMessage.svelte
  2. 17 0
      src/lib/utils/index.ts

+ 70 - 29
src/lib/components/chat/Messages/ResponseMessage.svelte

@@ -15,6 +15,7 @@
 	import CodeBlock from './CodeBlock.svelte';
 
 	import { synthesizeOpenAISpeech } from '$lib/apis/openai';
+	import { extractSentences } from '$lib/utils';
 
 	export let modelfiles = [];
 	export let message;
@@ -35,8 +36,10 @@
 
 	let tooltipInstance = null;
 
-	let audioMap = {};
+	let sentencesAudio = {};
 	let speaking = null;
+	let speakingIdx = null;
+
 	let loadingSpeech = false;
 
 	$: tokens = marked.lexer(message.content);
@@ -116,44 +119,82 @@
 		}
 	};
 
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			speakingIdx = idx;
+			const audio = sentencesAudio[idx];
+			audio.play();
+			audio.onended = async (e) => {
+				await new Promise((r) => setTimeout(r, 300));
+
+				if (Object.keys(sentencesAudio).length - 1 === idx) {
+					speaking = null;
+				}
+
+				res(e);
+			};
+		});
+	};
+
 	const toggleSpeakMessage = async () => {
 		if (speaking) {
 			speechSynthesis.cancel();
-			speaking = null;
 
-			audioMap[message.id].pause();
-			audioMap[message.id].currentTime = 0;
+			sentencesAudio[speakingIdx].pause();
+			sentencesAudio[speakingIdx].currentTime = 0;
+
+			speaking = null;
+			speakingIdx = null;
 		} else {
 			speaking = true;
 
 			if ($settings?.speech?.engine === 'openai') {
 				loadingSpeech = true;
-				const res = await synthesizeOpenAISpeech(
-					localStorage.token,
-					$settings?.speech?.speaker,
-					message.content
-				).catch((error) => {
-					toast.error(error);
-					return null;
-				});
-
-				if (res) {
-					const blob = await res.blob();
-					const blobUrl = URL.createObjectURL(blob);
-					console.log(blobUrl);
-
-					loadingSpeech = false;
-
-					const audio = new Audio(blobUrl);
-					audioMap[message.id] = audio;
-
-					audio.onended = () => {
-						speaking = null;
-						if ($settings.conversationMode) {
-							document.getElementById('voice-input-button')?.click();
+
+				const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
+					const lastIndex = mergedTexts.length - 1;
+					if (lastIndex >= 0) {
+						const previousText = mergedTexts[lastIndex];
+						const wordCount = previousText.split(/\s+/).length;
+						if (wordCount < 2) {
+							mergedTexts[lastIndex] = previousText + ' ' + currentText;
+						} else {
+							mergedTexts.push(currentText);
 						}
-					};
-					audio.play().catch((e) => console.error('Error playing audio:', e));
+					} else {
+						mergedTexts.push(currentText);
+					}
+					return mergedTexts;
+				}, []);
+
+				console.log(sentences);
+
+				sentencesAudio = sentences.reduce((a, e, i, arr) => {
+					a[i] = null;
+					return a;
+				}, {});
+
+				let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+				for (const [idx, sentence] of sentences.entries()) {
+					const res = await synthesizeOpenAISpeech(
+						localStorage.token,
+						$settings?.speech?.speaker,
+						sentence
+					).catch((error) => {
+						toast.error(error);
+						return null;
+					});
+
+					if (res) {
+						const blob = await res.blob();
+						const blobUrl = URL.createObjectURL(blob);
+						const audio = new Audio(blobUrl);
+						sentencesAudio[idx] = audio;
+						loadingSpeech = false;
+
+						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
+					}
 				}
 			} else {
 				let voices = [];

+ 17 - 0
src/lib/utils/index.ts

@@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
 
 	return url.protocol === 'http:' || url.protocol === 'https:';
 };
+
/**
 * Remove emoji from a string, e.g. before handing the text to a speech synthesizer.
 *
 * Whole emoji sequences are removed, including an optional variation selector (U+FE0F)
 * and zero-width-joiner (U+200D) chains such as family or profession emoji. Symbols that
 * default to text presentation (for example U+2600 or U+00A9) are only removed when they
 * are explicitly followed by U+FE0F. Non-emoji characters outside the Basic Multilingual
 * Plane (rare CJK ideographs, mathematical letters, ...) are preserved.
 *
 * @param str Text that may contain emoji.
 * @returns `str` with every emoji sequence deleted; surrounding whitespace is not touched.
 */
export const removeEmojis = (str: string): string => {
	// Unit: a default-emoji character (optionally + VS16), or any pictograph forced to
	// emoji presentation by VS16. Units may be glued together with ZWJ.
	// Unicode property escapes require the `u` flag.
	const emojiRegex =
		/(?:\p{Emoji_Presentation}\uFE0F?|\p{Extended_Pictographic}\uFE0F)(?:\u200D\p{Extended_Pictographic}\uFE0F?)*/gu;

	return str.replace(emojiRegex, '');
};
+
/**
 * Split a paragraph into sentences, with emoji stripped from each one.
 *
 * A sentence boundary is whitespace that directly follows `.`, `!` or `?`. Requiring the
 * whitespace keeps decimals ("3.14"), ellipses ("...") and stacked punctuation ("?!")
 * inside a single sentence instead of breaking them into fragments.
 *
 * @param text Paragraph to split.
 * @returns Non-empty sentences in their original order, with emoji removed and edges trimmed.
 */
export const extractSentences = (text: string): string[] => {
	const sentences = text.split(/(?<=[.!?])\s+/);

	return (
		sentences
			// Trim after removing emoji so an emoji at either edge leaves no stray whitespace.
			.map((sentence) => removeEmojis(sentence).trim())
			.filter((sentence) => sentence !== '')
	);
};