|
@@ -1,11 +1,14 @@
|
|
|
<script lang="ts">
|
|
|
import { toast } from 'svelte-sonner';
|
|
|
import { createEventDispatcher, onMount, getContext } from 'svelte';
|
|
|
+ import { KokoroTTS } from 'kokoro-js';
|
|
|
|
|
|
import { user, settings, config } from '$lib/stores';
|
|
|
import { getVoices as _getVoices } from '$lib/apis/audio';
|
|
|
|
|
|
import Switch from '$lib/components/common/Switch.svelte';
|
|
|
+ import { round } from '@huggingface/transformers';
|
|
|
+ import Spinner from '$lib/components/common/Spinner.svelte';
|
|
|
const dispatch = createEventDispatcher();
|
|
|
|
|
|
const i18n = getContext('i18n');
|
|
@@ -20,6 +23,13 @@
|
|
|
|
|
|
let STTEngine = '';
|
|
|
|
|
|
+ let TTSEngine = '';
|
|
|
+ let TTSEngineConfig = {};
|
|
|
+
|
|
|
+ let TTSModel = null;
|
|
|
+ let TTSModelProgress = null;
|
|
|
+ let TTSModelLoading = false;
|
|
|
+
|
|
|
let voices = [];
|
|
|
let voice = '';
|
|
|
|
|
@@ -28,23 +38,37 @@
|
|
|
const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];
|
|
|
|
|
|
/**
 * Populate `voices` for the TTS engine currently in effect.
 *
 * - 'browser-kokoro' (user setting): voices come from the loaded Kokoro model. The model is
 *   loaded on demand; if it is still unavailable afterwards (for example no dtype has been
 *   selected yet), `voices` is left empty instead of dereferencing a null model.
 * - server engine '' (Web Speech API): `speechSynthesis.getVoices()` is polled every 100 ms
 *   until it returns a non-empty list.
 * - any other server engine: voices are fetched from the backend; a failed request is reported
 *   with a toast and `voices` is left unchanged.
 */
const getVoices = async () => {
	if (TTSEngine === 'browser-kokoro') {
		if (!TTSModel) {
			await loadKokoro();
		}

		// loadKokoro() does not guarantee a model: it returns early when no dtype is configured.
		if (!TTSModel) {
			voices = [];
			return;
		}

		voices = Object.entries(TTSModel.voices).map(([key, value]) => ({
			id: key,
			name: value.name,
			localService: false
		}));
		return;
	}

	if ($config.audio.tts.engine === '') {
		// The voice list can be empty on the first call, so keep polling until it is populated.
		const getVoicesLoop = setInterval(() => {
			voices = speechSynthesis.getVoices();

			if (voices.length > 0) {
				clearInterval(getVoicesLoop);
			}
		}, 100);
		return;
	}

	const res = await _getVoices(localStorage.token).catch((e) => {
		toast.error(`${e}`);
	});

	if (res) {
		console.log(res);
		voices = res.voices;
	}
};
|
|
@@ -67,6 +91,9 @@
|
|
|
|
|
|
STTEngine = $settings?.audio?.stt?.engine ?? '';
|
|
|
|
|
|
+ TTSEngine = $settings?.audio?.tts?.engine ?? '';
|
|
|
+ TTSEngineConfig = $settings?.audio?.tts?.engineConfig ?? {};
|
|
|
+
|
|
|
if ($settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice) {
|
|
|
voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
|
|
|
} else {
|
|
@@ -77,6 +104,51 @@
|
|
|
|
|
|
await getVoices();
|
|
|
});
|
|
|
+
|
|
|
// Re-run the engine-change handler whenever the selected engine or its config is updated.
$: if (TTSEngine && TTSEngineConfig) {
	// Fire-and-forget call: report failures instead of leaving an unhandled promise rejection.
	onTTSEngineChange().catch((e) => {
		console.error(e);
		toast.error(`${e}`);
	});
}
|
|
|
+
|
|
|
// Handler invoked when the TTS engine selection or its configuration changes.
// Only the in-browser Kokoro engine needs work here: its model is (re)loaded.
const onTTSEngineChange = async () => {
	const usesBrowserKokoro = TTSEngine === 'browser-kokoro';
	if (!usesBrowserKokoro) {
		return;
	}

	await loadKokoro();
};
|
|
|
+
|
|
|
// Load currently in flight, as `{ dtype, promise }`, or null when idle.
// Callers asking for the same dtype share it; a request for another dtype supersedes it.
let kokoroLoad = null;

/**
 * Load the Kokoro.js model for the dtype selected in `TTSEngineConfig`, then refresh `voices`.
 *
 * Does nothing unless `TTSEngine` is 'browser-kokoro'. `voices` is cleared up front; when no
 * dtype has been chosen the function stops there and leaves `TTSModel` untouched.
 * Load failures are logged and shown as a toast rather than thrown, and `TTSModelLoading`
 * is always reset once the active load settles.
 */
const loadKokoro = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	voices = [];

	const dtype = TTSEngineConfig?.dtype;
	if (!dtype) {
		return;
	}

	// This dtype is already being loaded (e.g. onMount and the reactive statement both
	// fired); reuse that load instead of resetting the model and starting a second one.
	if (kokoroLoad?.dtype === dtype) {
		return kokoroLoad.promise;
	}

	TTSModel = null;
	TTSModelProgress = null;
	TTSModelLoading = true;

	const load = { dtype, promise: null };
	kokoroLoad = load;

	load.promise = (async () => {
		try {
			const model = await KokoroTTS.from_pretrained('onnx-community/Kokoro-82M-v1.0-ONNX', {
				dtype, // Options: "fp32", "fp16", "q8", "q4", "q4f16"
				device: navigator?.gpu ? 'webgpu' : 'wasm', // Detect WebGPU
				progress_callback: (e) => {
					// Ignore progress events from a load that has been superseded.
					if (kokoroLoad === load) {
						TTSModelProgress = e;
					}
				}
			});

			// A newer request (different dtype) took over while this one was downloading.
			if (kokoroLoad !== load) {
				return;
			}

			TTSModel = model;
			await getVoices();
		} catch (e) {
			if (kokoroLoad === load) {
				console.error(e);
				toast.error(`${e}`);
			}
		} finally {
			if (kokoroLoad === load) {
				kokoroLoad = null;
				TTSModelLoading = false;
			}
		}
	})();

	return load.promise;
};
|
|
|
</script>
|
|
|
|
|
|
<form
|
|
@@ -88,6 +160,8 @@
|
|
|
engine: STTEngine !== '' ? STTEngine : undefined
|
|
|
},
|
|
|
tts: {
|
|
|
+ engine: TTSEngine !== '' ? TTSEngine : undefined,
|
|
|
+ engineConfig: TTSEngineConfig,
|
|
|
playbackRate: playbackRate,
|
|
|
voice: voice !== '' ? voice : undefined,
|
|
|
defaultVoice: $config?.audio?.tts?.voice ?? '',
|
|
@@ -142,6 +216,39 @@
|
|
|
<div>
|
|
|
<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>
|
|
|
|
|
|
+ <div class=" py-0.5 flex w-full justify-between">
|
|
|
+ <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
|
|
|
+ <div class="flex items-center relative">
|
|
|
+ <select
|
|
|
+ class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
|
|
+ bind:value={TTSEngine}
|
|
|
+ placeholder="Select an engine"
|
|
|
+ >
|
|
|
+ <option value="">{$i18n.t('Default')}</option>
|
|
|
+ <option value="browser-kokoro">{$i18n.t('Kokoro.js (Browser)')}</option>
|
|
|
+ </select>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+
|
|
|
+ {#if TTSEngine === 'browser-kokoro'}
|
|
|
+ <div class=" py-0.5 flex w-full justify-between">
|
|
|
+ <div class=" self-center text-xs font-medium">{$i18n.t('Kokoro.js Dtype')}</div>
|
|
|
+ <div class="flex items-center relative">
|
|
|
+ <select
|
|
|
+ class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
|
|
|
+ bind:value={TTSEngineConfig.dtype}
|
|
|
+ placeholder="Select dtype"
|
|
|
+ >
|
|
|
+ <option value="" disabled selected>Select dtype</option>
|
|
|
+ <option value="fp32">fp32</option>
|
|
|
+ <option value="fp16">fp16</option>
|
|
|
+ <option value="q8">q8</option>
|
|
|
+ <option value="q4">q4</option>
|
|
|
+ </select>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ {/if}
|
|
|
+
|
|
|
<div class=" py-0.5 flex w-full justify-between">
|
|
|
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
|
|
|
|
|
@@ -178,7 +285,46 @@
|
|
|
|
|
|
<hr class=" dark:border-gray-850" />
|
|
|
|
|
|
- {#if $config.audio.tts.engine === ''}
|
|
|
+ {#if TTSEngine === 'browser-kokoro'}
|
|
|
+ {#if TTSModel}
|
|
|
+ <div>
|
|
|
+ <div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
|
|
+ <div class="flex w-full">
|
|
|
+ <div class="flex-1">
|
|
|
+ <input
|
|
|
+ list="voice-list"
|
|
|
+ class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
|
|
+ bind:value={voice}
|
|
|
+ placeholder="Select a voice"
|
|
|
+ />
|
|
|
+
|
|
|
+ <datalist id="voice-list">
|
|
|
+ {#each voices as voice}
|
|
|
+ <option value={voice.id}>{voice.name}</option>
|
|
|
+ {/each}
|
|
|
+ </datalist>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ {:else}
|
|
|
+ <div>
|
|
|
+ <div class=" mb-2.5 text-sm font-medium flex gap-2 items-center">
|
|
|
+ <Spinner className="size-4" />
|
|
|
+
|
|
|
+ <div class=" text-sm font-medium shimmer">
|
|
|
+ {$i18n.t('Loading Kokoro.js...')}
|
|
|
+ {TTSModelProgress && TTSModelProgress.status === 'progress'
|
|
|
+ ? `(${Math.round(TTSModelProgress.progress * 10) / 10}%)`
|
|
|
+ : ''}
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+
|
|
|
+ <div class="text-xs text-gray-500">
|
|
|
+ {$i18n.t('Please do not close the settings page while loading the model.')}
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ {/if}
|
|
|
+ {:else if $config.audio.tts.engine === ''}
|
|
|
<div>
|
|
|
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
|
|
|
<div class="flex w-full">
|