123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388 |
- <script lang="ts">
- import { toast } from 'svelte-sonner';
- import { createEventDispatcher, onMount, getContext } from 'svelte';
- import { KokoroTTS } from 'kokoro-js';
- import { user, settings, config } from '$lib/stores';
- import { getVoices as _getVoices } from '$lib/apis/audio';
- import Switch from '$lib/components/common/Switch.svelte';
- import { round } from '@huggingface/transformers';
- import Spinner from '$lib/components/common/Spinner.svelte';
// Prop: callback supplied by the parent; this component calls it with a partial settings object.
export let saveSettings: Function;

// Component plumbing: the i18n store taken from context and the dispatcher that emits 'save'.
const i18n = getContext('i18n');
const dispatch = createEventDispatcher();

// Speech-to-text and conversation preferences (overwritten from $settings in onMount).
let STTEngine = '';
let conversationMode = false;
let speechAutoSend = false;

// Text-to-speech preferences (overwritten from $settings / $config in onMount).
let TTSEngine = '';
let TTSEngineConfig = {};
let responseAutoPlayback = false;
let nonLocalVoices = false;
let voice = '';
let voices = [];

// State of the in-browser Kokoro model: the loaded instance, the most recent progress event
// reported while loading, and a flag raised when a load starts.
let TTSModel = null;
let TTSModelProgress = null;
let TTSModelLoading = false;

// Playback speed: the current rate and the choices offered in the speed dropdown.
let playbackRate = 1;
const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];
/**
 * Refresh `voices` for whichever TTS backend is currently in effect.
 *
 * - `TTSEngine === 'browser-kokoro'`: lists the voices exposed by the loaded Kokoro model,
 *   loading the model first when it is missing. `loadKokoro` does nothing until a dtype is
 *   configured, so the model can still be absent afterwards; in that case the list is left
 *   empty instead of dereferencing `null`.
 * - `$config.audio.tts.engine === ''`: polls `speechSynthesis.getVoices()` every 100 ms,
 *   since the list may still be empty on the first call. Polling stops as soon as voices
 *   arrive, or after a bounded number of attempts so the timer cannot run forever.
 * - otherwise: fetches the voice list through `_getVoices`; a failure is shown as a toast
 *   and leaves `voices` unchanged.
 */
const getVoices = async () => {
	if (TTSEngine === 'browser-kokoro') {
		if (!TTSModel) {
			await loadKokoro();
		}

		// Still no model (no dtype chosen yet): nothing to list.
		if (!TTSModel) {
			voices = [];
			return;
		}

		voices = Object.entries(TTSModel.voices).map(([id, details]) => ({
			id,
			name: details.name,
			localService: false
		}));
		return;
	}

	if ($config.audio.tts.engine === '') {
		// Without the Web Speech API there is nothing to poll.
		if (typeof speechSynthesis === 'undefined') {
			return;
		}

		const pollIntervalMs = 100;
		const maxAttempts = 100; // give up after roughly 10 seconds
		let attempts = 0;

		const poll = setInterval(() => {
			attempts += 1;
			voices = speechSynthesis.getVoices();

			if (voices.length > 0 || attempts >= maxAttempts) {
				clearInterval(poll);
			}
		}, pollIntervalMs);
		return;
	}

	const res = await _getVoices(localStorage.token).catch((e) => {
		toast.error(`${e}`);
	});

	if (res) {
		// Guard against a response without a `voices` field so the template can always iterate.
		voices = res.voices ?? [];
	}
};
/** Flip the auto-playback preference and hand the new value straight to `saveSettings`. */
const toggleResponseAutoPlayback = async () => {
	const enabled = !responseAutoPlayback;
	responseAutoPlayback = enabled;
	saveSettings({ responseAutoPlayback: enabled });
};
/** Flip the speech auto-send preference and hand the new value straight to `saveSettings`. */
const toggleSpeechAutoSend = async () => {
	const enabled = !speechAutoSend;
	speechAutoSend = enabled;
	saveSettings({ speechAutoSend: enabled });
};
// On mount: copy the stored preferences into local state, then load the voice list.
onMount(async () => {
	const ttsPrefs = $settings.audio?.tts;

	playbackRate = ttsPrefs?.playbackRate ?? 1;
	conversationMode = $settings.conversationMode ?? false;
	speechAutoSend = $settings.speechAutoSend ?? false;
	responseAutoPlayback = $settings.responseAutoPlayback ?? false;

	STTEngine = $settings.audio?.stt?.engine ?? '';
	TTSEngine = ttsPrefs?.engine ?? '';
	TTSEngineConfig = ttsPrefs?.engineConfig ?? {};

	// The saved voice is only honoured while the `defaultVoice` recorded alongside it still
	// equals `$config.audio.tts.voice`; otherwise the voice from `$config` is used.
	const configVoice = $config.audio.tts.voice;
	voice =
		ttsPrefs?.defaultVoice === configVoice
			? (ttsPrefs?.voice ?? configVoice ?? '')
			: (configVoice ?? '');

	nonLocalVoices = ttsPrefs?.nonLocalVoices ?? false;

	await getVoices();
});
// Reactive statement: re-runs when the selected engine or its config changes, and calls
// the engine-change handler as long as both are set.
$: if (TTSEngineConfig && TTSEngine) onTTSEngineChange();
/** Engine-change handler: only the in-browser Kokoro engine needs work here (a model load). */
const onTTSEngineChange = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}
	await loadKokoro();
};
/**
 * Load the Kokoro model for the configured dtype and then refresh the voice list.
 *
 * Does nothing unless `TTSEngine` is 'browser-kokoro'. The voice list is cleared first; if no
 * dtype has been chosen yet the function returns with the list empty and no model loaded.
 *
 * A load failure is reported with a toast instead of escaping as an unhandled rejection, and
 * `TTSModelLoading` is reset once the load settles. If the engine or dtype selection changes
 * while a load is in flight, that load's result, progress events, error toast and
 * loading-flag reset are all discarded so an outdated model cannot replace a newer one.
 */
const loadKokoro = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	voices = [];

	const dtype = TTSEngineConfig?.dtype;
	if (!dtype) {
		return;
	}

	TTSModel = null;
	TTSModelProgress = null;
	TTSModelLoading = true;

	// True once the user has picked a different engine or dtype since this load began.
	const isStale = () => TTSEngine !== 'browser-kokoro' || TTSEngineConfig?.dtype !== dtype;

	const modelId = 'onnx-community/Kokoro-82M-v1.0-ONNX';

	try {
		const model = await KokoroTTS.from_pretrained(modelId, {
			dtype, // Options: "fp32", "fp16", "q8", "q4", "q4f16"
			device: navigator?.gpu ? 'webgpu' : 'wasm', // Use WebGPU when the browser exposes it
			progress_callback: (e) => {
				if (!isStale()) {
					TTSModelProgress = e;
				}
			}
		});

		if (isStale()) {
			return;
		}

		TTSModel = model;
		await getVoices();
	} catch (e) {
		if (!isStale()) {
			toast.error(`${e}`);
		}
	} finally {
		// A newer load (if any) owns the flag; only the current one may clear it.
		if (!isStale()) {
			TTSModelLoading = false;
		}
	}
};
- </script>
- <form
- class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={async () => {
	// Empty selections are sent as `undefined` rather than as empty strings.
	const sttEngine = STTEngine === '' ? undefined : STTEngine;
	const ttsEngine = TTSEngine === '' ? undefined : TTSEngine;
	const selectedVoice = voice === '' ? undefined : voice;

	// `nonLocalVoices` is only sent when `$config.audio.tts.engine` is the empty string.
	const allowNonLocal = $config.audio.tts.engine === '' ? nonLocalVoices : undefined;

	saveSettings({
		audio: {
			stt: {
				engine: sttEngine
			},
			tts: {
				engine: ttsEngine,
				engineConfig: TTSEngineConfig,
				playbackRate,
				voice: selectedVoice,
				defaultVoice: $config?.audio?.tts?.voice ?? '',
				nonLocalVoices: allowNonLocal
			}
		}
	});

	dispatch('save');
}}
- >
- <div class=" space-y-3 overflow-y-scroll max-h-[28rem] lg:max-h-full">
<!-- Speech-to-text preferences -->
<div>
	<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

	<!-- The engine override is hidden when `$config.audio.stt.engine` is already 'web'. -->
	{#if $config.audio.stt.engine !== 'web'}
		<div class=" py-0.5 flex w-full justify-between">
			<div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
			<div class="flex items-center relative">
				<select
					placeholder="Select an engine"
					class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
					bind:value={STTEngine}
				>
					<option value="">{$i18n.t('Default')}</option>
					<option value="web">{$i18n.t('Web API')}</option>
				</select>
			</div>
		</div>
	{/if}

	<!-- On/Off toggle; each click calls toggleSpeechAutoSend. -->
	<div class=" py-0.5 flex w-full justify-between">
		<div class=" self-center text-xs font-medium">
			{$i18n.t('Instant Auto-Send After Voice Transcription')}
		</div>
		<button
			type="button"
			class="p-1 px-3 text-xs flex rounded-sm transition"
			on:click={() => toggleSpeechAutoSend()}
		>
			<span class="ml-2 self-center">{speechAutoSend === true ? $i18n.t('On') : $i18n.t('Off')}</span>
		</button>
	</div>
</div>
<!-- Text-to-speech preferences -->
<div>
	<div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>

	<!-- Engine choice: default or the in-browser Kokoro.js engine. -->
	<div class=" py-0.5 flex w-full justify-between">
		<div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
		<div class="flex items-center relative">
			<select
				placeholder="Select an engine"
				class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
				bind:value={TTSEngine}
			>
				<option value="">{$i18n.t('Default')}</option>
				<option value="browser-kokoro">{$i18n.t('Kokoro.js (Browser)')}</option>
			</select>
		</div>
	</div>

	<!-- The dtype selector is only shown for the Kokoro.js engine. -->
	{#if TTSEngine === 'browser-kokoro'}
		<div class=" py-0.5 flex w-full justify-between">
			<div class=" self-center text-xs font-medium">{$i18n.t('Kokoro.js Dtype')}</div>
			<div class="flex items-center relative">
				<select
					placeholder="Select dtype"
					class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
					bind:value={TTSEngineConfig.dtype}
				>
					<option value="" disabled selected>Select dtype</option>
					<option value="fp32">fp32</option>
					<option value="fp16">fp16</option>
					<option value="q8">q8</option>
					<option value="q4">q4</option>
				</select>
			</div>
		</div>
	{/if}

	<!-- On/Off toggle; each click calls toggleResponseAutoPlayback. -->
	<div class=" py-0.5 flex w-full justify-between">
		<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
		<button
			type="button"
			class="p-1 px-3 text-xs flex rounded-sm transition"
			on:click={() => toggleResponseAutoPlayback()}
		>
			<span class="ml-2 self-center">{responseAutoPlayback === true ? $i18n.t('On') : $i18n.t('Off')}</span>
		</button>
	</div>

	<!-- Playback speed dropdown, one entry per value in speedOptions. -->
	<div class=" py-0.5 flex w-full justify-between">
		<div class=" self-center text-xs font-medium">{$i18n.t('Speech Playback Speed')}</div>
		<div class="flex items-center relative">
			<select
				class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
				bind:value={playbackRate}
			>
				{#each speedOptions as speed}
					<option value={speed} selected={playbackRate === speed}>{speed}x</option>
				{/each}
			</select>
		</div>
	</div>
</div>
- <hr class=" border-gray-100 dark:border-gray-850" />
<!--
	Voice picker. Three presentations:
	1. Kokoro.js selected but no model yet: a loading indicator, shown only once a load has
	   actually started (no indicator while the user still has to pick a dtype).
	2. Not Kokoro.js and `$config.audio.tts.engine` is empty: a dropdown of browser voices,
	   filtered to local voices unless "Allow non-local voices" is switched on.
	3. Otherwise (Kokoro.js model loaded, or a non-empty `$config.audio.tts.engine`): a
	   free-text input with the known voices offered as suggestions.
-->
{#if TTSEngine === 'browser-kokoro' && !TTSModel}
	{#if TTSModelLoading}
		<div>
			<div class=" mb-2.5 text-sm font-medium flex gap-2 items-center">
				<Spinner className="size-4" />
				<div class=" text-sm font-medium shimmer">
					{$i18n.t('Loading Kokoro.js...')}
					{TTSModelProgress && TTSModelProgress.status === 'progress'
						? `(${Math.round(TTSModelProgress.progress * 10) / 10}%)`
						: ''}
				</div>
			</div>
			<div class="text-xs text-gray-500">
				{$i18n.t('Please do not close the settings page while loading the model.')}
			</div>
		</div>
	{/if}
{:else if TTSEngine !== 'browser-kokoro' && $config.audio.tts.engine === ''}
	<div>
		<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
		<div class="flex w-full">
			<div class="flex-1">
				<select
					class="w-full rounded-lg py-2 px-4 text-sm bg-white dark:text-gray-300 dark:bg-gray-850 outline-hidden"
					bind:value={voice}
				>
					<!-- "Default" is the selected entry exactly when no voice has been chosen. -->
					<option value="" selected={voice === ''}>{$i18n.t('Default')}</option>
					{#each voices.filter((v) => nonLocalVoices || v.localService === true) as _voice}
						<option
							value={_voice.name}
							class="bg-gray-100 dark:bg-gray-700"
							selected={voice === _voice.name}>{_voice.name}</option
						>
					{/each}
				</select>
			</div>
		</div>
		<div class="flex items-center justify-between my-1.5">
			<div class="text-xs">
				{$i18n.t('Allow non-local voices')}
			</div>
			<div class="mt-1">
				<Switch bind:state={nonLocalVoices} />
			</div>
		</div>
	</div>
{:else}
	<div>
		<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
		<div class="flex w-full">
			<div class="flex-1">
				<input
					list="voice-list"
					class="w-full rounded-lg py-2 px-4 text-sm bg-white dark:text-gray-300 dark:bg-gray-850 outline-hidden"
					bind:value={voice}
					placeholder="Select a voice"
				/>
				<datalist id="voice-list">
					<!-- Loop variable is `_voice` so it does not shadow the bound `voice` above. -->
					{#each voices as _voice}
						<option value={_voice.id}>{_voice.name}</option>
					{/each}
				</datalist>
			</div>
		</div>
	</div>
{/if}
- </div>
<!-- Footer: the submit button triggers the form's on:submit handler. -->
<div class="flex justify-end text-sm font-medium">
	<button
		type="submit"
		class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
	>
		{$i18n.t('Save')}
	</button>
</div>
- </form>
|