<!-- Audio.svelte: settings form for the speech-to-text (STT) and text-to-speech (TTS) engines. -->
<script lang="ts">
  import { createEventDispatcher, onMount, getContext } from 'svelte';
  import { toast } from 'svelte-sonner';

  import { getBackendConfig } from '$lib/apis';
  import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
  import { user, settings, config } from '$lib/stores';

  import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
  import Switch from '$lib/components/common/Switch.svelte';
  // Emits the `save` event once the form has been submitted.
  const dispatch = createEventDispatcher();
  // i18n store taken from Svelte context; the markup reads it as `$i18n`.
  const i18n = getContext('i18n');

  // Component prop. It is not invoked anywhere in the visible part of this file.
  export let saveHandler: Function;

  // Text-to-speech settings, loaded from and saved to the `tts` section of the audio config.
  let TTS_OPENAI_API_BASE_URL = '';
  let TTS_OPENAI_API_KEY = '';
  let TTS_API_KEY = '';
  let TTS_ENGINE = '';
  let TTS_MODEL = '';
  let TTS_VOICE = '';

  // Speech-to-text settings, loaded from and saved to the `stt` section of the audio config.
  let STT_OPENAI_API_BASE_URL = '';
  let STT_OPENAI_API_KEY = '';
  let STT_ENGINE = '';
  let STT_MODEL = '';

  // Options offered by the TTS voice / model pickers; each entry is rendered by its `name`.
  let voices: { name: string }[] = [];
  let models: { name: string }[] = [];

  // Not referenced in the visible part of this file.
  let nonLocalVoices = false;
  /** Replaces `voices` with the hard-coded list of OpenAI voice names. */
  const getOpenAIVoices = () => {
    voices = [
      { name: 'alloy' },
      { name: 'echo' },
      { name: 'fable' },
      { name: 'onyx' },
      { name: 'nova' },
      { name: 'shimmer' }
    ];
  };
  /** Replaces `models` with the hard-coded list of OpenAI TTS model names. */
  const getOpenAIModels = () => {
    models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }];
  };
  // Maximum number of polling ticks (100 ms apart) before giving up on the browser voice list.
  const MAX_WEB_VOICE_POLLS = 50;
  // Handle of the poll started by the most recent getWebAPIVoices() call, while it is running.
  let webVoicePollTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * Loads the browser's speech-synthesis voices into `voices`.
   *
   * The list is read synchronously first. If it is still empty, the list is polled
   * every 100 ms until voices appear, the TTS engine is switched to a non-browser
   * engine, or the attempt limit is reached. A poll left over from an earlier call
   * is cancelled so only one timer is ever active.
   */
  const getWebAPIVoices = () => {
    if (webVoicePollTimer !== undefined) {
      clearInterval(webVoicePollTimer);
      webVoicePollTimer = undefined;
    }

    // Environments without the Web Speech API have no browser voices to offer.
    if (typeof speechSynthesis === 'undefined') {
      voices = [];
      return;
    }

    voices = speechSynthesis.getVoices();
    if (voices.length > 0) {
      return;
    }

    let polls = 0;
    const timer = setInterval(() => {
      polls += 1;

      // Once another engine is selected its own loader owns `voices`; do not overwrite it.
      const engineChanged = TTS_ENGINE === 'openai' || TTS_ENGINE === 'elevenlabs';
      const available = engineChanged ? [] : speechSynthesis.getVoices();

      if (available.length > 0) {
        voices = available;
      }

      if (engineChanged || available.length > 0 || polls >= MAX_WEB_VOICE_POLLS) {
        clearInterval(timer);
        if (webVoicePollTimer === timer) {
          webVoicePollTimer = undefined;
        }
      }
    }, 100);
    webVoicePollTimer = timer;
  };
  /**
   * Fetches voice names from the `/voices` endpoint (used for the ElevenLabs engine)
   * and stores them in `voices` as `{ name }` entries.
   *
   * Never rejects: a network error, a non-OK response, or a payload without a
   * `voices` array is logged and reported with an error toast, and `voices` is
   * left unchanged.
   */
  const getVoices = async () => {
    try {
      const response = await fetch('/voices', {
        method: 'GET',
        headers: {
          Authorization: `Bearer ${localStorage.token}`
        }
      });

      if (!response.ok) {
        throw new Error(`Voice request failed with status ${response.status}`);
      }

      const data = await response.json();
      const names: unknown = data?.voices;
      if (!Array.isArray(names)) {
        throw new Error('Voice response does not contain a "voices" array');
      }

      voices = names.map((name) => ({ name }));
    } catch (error) {
      console.error(error);
      toast.error('Failed to fetch voices');
    }
  };
  /**
   * Sends the current TTS and STT form values to the backend via `updateAudioConfig`.
   * When the call returns a truthy result, shows a success toast and refreshes the
   * shared `config` store from `getBackendConfig()`.
   */
  const updateConfigHandler = async () => {
    const res = await updateAudioConfig(localStorage.token, {
      tts: {
        OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
        OPENAI_API_KEY: TTS_OPENAI_API_KEY,
        API_KEY: TTS_API_KEY,
        ENGINE: TTS_ENGINE,
        MODEL: TTS_MODEL,
        VOICE: TTS_VOICE
      },
      stt: {
        OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
        OPENAI_API_KEY: STT_OPENAI_API_KEY,
        ENGINE: STT_ENGINE,
        MODEL: STT_MODEL
      }
    });

    if (res) {
      toast.success($i18n.t('Audio settings updated successfully'));
      config.set(await getBackendConfig());
    }
  };
  /**
   * On mount: load the saved audio configuration into the local form state, then
   * populate the voice/model pickers for whichever TTS engine is configured.
   * Remote voices are requested only when the configured engine is ElevenLabs.
   */
  onMount(async () => {
    const res = await getAudioConfig(localStorage.token);

    if (res) {
      // The response carries API keys, so it is deliberately not written to the console.
      TTS_OPENAI_API_BASE_URL = res.tts.OPENAI_API_BASE_URL;
      TTS_OPENAI_API_KEY = res.tts.OPENAI_API_KEY;
      TTS_API_KEY = res.tts.API_KEY;
      TTS_ENGINE = res.tts.ENGINE;
      TTS_MODEL = res.tts.MODEL;
      TTS_VOICE = res.tts.VOICE;

      STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
      STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
      STT_ENGINE = res.stt.ENGINE;
      STT_MODEL = res.stt.MODEL;
    }

    if (TTS_ENGINE === 'openai') {
      getOpenAIVoices();
      getOpenAIModels();
    } else if (TTS_ENGINE === 'elevenlabs') {
      await getVoices();
    } else {
      getWebAPIVoices();
    }
  });
</script>
  <!--
    Audio settings form.
    Submitting saves the current values through updateConfigHandler() and then
    dispatches the `save` event.
  -->
  <form
    class="flex flex-col h-full justify-between space-y-3 text-sm"
    on:submit|preventDefault={async () => {
      await updateConfigHandler();
      dispatch('save');
    }}
  >
    <div class=" space-y-3 overflow-y-scroll scrollbar-hidden h-full">
      <div class="flex flex-col gap-3">
        <!-- Speech-to-text -->
        <div>
          <div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>

          <div class=" py-0.5 flex w-full justify-between">
            <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
            <div class="flex items-center relative">
              <select
                class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
                bind:value={STT_ENGINE}
                placeholder="Select an engine"
              >
                <option value="">{$i18n.t('Whisper (Local)')}</option>
                <option value="openai">OpenAI</option>
                <option value="web">{$i18n.t('Web API')}</option>
              </select>
            </div>
          </div>

          {#if STT_ENGINE === 'openai'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <input
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Base URL')}
                  bind:value={STT_OPENAI_API_BASE_URL}
                  required
                />

                <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={STT_OPENAI_API_KEY} />
              </div>
            </div>

            <hr class=" dark:border-gray-850 my-2" />

            <div>
              <div class=" mb-1.5 text-sm font-medium">{$i18n.t('STT Model')}</div>
              <div class="flex w-full">
                <div class="flex-1">
                  <!-- Datalist ids are unique per section so the STT and TTS model inputs never share suggestions. -->
                  <input
                    list="stt-model-list"
                    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                    bind:value={STT_MODEL}
                    placeholder="Select a model"
                  />

                  <datalist id="stt-model-list">
                    <option value="whisper-1" />
                  </datalist>
                </div>
              </div>
            </div>
          {/if}
        </div>

        <hr class=" dark:border-gray-800" />

        <!-- Text-to-speech -->
        <div>
          <div class=" mb-1 text-sm font-medium">{$i18n.t('TTS Settings')}</div>

          <div class=" py-0.5 flex w-full justify-between">
            <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
            <div class="flex items-center relative">
              <!-- Switching engine reloads the picker options and resets voice/model, which are engine-specific. -->
              <select
                class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
                bind:value={TTS_ENGINE}
                placeholder="Select a mode"
                on:change={async (e) => {
                  const engine = e.currentTarget.value;

                  if (engine === 'openai') {
                    getOpenAIVoices();
                    getOpenAIModels();
                    TTS_VOICE = 'alloy';
                    TTS_MODEL = 'tts-1';
                  } else if (engine === 'elevenlabs') {
                    TTS_VOICE = '';
                    TTS_MODEL = '';
                    await getVoices();
                  } else {
                    getWebAPIVoices();
                    TTS_VOICE = '';
                    TTS_MODEL = '';
                  }
                }}
              >
                <option value="">{$i18n.t('Web API')}</option>
                <option value="openai">{$i18n.t('OpenAI')}</option>
                <option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
              </select>
            </div>
          </div>

          {#if TTS_ENGINE === 'openai'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <input
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Base URL')}
                  bind:value={TTS_OPENAI_API_BASE_URL}
                  required
                />

                <SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_OPENAI_API_KEY} />
              </div>
            </div>
          {:else if TTS_ENGINE === 'elevenlabs'}
            <div>
              <div class="mt-1 flex gap-2 mb-1">
                <!-- Masked so the key is not shown in plain text on screen. -->
                <input
                  type="password"
                  autocomplete="off"
                  class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                  placeholder={$i18n.t('API Key')}
                  bind:value={TTS_API_KEY}
                  required
                />
              </div>
            </div>
          {/if}

          <hr class=" dark:border-gray-850 my-2" />

          {#if TTS_ENGINE === ''}
            <!-- Web API: choose among the voices reported by the browser. -->
            <div>
              <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
              <div class="flex w-full">
                <div class="flex-1">
                  <select
                    class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                    bind:value={TTS_VOICE}
                  >
                    <option value="" selected={TTS_VOICE === ''}>{$i18n.t('Default')}</option>
                    {#each voices as voice}
                      <option
                        value={voice.name}
                        class="bg-gray-100 dark:bg-gray-700"
                        selected={TTS_VOICE === voice.name}>{voice.name}</option
                      >
                    {/each}
                  </select>
                </div>
              </div>
            </div>
          {:else if TTS_ENGINE === 'openai' || TTS_ENGINE === 'elevenlabs'}
            <!-- Remote engines: free-text voice and model inputs with suggestions from `voices` / `models`. -->
            <div class=" flex gap-2">
              <div class="w-full">
                <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
                <div class="flex w-full">
                  <div class="flex-1">
                    <input
                      list="tts-voice-list"
                      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                      bind:value={TTS_VOICE}
                      placeholder="Select a voice"
                    />

                    <datalist id="tts-voice-list">
                      {#each voices as voice}
                        <option value={voice.name} />
                      {/each}
                    </datalist>
                  </div>
                </div>
              </div>
              <div class="w-full">
                <div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
                <div class="flex w-full">
                  <div class="flex-1">
                    <input
                      list="tts-model-list"
                      class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
                      bind:value={TTS_MODEL}
                      placeholder="Select a model"
                    />

                    <datalist id="tts-model-list">
                      {#each models as model}
                        <option value={model.name} />
                      {/each}
                    </datalist>
                  </div>
                </div>
              </div>
            </div>
          {/if}
        </div>
      </div>
    </div>

    <div class="flex justify-end text-sm font-medium">
      <button
        class=" px-4 py-2 bg-emerald-700 hover:bg-emerald-800 text-gray-100 transition rounded-lg"
        type="submit"
      >
        {$i18n.t('Save')}
      </button>
    </div>
  </form>