9 ماه پیش · 9d5e3e2a91
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -10,12 +10,12 @@ from fastapi import (
 
				     File,
			
 
				     Form,
			
 
				 )
			
 
				-
			
 
				 from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
			
 
				 
			
 
				 from fastapi.middleware.cors import CORSMiddleware
			
 
				 from pydantic import BaseModel
			
 
				 
			
 
				+from typing import List
			
 
				 import uuid
			
 
				 import requests
			
 
				 import hashlib
			
@@ -31,6 +31,7 @@ from utils.utils import (
 
				 )
			
 
				 from utils.misc import calculate_sha256
			
 
				 
			
 
				+
			
 
				 from config import (
			
 
				     SRC_LOG_LEVELS,
			
 
				     CACHE_DIR,
			
@@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
 
				             )
			
 
				 
			
 
				     elif app.state.config.TTS_ENGINE == "elevenlabs":
			
 
				-
			
 
				         payload = None
			
 
				         try:
			
 
				             payload = json.loads(body.decode("utf-8"))
			
 
				         except Exception as e:
			
 
				             log.exception(e)
			
 
				-            pass
			
 
				+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
			
 
				 
			
 
				-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
			
 
				+        voice_id = payload.get("voice", "")
			
 
				+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
			
 
				 
			
 
				         headers = {
			
 
				             "Accept": "audio/mpeg",
			
@@ -435,3 +436,69 @@ def transcribe(
 
				             status_code=status.HTTP_400_BAD_REQUEST,
			
 
				             detail=ERROR_MESSAGES.DEFAULT(e),
			
 
				         )
			
 
				+
			
 
				+
			
 
				+def get_available_models() -> List[dict]:
			
 
				+    if app.state.config.TTS_ENGINE == "openai":
			
 
				+        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]
			
 
				+    elif app.state.config.TTS_ENGINE == "elevenlabs":
			
 
				+        headers = {
			
 
				+            "xi-api-key": app.state.config.TTS_API_KEY,
			
 
				+            "Content-Type": "application/json",
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.get(
			
 
				+                "https://api.elevenlabs.io/v1/models", headers=headers
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+            models = response.json()
			
 
				+            return [
			
 
				+                {"name": model["name"], "id": model["model_id"]} for model in models
			
 
				+            ]
			
 
				+        except requests.RequestException as e:
			
 
				+            log.error(f"Error fetching voices: {str(e)}")
			
 
				+    return []
			
 
				+
			
 
				+
			
 
				+@app.get("/models")
			
 
				+async def get_models(user=Depends(get_verified_user)):
			
 
				+    return {"models": get_available_models()}
			
 
				+
			
 
				+
			
 
				+def get_available_voices() -> List[dict]:
			
 
				+    if app.state.config.TTS_ENGINE == "openai":
			
 
				+        return [
			
 
				+            {"name": "alloy", "id": "alloy"},
			
 
				+            {"name": "echo", "id": "echo"},
			
 
				+            {"name": "fable", "id": "fable"},
			
 
				+            {"name": "onyx", "id": "onyx"},
			
 
				+            {"name": "nova", "id": "nova"},
			
 
				+            {"name": "shimmer", "id": "shimmer"},
			
 
				+        ]
			
 
				+    elif app.state.config.TTS_ENGINE == "elevenlabs":
			
 
				+        headers = {
			
 
				+            "xi-api-key": app.state.config.TTS_API_KEY,
			
 
				+            "Content-Type": "application/json",
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.get(
			
 
				+                "https://api.elevenlabs.io/v1/voices", headers=headers
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+            voices_data = response.json()
			
 
				+
			
 
				+            voices = []
			
 
				+            for voice in voices_data.get("voices", []):
			
 
				+                voices.append({"name": voice["name"], "id": voice["voice_id"]})
			
 
				+            return voices
			
 
				+        except requests.RequestException as e:
			
 
				+            log.error(f"Error fetching voices: {str(e)}")
			
 
				+
			
 
				+    return []
			
 
				+
			
 
				+
			
 
				+@app.get("/voices")
			
 
				+async def get_voices(user=Depends(get_verified_user)):
			
 
				+    return {"voices": get_available_voices()}
			
--- a/src/lib/apis/audio/index.ts
+++ b/src/lib/apis/audio/index.ts
@@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
 
				 
			
 
				 	return res;
			
 
				 };
			
 
				+
			
 
				+/**
				+ * Fetch the list of TTS models from the audio API.
				+ *
				+ * Sends an authenticated GET to `${AUDIO_API_BASE_URL}/models`. A non-OK
				+ * response is parsed and its `detail` field is thrown to the caller.
				+ *
				+ * @param token Bearer token placed in the Authorization header.
				+ * @returns The parsed JSON body, or null when a failure carried no `detail`.
				+ */
				+export const getModels = async (token: string = '') => {
				+	let failure = null;
				+	let payload = null;
				+
				+	try {
				+		const response = await fetch(`${AUDIO_API_BASE_URL}/models`, {
				+			method: 'GET',
				+			headers: {
				+				'Content-Type': 'application/json',
				+				Authorization: `Bearer ${token}`
				+			}
				+		});
				+
				+		// Surface the server's error body through the catch branch below.
				+		if (!response.ok) throw await response.json();
				+
				+		payload = await response.json();
				+	} catch (err: any) {
				+		failure = err.detail;
				+		console.log(err);
				+
				+		payload = null;
				+	}
				+
				+	if (failure) {
				+		throw failure;
				+	}
				+
				+	return payload;
				+};
			
 
				+
			
 
				+/**
				+ * Fetch the list of TTS voices from the audio API.
				+ *
				+ * Sends an authenticated GET to `${AUDIO_API_BASE_URL}/voices`. A non-OK
				+ * response is parsed and its `detail` field is thrown to the caller.
				+ *
				+ * @param token Bearer token placed in the Authorization header.
				+ * @returns The parsed JSON body, or null when a failure carried no `detail`.
				+ */
				+export const getVoices = async (token: string = '') => {
				+	let failure = null;
				+	let payload = null;
				+
				+	try {
				+		const response = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
				+			method: 'GET',
				+			headers: {
				+				'Content-Type': 'application/json',
				+				Authorization: `Bearer ${token}`
				+			}
				+		});
				+
				+		// Surface the server's error body through the catch branch below.
				+		if (!response.ok) throw await response.json();
				+
				+		payload = await response.json();
				+	} catch (err: any) {
				+		failure = err.detail;
				+		console.log(err);
				+
				+		payload = null;
				+	}
				+
				+	if (failure) {
				+		throw failure;
				+	}
				+
				+	return payload;
				+};
			
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -1,12 +1,18 @@
 
				 <script lang="ts">
			
 
				-	import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
			
 
				-	import { user, settings, config } from '$lib/stores';
			
 
				-	import { createEventDispatcher, onMount, getContext } from 'svelte';
			
 
				 	import { toast } from 'svelte-sonner';
			
 
				-	import Switch from '$lib/components/common/Switch.svelte';
			
 
				+	import { createEventDispatcher, onMount, getContext } from 'svelte';
			
 
				+	const dispatch = createEventDispatcher();
			
 
				+
			
 
				 	import { getBackendConfig } from '$lib/apis';
			
 
				+	import {
			
 
				+		getAudioConfig,
			
 
				+		updateAudioConfig,
			
 
				+		getModels as _getModels,
			
 
				+		getVoices as _getVoices
			
 
				+	} from '$lib/apis/audio';
			
 
				+	import { user, settings, config } from '$lib/stores';
			
 
				+
			
 
				 	import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
			
 
				-	const dispatch = createEventDispatcher();
			
 
				 
			
 
				 	const i18n = getContext('i18n');
			
 
				 
			
@@ -30,30 +36,41 @@
 
				 	let models = [];
			
 
				 	let nonLocalVoices = false;
			
 
				 
			
 
				-	const getOpenAIVoices = () => {
			
 
				-		voices = [
			
 
				-			{ name: 'alloy' },
			
 
				-			{ name: 'echo' },
			
 
				-			{ name: 'fable' },
			
 
				-			{ name: 'onyx' },
			
 
				-			{ name: 'nova' },
			
 
				-			{ name: 'shimmer' }
			
 
				-		];
			
 
				-	};
			
 
				+	const getModels = async () => {
			
 
				+		if (TTS_ENGINE === '') {
			
 
				+			models = [];
			
 
				+		} else {
			
 
				+			const res = await _getModels(localStorage.token).catch((e) => {
			
 
				+				toast.error(e);
			
 
				+			});
			
 
				 
			
 
				-	const getOpenAIModels = () => {
			
 
				-		models = [{ name: 'tts-1' }, { name: 'tts-1-hd' }];
			
 
				+			if (res) {
			
 
				+				console.log(res);
			
 
				+				models = res.models;
			
 
				+			}
			
 
				+		}
			
 
				 	};
			
 
				 
			
 
				-	const getWebAPIVoices = () => {
			
 
				-		const getVoicesLoop = setInterval(async () => {
			
 
				-			voices = await speechSynthesis.getVoices();
			
 
				+	const getVoices = async () => {
			
 
				+		if (TTS_ENGINE === '') {
			
 
				+			const getVoicesLoop = setInterval(async () => {
			
 
				+				voices = await speechSynthesis.getVoices();
			
 
				+
			
 
				+				// do your loop
			
 
				+				if (voices.length > 0) {
			
 
				+					clearInterval(getVoicesLoop);
			
 
				+				}
			
 
				+			}, 100);
			
 
				+		} else {
			
 
				+			const res = await _getVoices(localStorage.token).catch((e) => {
			
 
				+				toast.error(e);
			
 
				+			});
			
 
				 
			
 
				-			// do your loop
			
 
				-			if (voices.length > 0) {
			
 
				-				clearInterval(getVoicesLoop);
			
 
				+			if (res) {
			
 
				+				console.log(res);
			
 
				+				voices = res.voices;
			
 
				 			}
			
 
				-		}, 100);
			
 
				+		}
			
 
				 	};
			
 
				 
			
 
				 	const updateConfigHandler = async () => {
			
@@ -101,12 +118,8 @@
 
				 			STT_MODEL = res.stt.MODEL;
			
 
				 		}
			
 
				 
			
 
				-		if (TTS_ENGINE === 'openai') {
			
 
				-			getOpenAIVoices();
			
 
				-			getOpenAIModels();
			
 
				-		} else {
			
 
				-			getWebAPIVoices();
			
 
				-		}
			
 
				+		await getVoices();
			
 
				+		await getModels();
			
 
				 	});
			
 
				 </script>
			
 
				 
			
@@ -185,13 +198,15 @@
 
				 							class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
			
 
				 							bind:value={TTS_ENGINE}
			
 
				 							placeholder="Select a mode"
			
 
				-							on:change={(e) => {
			
 
				+							on:change={async (e) => {
			
 
				+								await updateConfigHandler();
			
 
				+								await getVoices();
			
 
				+								await getModels();
			
 
				+
			
 
				 								if (e.target.value === 'openai') {
			
 
				-									getOpenAIVoices();
			
 
				 									TTS_VOICE = 'alloy';
			
 
				 									TTS_MODEL = 'tts-1';
			
 
				 								} else {
			
 
				-									getWebAPIVoices();
			
 
				 									TTS_VOICE = '';
			
 
				 									TTS_MODEL = '';
			
 
				 								}
			
@@ -268,7 +283,7 @@
 
				 
			
 
				 									<datalist id="voice-list">
			
 
				 										{#each voices as voice}
			
 
				-											<option value={voice.name} />
			
 
				+											<option value={voice.id}>{voice.name}</option>
			
 
				 										{/each}
			
 
				 									</datalist>
			
 
				 								</div>
			
@@ -279,15 +294,15 @@
 
				 							<div class="flex w-full">
			
 
				 								<div class="flex-1">
			
 
				 									<input
			
 
				-										list="model-list"
			
 
				+										list="tts-model-list"
			
 
				 										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
			
 
				 										bind:value={TTS_MODEL}
			
 
				 										placeholder="Select a model"
			
 
				 									/>
			
 
				 
			
 
				-									<datalist id="model-list">
			
 
				+									<datalist id="tts-model-list">
			
 
				 										{#each models as model}
			
 
				-											<option value={model.name} />
			
 
				+											<option value={model.id} />
			
 
				 										{/each}
			
 
				 									</datalist>
			
 
				 								</div>
			
@@ -309,7 +324,7 @@
 
				 
			
 
				 									<datalist id="voice-list">
			
 
				 										{#each voices as voice}
			
 
				-											<option value={voice.name} />
			
 
				+											<option value={voice.id}>{voice.name}</option>
			
 
				 										{/each}
			
 
				 									</datalist>
			
 
				 								</div>
			
@@ -320,15 +335,15 @@
 
				 							<div class="flex w-full">
			
 
				 								<div class="flex-1">
			
 
				 									<input
			
 
				-										list="model-list"
			
 
				+										list="tts-model-list"
			
 
				 										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
			
 
				 										bind:value={TTS_MODEL}
			
 
				 										placeholder="Select a model"
			
 
				 									/>
			
 
				 
			
 
				-									<datalist id="model-list">
			
 
				+									<datalist id="tts-model-list">
			
 
				 										{#each models as model}
			
 
				-											<option value={model.name} />
			
 
				+											<option value={model.id} />
			
 
				 										{/each}
			
 
				 									</datalist>
			
 
				 								</div>
			
--- a/src/lib/components/chat/Settings/Audio.svelte
+++ b/src/lib/components/chat/Settings/Audio.svelte
@@ -1,7 +1,10 @@
 
				 <script lang="ts">
			
 
				-	import { user, settings, config } from '$lib/stores';
			
 
				-	import { createEventDispatcher, onMount, getContext } from 'svelte';
			
 
				 	import { toast } from 'svelte-sonner';
			
 
				+	import { createEventDispatcher, onMount, getContext } from 'svelte';
			
 
				+
			
 
				+	import { user, settings, config } from '$lib/stores';
			
 
				+	import { getVoices as _getVoices } from '$lib/apis/audio';
			
 
				+
			
 
				 	import Switch from '$lib/components/common/Switch.svelte';
			
 
				 	const dispatch = createEventDispatcher();
			
 
				 
			
@@ -20,26 +23,26 @@
 
				 	let voices = [];
			
 
				 	let voice = '';
			
 
				 
			
 
				-	const getOpenAIVoices = () => {
			
 
				-		voices = [
			
 
				-			{ name: 'alloy' },
			
 
				-			{ name: 'echo' },
			
 
				-			{ name: 'fable' },
			
 
				-			{ name: 'onyx' },
			
 
				-			{ name: 'nova' },
			
 
				-			{ name: 'shimmer' }
			
 
				-		];
			
 
				-	};
			
 
				+	const getVoices = async () => {
			
 
				+		if ($config.audio.tts.engine === '') {
			
 
				+			const getVoicesLoop = setInterval(async () => {
			
 
				+				voices = await speechSynthesis.getVoices();
			
 
				 
			
 
				-	const getWebAPIVoices = () => {
			
 
				-		const getVoicesLoop = setInterval(async () => {
			
 
				-			voices = await speechSynthesis.getVoices();
			
 
				+				// do your loop
			
 
				+				if (voices.length > 0) {
			
 
				+					clearInterval(getVoicesLoop);
			
 
				+				}
			
 
				+			}, 100);
			
 
				+		} else {
			
 
				+			const res = await _getVoices(localStorage.token).catch((e) => {
			
 
				+				toast.error(e);
			
 
				+			});
			
 
				 
			
 
				-			// do your loop
			
 
				-			if (voices.length > 0) {
			
 
				-				clearInterval(getVoicesLoop);
			
 
				+			if (res) {
			
 
				+				console.log(res);
			
 
				+				voices = res.voices;
			
 
				 			}
			
 
				-		}, 100);
			
 
				+		}
			
 
				 	};
			
 
				 
			
 
				 	const toggleResponseAutoPlayback = async () => {
			
@@ -61,11 +64,7 @@
 
				 		voice = $settings?.audio?.tts?.voice ?? $config.audio.tts.voice ?? '';
			
 
				 		nonLocalVoices = $settings.audio?.tts?.nonLocalVoices ?? false;
			
 
				 
			
 
				-		if ($config.audio.tts.engine === 'openai') {
			
 
				-			getOpenAIVoices();
			
 
				-		} else {
			
 
				-			getWebAPIVoices();
			
 
				-		}
			
 
				+		await getVoices();
			
 
				 	});
			
 
				 </script>
			
 
				 
			
@@ -195,7 +194,7 @@
 
				 
			
 
				 						<datalist id="voice-list">
			
 
				 							{#each voices as voice}
			
 
				-								<option value={voice.name} />
			
 
				+								<option value={voice.id}>{voice.name}</option>
			
 
				 							{/each}
			
 
				 						</datalist>
			
 
				 					</div>