10 tháng trước cách đây · e664a429a1
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -43,6 +43,7 @@ from config import (
 
				     AUDIO_STT_OPENAI_API_KEY,
			
 
				     AUDIO_TTS_OPENAI_API_BASE_URL,
			
 
				     AUDIO_TTS_OPENAI_API_KEY,
			
 
				+    AUDIO_TTS_API_KEY,
			
 
				     AUDIO_STT_ENGINE,
			
 
				     AUDIO_STT_MODEL,
			
 
				     AUDIO_TTS_ENGINE,
			
@@ -75,6 +76,7 @@ app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
 
				 app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
			
 
				 app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
			
 
				 app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
			
 
				+app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
			
 
				 
			
 
				 # setting device type for whisper model
			
 
				 whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
			
@@ -87,6 +89,7 @@ SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
 
				 class TTSConfigForm(BaseModel):
			
 
				     OPENAI_API_BASE_URL: str
			
 
				     OPENAI_API_KEY: str
			
 
				+    API_KEY: str
			
 
				     ENGINE: str
			
 
				     MODEL: str
			
 
				     VOICE: str
			
@@ -137,6 +140,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
 
				         "tts": {
			
 
				             "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
			
 
				             "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
			
 
				+            "API_KEY": app.state.config.TTS_API_KEY,
			
 
				             "ENGINE": app.state.config.TTS_ENGINE,
			
 
				             "MODEL": app.state.config.TTS_MODEL,
			
 
				             "VOICE": app.state.config.TTS_VOICE,
			
@@ -156,6 +160,7 @@ async def update_audio_config(
 
				 ):
			
 
				     app.state.config.TTS_OPENAI_API_BASE_URL = form_data.tts.OPENAI_API_BASE_URL
			
 
				     app.state.config.TTS_OPENAI_API_KEY = form_data.tts.OPENAI_API_KEY
			
 
				+    app.state.config.TTS_API_KEY = form_data.tts.API_KEY
			
 
				     app.state.config.TTS_ENGINE = form_data.tts.ENGINE
			
 
				     app.state.config.TTS_MODEL = form_data.tts.MODEL
			
 
				     app.state.config.TTS_VOICE = form_data.tts.VOICE
			
@@ -169,6 +174,7 @@ async def update_audio_config(
 
				         "tts": {
			
 
				             "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL,
			
 
				             "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY,
			
 
				+            "API_KEY": app.state.config.TTS_API_KEY,
			
 
				             "ENGINE": app.state.config.TTS_ENGINE,
			
 
				             "MODEL": app.state.config.TTS_MODEL,
			
 
				             "VOICE": app.state.config.TTS_VOICE,
			
@@ -194,55 +200,111 @@ async def speech(request: Request, user=Depends(get_verified_user)):
 
				     if file_path.is_file():
			
 
				         return FileResponse(file_path)
			
 
				 
			
 
				-    headers = {}
			
 
				-    headers["Authorization"] = f"Bearer {app.state.config.TTS_OPENAI_API_KEY}"
			
 
				-    headers["Content-Type"] = "application/json"
			
 
				-
			
 
				-    try:
			
 
				-        body = body.decode("utf-8")
			
 
				-        body = json.loads(body)
			
 
				-        body["model"] = app.state.config.TTS_MODEL
			
 
				-        body = json.dumps(body).encode("utf-8")
			
 
				-    except Exception as e:
			
 
				-        pass
			
 
				-
			
 
				-    r = None
			
 
				-    try:
			
 
				-        r = requests.post(
			
 
				-            url=f"{app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
			
 
				-            data=body,
			
 
				-            headers=headers,
			
 
				-            stream=True,
			
 
				-        )
			
 
				-
			
 
				-        r.raise_for_status()
			
 
				-
			
 
				-        # Save the streaming content to a file
			
 
				-        with open(file_path, "wb") as f:
			
 
				-            for chunk in r.iter_content(chunk_size=8192):
			
 
				-                f.write(chunk)
			
 
				-
			
 
				-        with open(file_body_path, "w") as f:
			
 
				-            json.dump(json.loads(body.decode("utf-8")), f)
			
 
				-
			
 
				-        # Return the saved file
			
 
				-        return FileResponse(file_path)
			
 
				+    if app.state.config.TTS_ENGINE == "openai":
			
 
				+        headers = {}
			
 
				+        headers["Authorization"] = f"Bearer {app.state.config.TTS_OPENAI_API_KEY}"
			
 
				+        headers["Content-Type"] = "application/json"
			
 
				+
			
 
				+        try:
			
 
				+            body = body.decode("utf-8")
			
 
				+            body = json.loads(body)
			
 
				+            body["model"] = app.state.config.TTS_MODEL
			
 
				+            body = json.dumps(body).encode("utf-8")
			
 
				+        except Exception as e:
			
 
				+            pass
			
 
				+
			
 
				+        r = None
			
 
				+        try:
			
 
				+            r = requests.post(
			
 
				+                url=f"{app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
			
 
				+                data=body,
			
 
				+                headers=headers,
			
 
				+                stream=True,
			
 
				+            )
			
 
				 
			
 
				-    except Exception as e:
			
 
				-        log.exception(e)
			
 
				-        error_detail = "Open WebUI: Server Connection Error"
			
 
				-        if r is not None:
			
 
				-            try:
			
 
				-                res = r.json()
			
 
				-                if "error" in res:
			
 
				-                    error_detail = f"External: {res['error']['message']}"
			
 
				-            except:
			
 
				-                error_detail = f"External: {e}"
			
 
				+            r.raise_for_status()
			
 
				+
			
 
				+            # Save the streaming content to a file
			
 
				+            with open(file_path, "wb") as f:
			
 
				+                for chunk in r.iter_content(chunk_size=8192):
			
 
				+                    f.write(chunk)
			
 
				+
			
 
				+            with open(file_body_path, "w") as f:
			
 
				+                json.dump(json.loads(body.decode("utf-8")), f)
			
 
				+
			
 
				+            # Return the saved file
			
 
				+            return FileResponse(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            log.exception(e)
			
 
				+            error_detail = "Open WebUI: Server Connection Error"
			
 
				+            if r is not None:
			
 
				+                try:
			
 
				+                    res = r.json()
			
 
				+                    if "error" in res:
			
 
				+                        error_detail = f"External: {res['error']['message']}"
			
 
				+                except:
			
 
				+                    error_detail = f"External: {e}"
			
 
				+
			
 
				+            raise HTTPException(
			
 
				+                status_code=r.status_code if r != None else 500,
			
 
				+                detail=error_detail,
			
 
				+            )
			
 
				 
			
 
				-        raise HTTPException(
			
 
				-            status_code=r.status_code if r != None else 500,
			
 
				-            detail=error_detail,
			
 
				-        )
			
 
				+    elif app.state.config.TTS_ENGINE == "elevenlabs":
			
 
				+
			
 
				+        payload = None
			
 
				+        try:
			
 
				+            payload = json.loads(body.decode("utf-8"))
			
 
				+        except Exception as e:
			
 
				+            log.exception(e)
			
 
				+            pass
			
 
				+
			
 
				+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
			
 
				+
			
 
				+        headers = {
			
 
				+            "Accept": "audio/mpeg",
			
 
				+            "Content-Type": "application/json",
			
 
				+            "xi-api-key": app.state.config.TTS_API_KEY,
			
 
				+        }
			
 
				+
			
 
				+        data = {
			
 
				+            "text": payload["input"],
			
 
				+            "model_id": app.state.config.TTS_MODEL,
			
 
				+            "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            r = requests.post(url, json=data, headers=headers)
			
 
				+
			
 
				+            r.raise_for_status()
			
 
				+
			
 
				+            # Save the streaming content to a file
			
 
				+            with open(file_path, "wb") as f:
			
 
				+                for chunk in r.iter_content(chunk_size=8192):
			
 
				+                    f.write(chunk)
			
 
				+
			
 
				+            with open(file_body_path, "w") as f:
			
 
				+                json.dump(json.loads(body.decode("utf-8")), f)
			
 
				+
			
 
				+            # Return the saved file
			
 
				+            return FileResponse(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            log.exception(e)
			
 
				+            error_detail = "Open WebUI: Server Connection Error"
			
 
				+            if r is not None:
			
 
				+                try:
			
 
				+                    res = r.json()
			
 
				+                    if "error" in res:
			
 
				+                        error_detail = f"External: {res['error']['message']}"
			
 
				+                except:
			
 
				+                    error_detail = f"External: {e}"
			
 
				+
			
 
				+            raise HTTPException(
			
 
				+                status_code=r.status_code if r != None else 500,
			
 
				+                detail=error_detail,
			
 
				+            )
			
 
				 
			
 
				 
			
 
				 @app.post("/transcriptions")
			
--- a/backend/config.py
+++ b/backend/config.py
@@ -1339,6 +1339,11 @@ AUDIO_TTS_OPENAI_API_KEY = PersistentConfig(
 
				     os.getenv("AUDIO_TTS_OPENAI_API_KEY", OPENAI_API_KEY),
			
 
				 )
			
 
				 
			
 
				+AUDIO_TTS_API_KEY = PersistentConfig(
			
 
				+    "AUDIO_TTS_API_KEY",
			
 
				+    "audio.tts.api_key",
			
 
				+    os.getenv("AUDIO_TTS_API_KEY", ""),
			
 
				+)
			
 
				 
			
 
				 AUDIO_TTS_ENGINE = PersistentConfig(
			
 
				     "AUDIO_TTS_ENGINE",
			
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -16,6 +16,7 @@
 
				 
			
 
				 	let TTS_OPENAI_API_BASE_URL = '';
			
 
				 	let TTS_OPENAI_API_KEY = '';
			
 
				+	let TTS_API_KEY = '';
			
 
				 	let TTS_ENGINE = '';
			
 
				 	let TTS_MODEL = '';
			
 
				 	let TTS_VOICE = '';
			
@@ -60,6 +61,7 @@
 
				 			tts: {
			
 
				 				OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
			
 
				 				OPENAI_API_KEY: TTS_OPENAI_API_KEY,
			
 
				+				TTS_API_KEY: TTS_API_KEY,
			
 
				 				ENGINE: TTS_ENGINE,
			
 
				 				MODEL: TTS_MODEL,
			
 
				 				VOICE: TTS_VOICE
			
@@ -86,6 +88,7 @@
 
				 			console.log(res);
			
 
				 			TTS_OPENAI_API_BASE_URL = res.tts.OPENAI_API_BASE_URL;
			
 
				 			TTS_OPENAI_API_KEY = res.tts.OPENAI_API_KEY;
			
 
				+			TTS_API_KEY = res.tts.TTS_API_KEY;
			
 
				 
			
 
				 			TTS_ENGINE = res.tts.ENGINE;
			
 
				 			TTS_MODEL = res.tts.MODEL;
			
@@ -190,11 +193,13 @@
 
				 								} else {
			
 
				 									getWebAPIVoices();
			
 
				 									TTS_VOICE = '';
			
 
				+									TTS_MODEL = '';
			
 
				 								}
			
 
				 							}}
			
 
				 						>
			
 
				 							<option value="">{$i18n.t('Web API')}</option>
			
 
				 							<option value="openai">{$i18n.t('OpenAI')}</option>
			
 
				+							<option value="elevenlabs">{$i18n.t('Eleven Labs')}</option>
			
 
				 						</select>
			
 
				 					</div>
			
 
				 				</div>
			
@@ -212,6 +217,17 @@
 
				 							<SensitiveInput placeholder={$i18n.t('API Key')} bind:value={TTS_OPENAI_API_KEY} />
			
 
				 						</div>
			
 
				 					</div>
			
 
				+				{:else if TTS_ENGINE === 'elevenlabs'}
			
 
				+					<div>
			
 
				+						<div class="mt-1 flex gap-2 mb-1">
			
 
				+							<input
			
 
				+								class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
			
 
				+								placeholder={$i18n.t('API Key')}
			
 
				+								bind:value={TTS_API_KEY}
			
 
				+								required
			
 
				+							/>
			
 
				+						</div>
			
 
				+					</div>
			
 
				 				{/if}
			
 
				 
			
 
				 				<hr class=" dark:border-gray-850 my-2" />
			
@@ -278,6 +294,47 @@
 
				 							</div>
			
 
				 						</div>
			
 
				 					</div>
			
 
				+				{:else if TTS_ENGINE === 'elevenlabs'}
			
 
				+					<div class=" flex gap-2">
			
 
				+						<div class="w-full">
			
 
				+							<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Voice')}</div>
			
 
				+							<div class="flex w-full">
			
 
				+								<div class="flex-1">
			
 
				+									<input
			
 
				+										list="voice-list"
			
 
				+										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
			
 
				+										bind:value={TTS_VOICE}
			
 
				+										placeholder="Select a voice"
			
 
				+									/>
			
 
				+
			
 
				+									<datalist id="voice-list">
			
 
				+										{#each voices as voice}
			
 
				+											<option value={voice.name} />
			
 
				+										{/each}
			
 
				+									</datalist>
			
 
				+								</div>
			
 
				+							</div>
			
 
				+						</div>
			
 
				+						<div class="w-full">
			
 
				+							<div class=" mb-1.5 text-sm font-medium">{$i18n.t('TTS Model')}</div>
			
 
				+							<div class="flex w-full">
			
 
				+								<div class="flex-1">
			
 
				+									<input
			
 
				+										list="model-list"
			
 
				+										class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-none"
			
 
				+										bind:value={TTS_MODEL}
			
 
				+										placeholder="Select a model"
			
 
				+									/>
			
 
				+
			
 
				+									<datalist id="model-list">
			
 
				+										{#each models as model}
			
 
				+											<option value={model.name} />
			
 
				+										{/each}
			
 
				+									</datalist>
			
 
				+								</div>
			
 
				+							</div>
			
 
				+						</div>
			
 
				+					</div>
			
 
				 				{/if}
			
 
				 			</div>
			
 
				 		</div>