7 meses atrás · eacb69074e
--- a/backend/open_webui/apps/audio/main.py
+++ b/backend/open_webui/apps/audio/main.py
@@ -309,20 +309,33 @@ async def speech(request: Request, user=Depends(get_verified_user)):
 
				             log.exception(e)
			
 
				             raise HTTPException(status_code=400, detail="Invalid JSON payload")
			
 
				 
			
 
				-        import azure.cognitiveservices.speech as speechsdk
			
 
				+        region = "uksouth"
			
 
				+        language = "en-GB-SoniaNeural"
			
 
				+        locale = "en-GB"
			
 
				+        output_format = "audio-24khz-160kbitrate-mono-mp3"
			
 
				+        url = f"https://{region}.tts.speech.microsoft.com/cognitiveservices/v1"
			
 
				 
			
 
				-        config = speechsdk.SpeechConfig(subscription=app.state.config.TTS_API_KEY, region="uksouth")
			
 
				-        speaker_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=False, filename=str(file_path))
			
 
				+        headers = {
			
 
				+            'Ocp-Apim-Subscription-Key': app.state.config.TTS_API_KEY,
			
 
				+            'Content-Type': 'application/ssml+xml',
			
 
				+            'X-Microsoft-OutputFormat': output_format
			
 
				+        }
			
 
				 
			
 
				-        client = speechsdk.SpeechSynthesizer(speech_config=config, audio_config=speaker_config)
			
 
				-        result = client.speak_text(payload["input"])
			
 
				+        data = f"""<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{locale}">
			
 
				+                <voice name="{language}">{payload["input"]}</voice>
			
 
				+            </speak>"""
			
 
				 
			
 
				-        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
			
 
				+        response = requests.post(url, headers=headers, data=data)
			
 
				+
			
 
				+        if response.status_code == 200:
			
 
				+            with open(file_path, "wb") as f:
			
 
				+                f.write(response.content)
			
 
				             return FileResponse(file_path)
			
 
				         else:
			
 
				+            log.error(f"Error synthesizing speech - {response.reason}")
			
 
				             raise HTTPException(
			
 
				                 status_code=500,
			
 
				-                detail=f"Error synthesizing speech - {result.reason}")
			
 
				+                detail=f"Error synthesizing speech - {response.reason}")
			
 
				 
			
 
				 
			
 
				 
			
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -79,8 +79,6 @@ extract_msg
 
				 pydub
			
 
				 duckduckgo-search~=6.2.11
			
 
				 
			
 
				-azure-cognitiveservices-speech==1.40.0
			
 
				-
			
 
				 ## Tests
			
 
				 docker~=7.1.0
			
 
				 pytest~=8.3.2