Timothy J. Baek 11 月之前
父節點
當前提交
4a73a01c24
共有 3 個文件被更改,包括 33 次插入和 26 次删除
  1. 15 3
      backend/apps/audio/main.py
  2. 15 2
      src/lib/components/chat/MessageInput.svelte
  3. 3 21
      src/lib/components/chat/Settings/Audio.svelte

+ 15 - 3
backend/apps/audio/main.py

@@ -17,7 +17,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from faster_whisper import WhisperModel
 from faster_whisper import WhisperModel
 from pydantic import BaseModel
 from pydantic import BaseModel
 
 
-
+import uuid
 import requests
 import requests
 import hashlib
 import hashlib
 from pathlib import Path
 from pathlib import Path
@@ -181,8 +181,15 @@ def transcribe(
         )
         )
 
 
     try:
     try:
-        filename = file.filename
-        file_path = f"{UPLOAD_DIR}/{filename}"
+        ext = file.filename.split(".")[-1]
+
+        id = uuid.uuid4()
+        filename = f"{id}.{ext}"
+
+        file_dir = f"{CACHE_DIR}/audio/transcriptions"
+        os.makedirs(file_dir, exist_ok=True)
+        file_path = f"{file_dir}/{filename}"
+
         contents = file.file.read()
         contents = file.file.read()
         with open(file_path, "wb") as f:
         with open(file_path, "wb") as f:
             f.write(contents)
             f.write(contents)
@@ -215,6 +222,11 @@ def transcribe(
 
 
         transcript = "".join([segment.text for segment in list(segments)])
         transcript = "".join([segment.text for segment in list(segments)])
 
 
+        # save the transcript to a json file
+        transcript_file = f"{file_dir}/{id}.json"
+        with open(transcript_file, "w") as f:
+            json.dump({"transcript": transcript}, f)
+
         return {"text": transcript.strip()}
         return {"text": transcript.strip()}
 
 
     except Exception as e:
     except Exception as e:

+ 15 - 2
src/lib/components/chat/MessageInput.svelte

@@ -842,8 +842,21 @@
 												id="voice-input-button"
 												id="voice-input-button"
 												class=" text-gray-600 dark:text-gray-300 hover:bg-gray-50 dark:hover:bg-gray-850 transition rounded-full p-1.5 mr-0.5 self-center"
 												class=" text-gray-600 dark:text-gray-300 hover:bg-gray-50 dark:hover:bg-gray-850 transition rounded-full p-1.5 mr-0.5 self-center"
 												type="button"
 												type="button"
-												on:click={() => {
-													recording = true;
+												on:click={async () => {
+													const res = await navigator.mediaDevices
+														.getUserMedia({ audio: true })
+														.catch(function (err) {
+															toast.error(
+																$i18n.t(`Permission denied when accessing microphone: {{error}}`, {
+																	error: err
+																})
+															);
+															return null;
+														});
+
+													if (res) {
+														recording = true;
+													}
 												}}
 												}}
 											>
 											>
 												<svg
 												<svg

+ 3 - 21
src/lib/components/chat/Settings/Audio.svelte

@@ -168,7 +168,7 @@
 					<select
 					<select
 						class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 						class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 						bind:value={STTEngine}
 						bind:value={STTEngine}
-						placeholder="Select a mode"
+						placeholder="Select an engine"
 						on:change={(e) => {
 						on:change={(e) => {
 							if (e.target.value !== '') {
 							if (e.target.value !== '') {
 								navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
 								navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
@@ -182,30 +182,12 @@
 							}
 							}
 						}}
 						}}
 					>
 					>
-						<option value="">{$i18n.t('Default (Web API)')}</option>
-						<option value="whisper-local">{$i18n.t('Whisper (Local)')}</option>
+						<option value="">{$i18n.t('Default (Whisper)')}</option>
+						<option value="web">{$i18n.t('Web API')}</option>
 					</select>
 					</select>
 				</div>
 				</div>
 			</div>
 			</div>
 
 
-			<div class=" py-0.5 flex w-full justify-between">
-				<div class=" self-center text-xs font-medium">{$i18n.t('Conversation Mode')}</div>
-
-				<button
-					class="p-1 px-3 text-xs flex rounded transition"
-					on:click={() => {
-						toggleConversationMode();
-					}}
-					type="button"
-				>
-					{#if conversationMode === true}
-						<span class="ml-2 self-center">{$i18n.t('On')}</span>
-					{:else}
-						<span class="ml-2 self-center">{$i18n.t('Off')}</span>
-					{/if}
-				</button>
-			</div>
-
 			<div class=" py-0.5 flex w-full justify-between">
 			<div class=" py-0.5 flex w-full justify-between">
 				<div class=" self-center text-xs font-medium">
 				<div class=" self-center text-xs font-medium">
 					{$i18n.t('Auto-send input after 3 sec.')}
 					{$i18n.t('Auto-send input after 3 sec.')}