<!-- VoiceRecording.svelte -->
<script lang="ts">
	import { toast } from 'svelte-sonner';
	import { createEventDispatcher, tick, getContext } from 'svelte';

	import { config, settings } from '$lib/stores';
	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
	import { transcribeAudio } from '$lib/apis/audio';

	const i18n = getContext('i18n');
	const dispatch = createEventDispatcher();

	// Controlled by the parent: toggling this prop starts/stops recording
	// via the reactive block below.
	export let recording = false;

	let loading = false; // true while a transcription request is in flight
	let confirmed = false; // true when the user confirmed (vs. cancelled) the recording

	let durationSeconds = 0; // elapsed recording time shown in the UI
	let durationCounter = null; // interval id for the 1-second duration ticker

	let transcription = ''; // text accumulated from the browser 'web' STT engine
  15. const startDurationCounter = () => {
  16. durationCounter = setInterval(() => {
  17. durationSeconds++;
  18. }, 1000);
  19. };
  20. const stopDurationCounter = () => {
  21. clearInterval(durationCounter);
  22. durationSeconds = 0;
  23. };
	// Svelte reactive statement: react to the parent flipping the `recording` prop.
	// NOTE(review): this also runs once on component init with recording === false,
	// calling stopRecording() before any recorder exists — harmless because
	// stopRecording() guards on `recording && mediaRecorder`.
	$: if (recording) {
		startRecording();
	} else {
		stopRecording();
	}
  29. const formatSeconds = (seconds) => {
  30. const minutes = Math.floor(seconds / 60);
  31. const remainingSeconds = seconds % 60;
  32. const formattedSeconds = remainingSeconds < 10 ? `0${remainingSeconds}` : remainingSeconds;
  33. return `${minutes}:${formattedSeconds}`;
  34. };
	let speechRecognition; // Web Speech API recognizer (used only with the 'web' STT engine)
	let mediaRecorder;
	let audioChunks = []; // audio data collected between recorder start and stop

	const MIN_DECIBELS = -45; // analyser noise floor for the visualizer
	const VISUALIZER_BUFFER_LENGTH = 300; // number of bars kept in the waveform buffer

	// Rolling buffer of normalized RMS levels that drives the waveform bars.
	let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
  41. // Function to calculate the RMS level from time domain data
  42. const calculateRMS = (data: Uint8Array) => {
  43. let sumSquares = 0;
  44. for (let i = 0; i < data.length; i++) {
  45. const normalizedValue = (data[i] - 128) / 128; // Normalize the data
  46. sumSquares += normalizedValue * normalizedValue;
  47. }
  48. return Math.sqrt(sumSquares / data.length);
  49. };
  50. const normalizeRMS = (rms) => {
  51. rms = rms * 10;
  52. const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
  53. const scaledRMS = Math.pow(rms, exp);
  54. // Scale between 0.01 (1%) and 1.0 (100%)
  55. return Math.min(1.0, Math.max(0.01, scaledRMS));
  56. };
  57. const analyseAudio = (stream) => {
  58. const audioContext = new AudioContext();
  59. const audioStreamSource = audioContext.createMediaStreamSource(stream);
  60. const analyser = audioContext.createAnalyser();
  61. analyser.minDecibels = MIN_DECIBELS;
  62. audioStreamSource.connect(analyser);
  63. const bufferLength = analyser.frequencyBinCount;
  64. const domainData = new Uint8Array(bufferLength);
  65. const timeDomainData = new Uint8Array(analyser.fftSize);
  66. let lastSoundTime = Date.now();
  67. const detectSound = () => {
  68. const processFrame = () => {
  69. if (!recording || loading) return;
  70. if (recording && !loading) {
  71. analyser.getByteTimeDomainData(timeDomainData);
  72. analyser.getByteFrequencyData(domainData);
  73. // Calculate RMS level from time domain data
  74. const rmsLevel = calculateRMS(timeDomainData);
  75. // Push the calculated decibel level to visualizerData
  76. visualizerData.push(normalizeRMS(rmsLevel));
  77. // Ensure visualizerData array stays within the buffer length
  78. if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
  79. visualizerData.shift();
  80. }
  81. visualizerData = visualizerData;
  82. // if (domainData.some((value) => value > 0)) {
  83. // lastSoundTime = Date.now();
  84. // }
  85. // if (recording && Date.now() - lastSoundTime > 3000) {
  86. // if ($settings?.speechAutoSend ?? false) {
  87. // confirmRecording();
  88. // }
  89. // }
  90. }
  91. window.requestAnimationFrame(processFrame);
  92. };
  93. window.requestAnimationFrame(processFrame);
  94. };
  95. detectSound();
  96. };
  97. const transcribeHandler = async (audioBlob) => {
  98. // Create a blob from the audio chunks
  99. await tick();
  100. const file = blobToFile(audioBlob, 'recording.wav');
  101. const res = await transcribeAudio(localStorage.token, file).catch((error) => {
  102. toast.error(error);
  103. return null;
  104. });
  105. if (res) {
  106. console.log(res.text);
  107. dispatch('confirm', res.text);
  108. }
  109. };
  110. const saveRecording = (blob) => {
  111. const url = URL.createObjectURL(blob);
  112. const a = document.createElement('a');
  113. document.body.appendChild(a);
  114. a.style = 'display: none';
  115. a.href = url;
  116. a.download = 'recording.wav';
  117. a.click();
  118. window.URL.revokeObjectURL(url);
  119. };
	// Acquire the microphone, start the MediaRecorder, and — when the 'web'
	// STT engine is configured — also start browser SpeechRecognition.
	const startRecording = async () => {
		startDurationCounter();

		const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(stream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(stream); // kick off the waveform visualizer loop
		};
		mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
		mediaRecorder.onstop = async () => {
			console.log('Recording stopped');
			if (($settings?.audio?.stt?.engine ?? '') === 'web') {
				// Web engine: the transcript comes from SpeechRecognition events,
				// so the recorded audio is discarded.
				audioChunks = [];
			} else {
				// Server engine: only transcribe when the user confirmed (not on cancel).
				if (confirmed) {
					const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });

					await transcribeHandler(audioBlob);

					confirmed = false;
					loading = false;
				}
				audioChunks = [];
				recording = false;
			}
		};
		mediaRecorder.start();

		if ($config.audio.stt.engine === 'web' || ($settings?.audio?.stt?.engine ?? '') === 'web') {
			if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
				// Create a SpeechRecognition object
				speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();

				// Set continuous to true for continuous recognition
				speechRecognition.continuous = true;

				// Set the timeout for turning off the recognition after inactivity (in milliseconds)
				const inactivityTimeout = 2000; // 2 seconds

				let timeoutId;
				// Start recognition
				speechRecognition.start();

				// Event triggered when speech is recognized
				speechRecognition.onresult = async (event) => {
					// Clear the inactivity timeout
					clearTimeout(timeoutId);

					// Handle recognized speech: append the latest result to the transcript
					console.log(event);
					const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;

					transcription = `${transcription}${transcript}`;

					await tick();
					document.getElementById('chat-textarea')?.focus();

					// Restart the inactivity timeout
					timeoutId = setTimeout(() => {
						console.log('Speech recognition turned off due to inactivity.');
						speechRecognition.stop();
					}, inactivityTimeout);
				};

				// Event triggered when recognition is ended
				speechRecognition.onend = function () {
					// Recognition finished: emit whatever was transcribed and reset UI state.
					console.log('recognition ended');
					confirmRecording();
					dispatch('confirm', transcription);
					confirmed = false;
					loading = false;
				};

				// Event triggered when an error occurs
				speechRecognition.onerror = function (event) {
					console.log(event);
					toast.error($i18n.t(`Speech recognition error: {{error}}`, { error: event.error }));
					dispatch('cancel');

					stopRecording();
				};
			}
		}
	};
  192. const stopRecording = async () => {
  193. if (recording && mediaRecorder) {
  194. await mediaRecorder.stop();
  195. }
  196. stopDurationCounter();
  197. audioChunks = [];
  198. };
  199. const confirmRecording = async () => {
  200. loading = true;
  201. confirmed = true;
  202. if (recording && mediaRecorder) {
  203. await mediaRecorder.stop();
  204. }
  205. clearInterval(durationCounter);
  206. };
</script>

<div
	class="{loading
		? ' bg-gray-100/50 dark:bg-gray-850/50'
		: 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex p-2.5"
>
	<!-- Cancel button: discards the recording and emits 'cancel' -->
	<div class="flex items-center mr-1">
		<button
			type="button"
			class="p-1.5
			{loading
				? ' bg-gray-200 dark:bg-gray-700/50'
				: 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '}
			rounded-full"
			on:click={async () => {
				dispatch('cancel');
				stopRecording();
			}}
		>
			<svg
				xmlns="http://www.w3.org/2000/svg"
				fill="none"
				viewBox="0 0 24 24"
				stroke-width="3"
				stroke="currentColor"
				class="size-4"
			>
				<path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
			</svg>
		</button>
	</div>
	<!-- Live waveform: newest bars appear on the right (dir="rtl" + reversed buffer) -->
	<div
		class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
		dir="rtl"
	>
		<div class="flex-1 flex items-center gap-0.5 h-6">
			{#each visualizerData.slice().reverse() as rms}
				<div
					class="w-[2px]
					{loading
						? ' bg-gray-500 dark:bg-gray-400 '
						: 'bg-indigo-500 dark:bg-indigo-400 '}
					inline-block h-full"
					style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
				/>
			{/each}
		</div>
	</div>
	<!-- Elapsed recording time -->
	<div class=" mx-1.5 pr-1 flex justify-center items-center">
		<div
			class="text-sm
			{loading ? ' text-gray-500 dark:text-gray-400 ' : ' text-indigo-400 '}
			font-medium flex-1 mx-auto text-center"
		>
			{formatSeconds(durationSeconds)}
		</div>
	</div>
	<!-- Right slot: spinner while transcribing, otherwise the confirm button -->
	<div class="flex items-center mr-1">
		{#if loading}
			<div class=" text-gray-500 rounded-full cursor-not-allowed">
				<svg
					width="24"
					height="24"
					viewBox="0 0 24 24"
					xmlns="http://www.w3.org/2000/svg"
					fill="currentColor"
					><style>
						.spinner_OSmW {
							transform-origin: center;
							animation: spinner_T6mA 0.75s step-end infinite;
						}
						@keyframes spinner_T6mA {
							8.3% {
								transform: rotate(30deg);
							}
							16.6% {
								transform: rotate(60deg);
							}
							25% {
								transform: rotate(90deg);
							}
							33.3% {
								transform: rotate(120deg);
							}
							41.6% {
								transform: rotate(150deg);
							}
							50% {
								transform: rotate(180deg);
							}
							58.3% {
								transform: rotate(210deg);
							}
							66.6% {
								transform: rotate(240deg);
							}
							75% {
								transform: rotate(270deg);
							}
							83.3% {
								transform: rotate(300deg);
							}
							91.6% {
								transform: rotate(330deg);
							}
							100% {
								transform: rotate(360deg);
							}
						}
					</style><g class="spinner_OSmW"
						><rect x="11" y="1" width="2" height="5" opacity=".14" /><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(30 12 12)"
							opacity=".29"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(60 12 12)"
							opacity=".43"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(90 12 12)"
							opacity=".57"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(120 12 12)"
							opacity=".71"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(150 12 12)"
							opacity=".86"
						/><rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" /></g
					></svg
				>
			</div>
		{:else}
			<button
				type="button"
				class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
				on:click={async () => {
					await confirmRecording();
				}}
			>
				<svg
					xmlns="http://www.w3.org/2000/svg"
					fill="none"
					viewBox="0 0 24 24"
					stroke-width="2.5"
					stroke="currentColor"
					class="size-4"
				>
					<path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
				</svg>
			</button>
		{/if}
	</div>
</div>
<style>
	/* NOTE(review): these classes appear unused — the visualizer bars in the
	   template use Tailwind utility classes instead. Confirm before removing. */
	.visualizer {
		display: flex;
		height: 100%;
	}

	.visualizer-bar {
		width: 2px;
		background-color: #4a5aba; /* or whatever color you need */
	}
</style>