VoiceRecording.svelte

<script lang="ts">
	import { toast } from 'svelte-sonner';
	import { createEventDispatcher, tick, getContext } from 'svelte';

	import { config, settings } from '$lib/stores';
	import { blobToFile } from '$lib/utils';
	import { transcribeAudio } from '$lib/apis/audio';

	const i18n = getContext('i18n');
	const dispatch = createEventDispatcher();

	export let recording = false;

	let loading = false;
	let confirmed = false;

	let durationSeconds = 0;
	let durationCounter = null;

	let transcription = '';

	const startDurationCounter = () => {
		durationCounter = setInterval(() => {
			durationSeconds++;
		}, 1000);
	};

	const stopDurationCounter = () => {
		clearInterval(durationCounter);
		durationSeconds = 0;
	};
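
	// Start or stop capture whenever the bound `recording` prop changes, so the
	// parent component can toggle recording declaratively.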
	$: if (recording) {
		startRecording();
	} else {
		stopRecording();
	}

	const formatSeconds = (seconds) => {
		const minutes = Math.floor(seconds / 60);
		const remainingSeconds = seconds % 60;
		const formattedSeconds = remainingSeconds < 10 ? `0${remainingSeconds}` : remainingSeconds;
		return `${minutes}:${formattedSeconds}`;
	};
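
	// Microphone / recognition handles and visualizer state: the visualizer keeps
	// a rolling window of the most recent normalized RMS samples.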
	let stream;
	let speechRecognition;

	let mediaRecorder;
	let audioChunks = [];

	const MIN_DECIBELS = -45;
	const VISUALIZER_BUFFER_LENGTH = 300;

	let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);

	// Calculate the RMS (root mean square) level from time-domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Normalize the byte (0–255) to [-1, 1]
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};
	const normalizeRMS = (rms) => {
		rms = rms * 10;
		const exp = 1.5; // Exponents greater than 1 expand larger values and compress smaller ones
		const scaledRMS = Math.pow(rms, exp);

		// Clamp between 0.01 (1%) and 1.0 (100%)
		return Math.min(1.0, Math.max(0.01, scaledRMS));
	};
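
	// Sample the microphone through a Web Audio AnalyserNode and push a normalized
	// RMS level into `visualizerData` on every animation frame while recording.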
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;

		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();

		const detectSound = () => {
			const processFrame = () => {
				if (!recording || loading) return;

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate the RMS level from the time-domain data
				const rmsLevel = calculateRMS(timeDomainData);

				// Push the normalized RMS level and keep the buffer at its fixed length
				visualizerData.push(normalizeRMS(rmsLevel));
				if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
					visualizerData.shift();
				}
				visualizerData = visualizerData; // Reassign to trigger Svelte reactivity

				// if (domainData.some((value) => value > 0)) {
				// 	lastSoundTime = Date.now();
				// }

				// if (recording && Date.now() - lastSoundTime > 3000) {
				// 	if ($settings?.speechAutoSend ?? false) {
				// 		confirmRecording();
				// 	}
				// }

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};
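
	// Upload the finished take to the speech-to-text API and emit the resulting
	// text to the parent via the `confirm` event.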
	const transcribeHandler = async (audioBlob) => {
		// Wrap the audio blob in a File for upload
		await tick();
		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res.text);
			dispatch('confirm', res.text);
		}
	};
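
	// Debug helper: downloads the raw recording as a file. Not referenced anywhere
	// in this component.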
	const saveRecording = (blob) => {
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		document.body.appendChild(a);
		a.style.display = 'none';
		a.href = url;
		a.download = 'recording.wav';
		a.click();
		window.URL.revokeObjectURL(url);
	};
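
	// Acquire the microphone and start a MediaRecorder. When the STT engine is set
	// to 'web', the browser's SpeechRecognition API transcribes live; otherwise the
	// recorded chunks are sent to the server for transcription on stop.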
	const startRecording = async () => {
		startDurationCounter();

		stream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(stream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(stream);
		};

		mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);

		mediaRecorder.onstop = async () => {
			console.log('Recording stopped');

			if (($settings?.audio?.stt?.engine ?? '') === 'web') {
				audioChunks = [];
			} else {
				if (confirmed) {
					const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
					await transcribeHandler(audioBlob);

					confirmed = false;
					loading = false;
				}
				audioChunks = [];
				recording = false;
			}
		};

		mediaRecorder.start();
		if ($config.audio?.stt?.engine === 'web' || ($settings?.audio?.stt?.engine ?? '') === 'web') {
			if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
				// Create a SpeechRecognition object
				speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();

				// Set continuous to true for continuous recognition
				speechRecognition.continuous = true;

				// Recognition stops after this much inactivity (in milliseconds)
				const inactivityTimeout = 2000; // 2 seconds

				let timeoutId;

				// Start recognition
				speechRecognition.start();

				// Event triggered when speech is recognized
				speechRecognition.onresult = async (event) => {
					// Clear the inactivity timeout
					clearTimeout(timeoutId);

					// Append the newest recognized segment to the transcription
					console.log(event);
					const transcript = event.results[event.results.length - 1][0].transcript;
					transcription = `${transcription}${transcript}`;

					await tick();
					document.getElementById('chat-textarea')?.focus();

					// Restart the inactivity timeout
					timeoutId = setTimeout(() => {
						console.log('Speech recognition turned off due to inactivity.');
						speechRecognition.stop();
					}, inactivityTimeout);
				};

				// Event triggered when recognition ends
				speechRecognition.onend = function () {
					console.log('recognition ended');

					// Confirm the recording and emit the accumulated transcription
					confirmRecording();
					dispatch('confirm', transcription);

					confirmed = false;
					loading = false;
				};

				// Event triggered when an error occurs
				speechRecognition.onerror = function (event) {
					console.log(event);
					toast.error($i18n.t(`Speech recognition error: {{error}}`, { error: event.error }));

					dispatch('cancel');
					stopRecording();
				};
			}
		}
	};
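
	// Stop the recorder if it is active, reset the timer, and release the microphone.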
	const stopRecording = async () => {
		if (recording && mediaRecorder) {
			await mediaRecorder.stop();
		}
		stopDurationCounter();
		audioChunks = [];

		if (stream) {
			stream.getTracks().forEach((track) => track.stop());
		}
		stream = null;
	};
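
	// Flag the take as confirmed and stop capture; the MediaRecorder onstop handler
	// then transcribes it (server-side STT path).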
	const confirmRecording = async () => {
		loading = true;
		confirmed = true;

		if (recording && mediaRecorder) {
			await mediaRecorder.stop();
		}
		clearInterval(durationCounter);

		if (stream) {
			stream.getTracks().forEach((track) => track.stop());
		}
		stream = null;
	};
</script>
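
<!-- Recording pill: cancel button, live waveform, elapsed time, and a confirm button that turns into a spinner while transcribing -->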
<div
	class="{loading
		? ' bg-gray-100/50 dark:bg-gray-850/50'
		: 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex p-2.5"
>
	<div class="flex items-center mr-1">
		<button
			type="button"
			class="p-1.5
			{loading
				? ' bg-gray-200 dark:bg-gray-700/50'
				: 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '}
			rounded-full"
			on:click={async () => {
				dispatch('cancel');
				stopRecording();
			}}
		>
			<svg
				xmlns="http://www.w3.org/2000/svg"
				fill="none"
				viewBox="0 0 24 24"
				stroke-width="3"
				stroke="currentColor"
				class="size-4"
			>
				<path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
			</svg>
		</button>
	</div>
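
	<!-- Live waveform: the sample buffer is reversed and laid out right-to-left (dir="rtl") so the newest level renders at the right edge -->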
	<div
		class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
		dir="rtl"
	>
		<div class="flex-1 flex items-center gap-0.5 h-6">
			{#each visualizerData.slice().reverse() as rms}
				<div
					class="w-[2px]
					{loading
						? ' bg-gray-500 dark:bg-gray-400 '
						: 'bg-indigo-500 dark:bg-indigo-400 '}
					inline-block h-full"
					style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
				/>
			{/each}
		</div>
	</div>
	<div class="mx-1.5 pr-1 flex justify-center items-center">
		<div
			class="text-sm
			{loading ? ' text-gray-500 dark:text-gray-400 ' : ' text-indigo-400 '}
			font-medium flex-1 mx-auto text-center"
		>
			{formatSeconds(durationSeconds)}
		</div>
	</div>
	<div class="flex items-center mr-1">
		{#if loading}
			<div class="text-gray-500 rounded-full cursor-not-allowed">
				<svg
					width="24"
					height="24"
					viewBox="0 0 24 24"
					xmlns="http://www.w3.org/2000/svg"
					fill="currentColor"
					><style>
						.spinner_OSmW {
							transform-origin: center;
							animation: spinner_T6mA 0.75s step-end infinite;
						}
						@keyframes spinner_T6mA {
							8.3% {
								transform: rotate(30deg);
							}
							16.6% {
								transform: rotate(60deg);
							}
							25% {
								transform: rotate(90deg);
							}
							33.3% {
								transform: rotate(120deg);
							}
							41.6% {
								transform: rotate(150deg);
							}
							50% {
								transform: rotate(180deg);
							}
							58.3% {
								transform: rotate(210deg);
							}
							66.6% {
								transform: rotate(240deg);
							}
							75% {
								transform: rotate(270deg);
							}
							83.3% {
								transform: rotate(300deg);
							}
							91.6% {
								transform: rotate(330deg);
							}
							100% {
								transform: rotate(360deg);
							}
						}
					</style><g class="spinner_OSmW"
						><rect x="11" y="1" width="2" height="5" opacity=".14" /><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(30 12 12)"
							opacity=".29"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(60 12 12)"
							opacity=".43"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(90 12 12)"
							opacity=".57"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(120 12 12)"
							opacity=".71"
						/><rect
							x="11"
							y="1"
							width="2"
							height="5"
							transform="rotate(150 12 12)"
							opacity=".86"
						/><rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" /></g
					></svg
				>
			</div>
		{:else}
			<button
				type="button"
				class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
				on:click={async () => {
					await confirmRecording();
				}}
			>
				<svg
					xmlns="http://www.w3.org/2000/svg"
					fill="none"
					viewBox="0 0 24 24"
					stroke-width="2.5"
					stroke="currentColor"
					class="size-4"
				>
					<path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
				</svg>
			</button>
		{/if}
	</div>
</div>
<style>
	.visualizer {
		display: flex;
		height: 100%;
	}

	.visualizer-bar {
		width: 2px;
		background-color: #4a5aba; /* or whatever color you need */
	}
</style>