<!-- VoiceRecording.svelte -->
  1. <script lang="ts">
  2. import { toast } from 'svelte-sonner';
  3. import { createEventDispatcher, tick, getContext, onMount, onDestroy } from 'svelte';
  4. import { config, settings } from '$lib/stores';
  5. import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
  6. import { transcribeAudio } from '$lib/apis/audio';
  7. const i18n = getContext('i18n');
  8. const dispatch = createEventDispatcher();
  9. export let recording = false;
  10. export let className = ' p-2.5 w-full max-w-full';
  11. let loading = false;
  12. let confirmed = false;
  13. let durationSeconds = 0;
  14. let durationCounter = null;
  15. let transcription = '';
  16. const startDurationCounter = () => {
  17. durationCounter = setInterval(() => {
  18. durationSeconds++;
  19. }, 1000);
  20. };
  21. const stopDurationCounter = () => {
  22. clearInterval(durationCounter);
  23. durationSeconds = 0;
  24. };
  25. $: if (recording) {
  26. startRecording();
  27. } else {
  28. stopRecording();
  29. }
  30. const formatSeconds = (seconds) => {
  31. const minutes = Math.floor(seconds / 60);
  32. const remainingSeconds = seconds % 60;
  33. const formattedSeconds = remainingSeconds < 10 ? `0${remainingSeconds}` : remainingSeconds;
  34. return `${minutes}:${formattedSeconds}`;
  35. };
  36. let stream;
  37. let speechRecognition;
  38. let mediaRecorder;
  39. let audioChunks = [];
  40. const MIN_DECIBELS = -45;
  41. let VISUALIZER_BUFFER_LENGTH = 300;
  42. let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
  43. // Function to calculate the RMS level from time domain data
  44. const calculateRMS = (data: Uint8Array) => {
  45. let sumSquares = 0;
  46. for (let i = 0; i < data.length; i++) {
  47. const normalizedValue = (data[i] - 128) / 128; // Normalize the data
  48. sumSquares += normalizedValue * normalizedValue;
  49. }
  50. return Math.sqrt(sumSquares / data.length);
  51. };
  52. const normalizeRMS = (rms) => {
  53. rms = rms * 10;
  54. const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
  55. const scaledRMS = Math.pow(rms, exp);
  56. // Scale between 0.01 (1%) and 1.0 (100%)
  57. return Math.min(1.0, Math.max(0.01, scaledRMS));
  58. };
  59. const analyseAudio = (stream) => {
  60. const audioContext = new AudioContext();
  61. const audioStreamSource = audioContext.createMediaStreamSource(stream);
  62. const analyser = audioContext.createAnalyser();
  63. analyser.minDecibels = MIN_DECIBELS;
  64. audioStreamSource.connect(analyser);
  65. const bufferLength = analyser.frequencyBinCount;
  66. const domainData = new Uint8Array(bufferLength);
  67. const timeDomainData = new Uint8Array(analyser.fftSize);
  68. let lastSoundTime = Date.now();
  69. const detectSound = () => {
  70. const processFrame = () => {
  71. if (!recording || loading) return;
  72. if (recording && !loading) {
  73. analyser.getByteTimeDomainData(timeDomainData);
  74. analyser.getByteFrequencyData(domainData);
  75. // Calculate RMS level from time domain data
  76. const rmsLevel = calculateRMS(timeDomainData);
  77. // Push the calculated decibel level to visualizerData
  78. visualizerData.push(normalizeRMS(rmsLevel));
  79. // Ensure visualizerData array stays within the buffer length
  80. if (visualizerData.length >= VISUALIZER_BUFFER_LENGTH) {
  81. visualizerData.shift();
  82. }
  83. visualizerData = visualizerData;
  84. // if (domainData.some((value) => value > 0)) {
  85. // lastSoundTime = Date.now();
  86. // }
  87. // if (recording && Date.now() - lastSoundTime > 3000) {
  88. // if ($settings?.speechAutoSend ?? false) {
  89. // confirmRecording();
  90. // }
  91. // }
  92. }
  93. window.requestAnimationFrame(processFrame);
  94. };
  95. window.requestAnimationFrame(processFrame);
  96. };
  97. detectSound();
  98. };
  99. const transcribeHandler = async (audioBlob) => {
  100. // Create a blob from the audio chunks
  101. await tick();
  102. const file = blobToFile(audioBlob, 'recording.wav');
  103. const res = await transcribeAudio(localStorage.token, file).catch((error) => {
  104. toast.error(error);
  105. return null;
  106. });
  107. if (res) {
  108. console.log(res);
  109. dispatch('confirm', res);
  110. }
  111. };
  112. const saveRecording = (blob) => {
  113. const url = URL.createObjectURL(blob);
  114. const a = document.createElement('a');
  115. document.body.appendChild(a);
  116. a.style = 'display: none';
  117. a.href = url;
  118. a.download = 'recording.wav';
  119. a.click();
  120. window.URL.revokeObjectURL(url);
  121. };
  122. const startRecording = async () => {
  123. startDurationCounter();
  124. stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  125. mediaRecorder = new MediaRecorder(stream);
  126. mediaRecorder.onstart = () => {
  127. console.log('Recording started');
  128. audioChunks = [];
  129. analyseAudio(stream);
  130. };
  131. mediaRecorder.ondataavailable = (event) => audioChunks.push(event.data);
  132. mediaRecorder.onstop = async () => {
  133. console.log('Recording stopped');
  134. if ($config.audio.stt.engine === 'web' || ($settings?.audio?.stt?.engine ?? '') === 'web') {
  135. audioChunks = [];
  136. } else {
  137. if (confirmed) {
  138. const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
  139. await transcribeHandler(audioBlob);
  140. confirmed = false;
  141. loading = false;
  142. }
  143. audioChunks = [];
  144. recording = false;
  145. }
  146. };
  147. mediaRecorder.start();
  148. if ($config.audio.stt.engine === 'web' || ($settings?.audio?.stt?.engine ?? '') === 'web') {
  149. if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
  150. // Create a SpeechRecognition object
  151. speechRecognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
  152. // Set continuous to true for continuous recognition
  153. speechRecognition.continuous = true;
  154. // Set the timeout for turning off the recognition after inactivity (in milliseconds)
  155. const inactivityTimeout = 2000; // 3 seconds
  156. let timeoutId;
  157. // Start recognition
  158. speechRecognition.start();
  159. // Event triggered when speech is recognized
  160. speechRecognition.onresult = async (event) => {
  161. // Clear the inactivity timeout
  162. clearTimeout(timeoutId);
  163. // Handle recognized speech
  164. console.log(event);
  165. const transcript = event.results[Object.keys(event.results).length - 1][0].transcript;
  166. transcription = `${transcription}${transcript}`;
  167. await tick();
  168. document.getElementById('chat-input')?.focus();
  169. // Restart the inactivity timeout
  170. timeoutId = setTimeout(() => {
  171. console.log('Speech recognition turned off due to inactivity.');
  172. speechRecognition.stop();
  173. }, inactivityTimeout);
  174. };
  175. // Event triggered when recognition is ended
  176. speechRecognition.onend = function () {
  177. // Restart recognition after it ends
  178. console.log('recognition ended');
  179. confirmRecording();
  180. dispatch('confirm', { text: transcription });
  181. confirmed = false;
  182. loading = false;
  183. };
  184. // Event triggered when an error occurs
  185. speechRecognition.onerror = function (event) {
  186. console.log(event);
  187. toast.error($i18n.t(`Speech recognition error: {{error}}`, { error: event.error }));
  188. dispatch('cancel');
  189. stopRecording();
  190. };
  191. }
  192. }
  193. };
  194. const stopRecording = async () => {
  195. if (recording && mediaRecorder) {
  196. await mediaRecorder.stop();
  197. }
  198. if (speechRecognition) {
  199. speechRecognition.stop();
  200. }
  201. stopDurationCounter();
  202. audioChunks = [];
  203. if (stream) {
  204. const tracks = stream.getTracks();
  205. tracks.forEach((track) => track.stop());
  206. }
  207. stream = null;
  208. };
  209. const confirmRecording = async () => {
  210. loading = true;
  211. confirmed = true;
  212. if (recording && mediaRecorder) {
  213. await mediaRecorder.stop();
  214. }
  215. clearInterval(durationCounter);
  216. if (stream) {
  217. const tracks = stream.getTracks();
  218. tracks.forEach((track) => track.stop());
  219. }
  220. stream = null;
  221. };
  222. let resizeObserver;
  223. let containerWidth;
  224. let maxVisibleItems = 300;
  225. $: maxVisibleItems = Math.floor(containerWidth / 5); // 2px width + 0.5px gap
  226. onMount(() => {
  227. // listen to width changes
  228. resizeObserver = new ResizeObserver(() => {
  229. VISUALIZER_BUFFER_LENGTH = Math.floor(window.innerWidth / 4);
  230. if (visualizerData.length > VISUALIZER_BUFFER_LENGTH) {
  231. visualizerData = visualizerData.slice(visualizerData.length - VISUALIZER_BUFFER_LENGTH);
  232. } else {
  233. visualizerData = Array(VISUALIZER_BUFFER_LENGTH - visualizerData.length)
  234. .fill(0)
  235. .concat(visualizerData);
  236. }
  237. });
  238. resizeObserver.observe(document.body);
  239. });
  240. onDestroy(() => {
  241. // remove resize observer
  242. resizeObserver.disconnect();
  243. });
  244. </script>
  245. <div
  246. bind:clientWidth={containerWidth}
  247. class="{loading
  248. ? ' bg-gray-100/50 dark:bg-gray-850/50'
  249. : 'bg-indigo-300/10 dark:bg-indigo-500/10 '} rounded-full flex justify-between {className}"
  250. >
  251. <div class="flex items-center mr-1">
  252. <button
  253. type="button"
  254. class="p-1.5
  255. {loading
  256. ? ' bg-gray-200 dark:bg-gray-700/50'
  257. : 'bg-indigo-400/20 text-indigo-600 dark:text-indigo-300 '}
  258. rounded-full"
  259. on:click={async () => {
  260. stopRecording();
  261. dispatch('cancel');
  262. }}
  263. >
  264. <svg
  265. xmlns="http://www.w3.org/2000/svg"
  266. fill="none"
  267. viewBox="0 0 24 24"
  268. stroke-width="3"
  269. stroke="currentColor"
  270. class="size-4"
  271. >
  272. <path stroke-linecap="round" stroke-linejoin="round" d="M6 18 18 6M6 6l12 12" />
  273. </svg>
  274. </button>
  275. </div>
  276. <div
  277. class="flex flex-1 self-center items-center justify-between ml-2 mx-1 overflow-hidden h-6"
  278. dir="rtl"
  279. >
  280. <div
  281. class="flex items-center gap-0.5 h-6 w-full max-w-full overflow-hidden overflow-x-hidden flex-wrap"
  282. >
  283. {#each visualizerData.slice().reverse() as rms}
  284. <div class="flex items-center h-full">
  285. <div
  286. class="w-[2px] flex-shrink-0
  287. {loading
  288. ? ' bg-gray-500 dark:bg-gray-400 '
  289. : 'bg-indigo-500 dark:bg-indigo-400 '}
  290. inline-block h-full"
  291. style="height: {Math.min(100, Math.max(14, rms * 100))}%;"
  292. />
  293. </div>
  294. {/each}
  295. </div>
  296. </div>
  297. <div class="flex">
  298. <div class=" mx-1.5 pr-1 flex justify-center items-center">
  299. <div
  300. class="text-sm
  301. {loading ? ' text-gray-500 dark:text-gray-400 ' : ' text-indigo-400 '}
  302. font-medium flex-1 mx-auto text-center"
  303. >
  304. {formatSeconds(durationSeconds)}
  305. </div>
  306. </div>
  307. <div class="flex items-center">
  308. {#if loading}
  309. <div class=" text-gray-500 rounded-full cursor-not-allowed">
  310. <svg
  311. width="24"
  312. height="24"
  313. viewBox="0 0 24 24"
  314. xmlns="http://www.w3.org/2000/svg"
  315. fill="currentColor"
  316. ><style>
  317. .spinner_OSmW {
  318. transform-origin: center;
  319. animation: spinner_T6mA 0.75s step-end infinite;
  320. }
  321. @keyframes spinner_T6mA {
  322. 8.3% {
  323. transform: rotate(30deg);
  324. }
  325. 16.6% {
  326. transform: rotate(60deg);
  327. }
  328. 25% {
  329. transform: rotate(90deg);
  330. }
  331. 33.3% {
  332. transform: rotate(120deg);
  333. }
  334. 41.6% {
  335. transform: rotate(150deg);
  336. }
  337. 50% {
  338. transform: rotate(180deg);
  339. }
  340. 58.3% {
  341. transform: rotate(210deg);
  342. }
  343. 66.6% {
  344. transform: rotate(240deg);
  345. }
  346. 75% {
  347. transform: rotate(270deg);
  348. }
  349. 83.3% {
  350. transform: rotate(300deg);
  351. }
  352. 91.6% {
  353. transform: rotate(330deg);
  354. }
  355. 100% {
  356. transform: rotate(360deg);
  357. }
  358. }
  359. </style><g class="spinner_OSmW"
  360. ><rect x="11" y="1" width="2" height="5" opacity=".14" /><rect
  361. x="11"
  362. y="1"
  363. width="2"
  364. height="5"
  365. transform="rotate(30 12 12)"
  366. opacity=".29"
  367. /><rect
  368. x="11"
  369. y="1"
  370. width="2"
  371. height="5"
  372. transform="rotate(60 12 12)"
  373. opacity=".43"
  374. /><rect
  375. x="11"
  376. y="1"
  377. width="2"
  378. height="5"
  379. transform="rotate(90 12 12)"
  380. opacity=".57"
  381. /><rect
  382. x="11"
  383. y="1"
  384. width="2"
  385. height="5"
  386. transform="rotate(120 12 12)"
  387. opacity=".71"
  388. /><rect
  389. x="11"
  390. y="1"
  391. width="2"
  392. height="5"
  393. transform="rotate(150 12 12)"
  394. opacity=".86"
  395. /><rect x="11" y="1" width="2" height="5" transform="rotate(180 12 12)" /></g
  396. ></svg
  397. >
  398. </div>
  399. {:else}
  400. <button
  401. type="button"
  402. class="p-1.5 bg-indigo-500 text-white dark:bg-indigo-500 dark:text-blue-950 rounded-full"
  403. on:click={async () => {
  404. await confirmRecording();
  405. }}
  406. >
  407. <svg
  408. xmlns="http://www.w3.org/2000/svg"
  409. fill="none"
  410. viewBox="0 0 24 24"
  411. stroke-width="2.5"
  412. stroke="currentColor"
  413. class="size-4"
  414. >
  415. <path stroke-linecap="round" stroke-linejoin="round" d="m4.5 12.75 6 6 9-13.5" />
  416. </svg>
  417. </button>
  418. {/if}
  419. </div>
  420. </div>
  421. </div>
  422. <style>
  423. .visualizer {
  424. display: flex;
  425. height: 100%;
  426. }
  427. .visualizer-bar {
  428. width: 2px;
  429. background-color: #4a5aba; /* or whatever color you need */
  430. }
  431. </style>