<!-- CallOverlay.svelte -->
<script lang="ts">
	// Voice-call overlay: records microphone audio, transcribes it via the
	// audio API, and feeds the transcript to the chat through `submitPrompt`.
	import { settings, showCallOverlay } from '$lib/stores';
	import { onMount, tick, getContext } from 'svelte';
	// NOTE(review): `calculateSHA256` and `findWordIndices` are imported but
	// unused in the visible code — confirm before removing.
	import { blobToFile, calculateSHA256, findWordIndices } from '$lib/utils';
	import { transcribeAudio } from '$lib/apis/audio';
	import { toast } from 'svelte-sonner';

	const i18n = getContext('i18n');

	// Submits the transcribed text as a chat prompt; expected to resolve to the
	// assistant response(s).
	export let submitPrompt: Function;

	let loading = false; // true while transcription + prompt round-trip is in flight
	let confirmed = false; // set once silence detection decides the utterance ended
	let assistantSpeaking = false;
	let assistantAudio = null;

	let rmsLevel = 0; // live mic loudness; drives the pulsing circle in the template
	let hasStartedSpeaking = false;

	// Playback-visualiser state (used by startAudio/visualize).
	let audioContext;
	let analyser;
	let dataArray;
	let audioElement;
	let animationFrameId;

	// Capture state.
	let speechRecognition;
	let mediaRecorder;
	let audioChunks = [];

	const MIN_DECIBELS = -45; // analyser noise floor used for speech detection
	const VISUALIZER_BUFFER_LENGTH = 300;

	let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
	// Route `audioElement` (assistant TTS playback) through an AnalyserNode so
	// its output can be visualised, while keeping it audible via `destination`.
	// NOTE(review): not called anywhere in the visible code — confirm whether
	// this is wired up elsewhere or dead.
	const startAudio = () => {
		audioContext = new (window.AudioContext || window.webkitAudioContext)();
		analyser = audioContext.createAnalyser();
		const source = audioContext.createMediaElementSource(audioElement);
		source.connect(analyser);
		analyser.connect(audioContext.destination); // keep playback audible while analysing
		analyser.fftSize = 32; // Adjust the fftSize
		dataArray = new Uint8Array(analyser.frequencyBinCount);
		visualize();
	};
  36. const visualize = () => {
  37. analyser.getByteFrequencyData(dataArray);
  38. div1Height = dataArray[1] / 2;
  39. div2Height = dataArray[3] / 2;
  40. div3Height = dataArray[5] / 2;
  41. div4Height = dataArray[7] / 2;
  42. animationFrameId = requestAnimationFrame(visualize);
  43. };
  44. // Function to calculate the RMS level from time domain data
  45. const calculateRMS = (data: Uint8Array) => {
  46. let sumSquares = 0;
  47. for (let i = 0; i < data.length; i++) {
  48. const normalizedValue = (data[i] - 128) / 128; // Normalize the data
  49. sumSquares += normalizedValue * normalizedValue;
  50. }
  51. return Math.sqrt(sumSquares / data.length);
  52. };
  53. const normalizeRMS = (rms) => {
  54. rms = rms * 10;
  55. const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
  56. const scaledRMS = Math.pow(rms, exp);
  57. // Scale between 0.01 (1%) and 1.0 (100%)
  58. return Math.min(1.0, Math.max(0.01, scaledRMS));
  59. };
	// Continuously monitor the mic stream on an animation-frame loop:
	//  - update `rmsLevel` for the UI,
	//  - barge-in: cancel assistant speech when the user makes sound,
	//  - after speech has started, stop the recorder once ~2s of silence pass
	//    (setting `confirmed` so onstop will transcribe the take).
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS; // frequency values below this floor read as 0

		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;
		const domainData = new Uint8Array(bufferLength); // frequency bins
		const timeDomainData = new Uint8Array(analyser.fftSize); // raw waveform samples

		let lastSoundTime = Date.now();
		hasStartedSpeaking = false;

		const detectSound = () => {
			const processFrame = () => {
				// Bail out (and stop any live recorder) once the overlay closes
				// or the recorder was torn down elsewhere.
				if (!mediaRecorder || !$showCallOverlay) {
					if (mediaRecorder) {
						mediaRecorder.stop();
					}
					return;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate RMS level from time domain data
				rmsLevel = calculateRMS(timeDomainData);

				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
					// Barge-in: user sound interrupts any assistant speech.
					if (assistantSpeaking) {
						speechSynthesis.cancel();

						if (assistantAudio) {
							assistantAudio.pause();
							assistantAudio.currentTime = 0;
						}
					}

					hasStartedSpeaking = true;
					lastSoundTime = Date.now();
				}

				// Start silence detection only after initial speech/noise has been detected
				if (hasStartedSpeaking) {
					if (Date.now() - lastSoundTime > 2000) {
						// 2s of silence ends the utterance; onstop transcribes it.
						confirmed = true;

						if (mediaRecorder) {
							mediaRecorder.stop();
						}
					}
				}

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};
  111. const transcribeHandler = async (audioBlob) => {
  112. // Create a blob from the audio chunks
  113. await tick();
  114. const file = blobToFile(audioBlob, 'recording.wav');
  115. const res = await transcribeAudio(localStorage.token, file).catch((error) => {
  116. toast.error(error);
  117. return null;
  118. });
  119. if (res) {
  120. toast.success(res.text);
  121. const _responses = await submitPrompt(res.text);
  122. console.log(_responses);
  123. if (_responses.at(0)) {
  124. const response = _responses[0];
  125. if (response) {
  126. assistantSpeaking = true;
  127. if ($settings?.audio?.TTSEngine ?? '') {
  128. speechSynthesis.speak(new SpeechSynthesisUtterance(response));
  129. } else {
  130. console.log('openai');
  131. }
  132. }
  133. }
  134. }
  135. };
  136. const stopRecordingCallback = async () => {
  137. if ($showCallOverlay) {
  138. if (confirmed) {
  139. loading = true;
  140. const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
  141. await transcribeHandler(audioBlob);
  142. confirmed = false;
  143. loading = false;
  144. }
  145. audioChunks = [];
  146. mediaRecorder = false;
  147. startRecording();
  148. } else {
  149. audioChunks = [];
  150. mediaRecorder = false;
  151. }
  152. };
  153. const startRecording = async () => {
  154. const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  155. mediaRecorder = new MediaRecorder(stream);
  156. mediaRecorder.onstart = () => {
  157. console.log('Recording started');
  158. audioChunks = [];
  159. analyseAudio(stream);
  160. };
  161. mediaRecorder.ondataavailable = (event) => {
  162. if (hasStartedSpeaking) {
  163. audioChunks.push(event.data);
  164. }
  165. };
  166. mediaRecorder.onstop = async () => {
  167. console.log('Recording stopped');
  168. await stopRecordingCallback();
  169. };
  170. mediaRecorder.start();
  171. };
	// Kick off recording whenever the overlay becomes visible; the recorder's
	// onstop callback keeps the loop going while it stays visible.
	$: if ($showCallOverlay) {
		startRecording();
	}
</script>
  176. {#if $showCallOverlay}
  177. <div class=" absolute w-full h-full flex z-[999]">
  178. <div
  179. class="absolute w-full h-full bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
  180. >
  181. <div class="max-w-lg w-full h-screen flex flex-col justify-between p-6">
  182. <div>
  183. <!-- navbar -->
  184. </div>
  185. <div class="flex justify-center items-center w-ull">
  186. {#if loading}
  187. <svg
  188. class="size-44 text-gray-900 dark:text-gray-400"
  189. viewBox="0 0 24 24"
  190. fill="currentColor"
  191. xmlns="http://www.w3.org/2000/svg"
  192. ><style>
  193. .spinner_qM83 {
  194. animation: spinner_8HQG 1.05s infinite;
  195. }
  196. .spinner_oXPr {
  197. animation-delay: 0.1s;
  198. }
  199. .spinner_ZTLf {
  200. animation-delay: 0.2s;
  201. }
  202. @keyframes spinner_8HQG {
  203. 0%,
  204. 57.14% {
  205. animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
  206. transform: translate(0);
  207. }
  208. 28.57% {
  209. animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
  210. transform: translateY(-6px);
  211. }
  212. 100% {
  213. transform: translate(0);
  214. }
  215. }
  216. </style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
  217. class="spinner_qM83 spinner_oXPr"
  218. cx="12"
  219. cy="12"
  220. r="3"
  221. /><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
  222. >
  223. {:else}
  224. <div
  225. class=" {rmsLevel * 100 > 4
  226. ? ' size-52'
  227. : rmsLevel * 100 > 2
  228. ? 'size-48'
  229. : rmsLevel * 100 > 1
  230. ? 'size-[11.5rem]'
  231. : 'size-44'} transition-all bg-black dark:bg-white rounded-full"
  232. />
  233. {/if}
  234. </div>
  235. <div class="flex justify-between items-center pb-2 w-full">
  236. <div>
  237. <button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900">
  238. <svg
  239. xmlns="http://www.w3.org/2000/svg"
  240. fill="none"
  241. viewBox="0 0 24 24"
  242. stroke-width="1.5"
  243. stroke="currentColor"
  244. class="size-5"
  245. >
  246. <path
  247. stroke-linecap="round"
  248. stroke-linejoin="round"
  249. d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
  250. />
  251. <path
  252. stroke-linecap="round"
  253. stroke-linejoin="round"
  254. d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
  255. />
  256. </svg>
  257. </button>
  258. </div>
  259. <div>
  260. <button type="button">
  261. <div class=" line-clamp-1 text-sm font-medium">
  262. {#if loading}
  263. Thinking...
  264. {:else}
  265. Listening... {Math.round(rmsLevel * 100)}
  266. {/if}
  267. </div>
  268. </button>
  269. </div>
  270. <div>
  271. <button
  272. class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
  273. on:click={async () => {
  274. showCallOverlay.set(false);
  275. }}
  276. type="button"
  277. >
  278. <svg
  279. xmlns="http://www.w3.org/2000/svg"
  280. viewBox="0 0 20 20"
  281. fill="currentColor"
  282. class="size-5"
  283. >
  284. <path
  285. d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
  286. />
  287. </svg>
  288. </button>
  289. </div>
  290. </div>
  291. </div>
  292. </div>
  293. </div>
  294. {/if}