CallOverlay.svelte

<script lang="ts">
	import { config, models, settings, showCallOverlay } from '$lib/stores';
	import { onMount, tick, getContext, onDestroy, createEventDispatcher } from 'svelte';

	import { DropdownMenu } from 'bits-ui';
	import Dropdown from '$lib/components/common/Dropdown.svelte';
	import { flyAndScale } from '$lib/utils/transitions';

	const dispatch = createEventDispatcher();

	import { blobToFile } from '$lib/utils';
	import { generateEmoji } from '$lib/apis';
	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';

	import { toast } from 'svelte-sonner';

	import Tooltip from '$lib/components/common/Tooltip.svelte';
	import VideoInputMenu from './CallOverlay/VideoInputMenu.svelte';

	const i18n = getContext('i18n');

	export let eventTarget: EventTarget;
	export let submitPrompt: Function;
	export let stopResponse: Function;
	export let files;
	export let chatId;
	export let modelId;

	let wakeLock = null;
	let model = null;

	let loading = false;
	let confirmed = false;
	let interrupted = false;
	let assistantSpeaking = false;

	let emoji = null;

	let camera = false;
	let cameraStream = null;

	let chatStreaming = false;

	let rmsLevel = 0;
	let hasStartedSpeaking = false;
	let mediaRecorder;
	let audioStream = null;
	let audioChunks = [];

	let videoInputDevices = [];
	let selectedVideoInputDeviceId = null;

	const getVideoInputDevices = async () => {
		const devices = await navigator.mediaDevices.enumerateDevices();
		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');

		if (!!navigator.mediaDevices.getDisplayMedia) {
			videoInputDevices = [
				...videoInputDevices,
				{
					deviceId: 'screen',
					label: 'Screen Share'
				}
			];
		}

		console.log(videoInputDevices);

		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
		}
	};
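	// Note: 'screen' above is a synthetic device entry, not a real camera;
	// startVideoStream treats it specially and routes through getDisplayMedia
	// for screen sharing instead of getUserMedia.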
	const startCamera = async () => {
		await getVideoInputDevices();

		if (cameraStream === null) {
			camera = true;
			await tick();
			try {
				await startVideoStream();
			} catch (err) {
				console.error('Error accessing webcam: ', err);
			}
		}
	};

	const startVideoStream = async () => {
		const video = document.getElementById('camera-feed');
		if (video) {
			if (selectedVideoInputDeviceId === 'screen') {
				cameraStream = await navigator.mediaDevices.getDisplayMedia({
					video: {
						cursor: 'always'
					},
					audio: false
				});
			} else {
				cameraStream = await navigator.mediaDevices.getUserMedia({
					video: {
						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
					}
				});
			}

			if (cameraStream) {
				await getVideoInputDevices();
				video.srcObject = cameraStream;
				await video.play();
			}
		}
	};

	const stopVideoStream = async () => {
		if (cameraStream) {
			const tracks = cameraStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		cameraStream = null;
	};
	const takeScreenshot = () => {
		const video = document.getElementById('camera-feed');
		const canvas = document.getElementById('camera-canvas');

		if (!video || !canvas) {
			return;
		}

		const context = canvas.getContext('2d');

		// Make the canvas match the video dimensions
		canvas.width = video.videoWidth;
		canvas.height = video.videoHeight;

		// Draw the current video frame onto the canvas
		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);

		// Convert the canvas to a base64 data URL
		const dataURL = canvas.toDataURL('image/png');
		console.log(dataURL);

		return dataURL;
	};
	const stopCamera = async () => {
		await stopVideoStream();
		camera = false;
	};

	const MIN_DECIBELS = -55;
	const VISUALIZER_BUFFER_LENGTH = 300;
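	// MIN_DECIBELS sets the analyser's noise floor for speech detection (see analyseAudio).
	// VISUALIZER_BUFFER_LENGTH is not referenced elsewhere in this component; it is
	// presumably retained for a waveform visualizer.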
	const transcribeHandler = async (audioBlob) => {
		// Transcribe the recorded audio and submit the recognized text as a prompt
		await tick();
		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res.text);

			if (res.text !== '') {
				const _responses = await submitPrompt(res.text, { _raw: true });
				console.log(_responses);
			}
		}
	};
	const stopRecordingCallback = async (_continue = true) => {
		if ($showCallOverlay) {
			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');

			// Copy the audioChunks array before clearing it
			const _audioChunks = audioChunks.slice(0);

			audioChunks = [];
			mediaRecorder = false;

			if (_continue) {
				startRecording();
			}

			if (confirmed) {
				loading = true;
				emoji = null;

				if (cameraStream) {
					const imageUrl = takeScreenshot();

					files = [
						{
							type: 'image',
							url: imageUrl
						}
					];
				}

				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
		} else {
			audioChunks = [];
			mediaRecorder = false;

			if (audioStream) {
				const tracks = audioStream.getTracks();
				tracks.forEach((track) => track.stop());
			}
			audioStream = null;
		}
	};
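	// Capture loop: startRecording opens the microphone and starts a MediaRecorder;
	// analyseAudio watches the stream and stops the recorder after ~2s of silence,
	// which fires onstop -> stopRecordingCallback -> transcription, and then re-arms
	// recording for the next turn while the overlay is open.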
	const startRecording = async () => {
		audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(audioStream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(audioStream);
		};

		mediaRecorder.ondataavailable = (event) => {
			if (hasStartedSpeaking) {
				audioChunks.push(event.data);
			}
		};

		mediaRecorder.onstop = (e) => {
			console.log('Recording stopped', e);
			stopRecordingCallback();
		};

		mediaRecorder.start();
	};

	const stopAudioStream = async () => {
		if (audioStream) {
			const tracks = audioStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		audioStream = null;
	};
	// Function to calculate the RMS level from time domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Normalize the data
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};
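	// getByteTimeDomainData yields bytes in [0, 255] with silence centred at 128,
	// so (value - 128) / 128 maps each sample to [-1, 1]; the RMS of those samples
	// drives the avatar/emoji sizing in the template below.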
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;

		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();
		hasStartedSpeaking = false;

		console.log('🔊 Sound detection started', lastSoundTime, hasStartedSpeaking);

		const detectSound = () => {
			const processFrame = () => {
				if (!mediaRecorder || !$showCallOverlay) {
					return;
				}

				if (assistantSpeaking && !($settings?.voiceInterruption ?? false)) {
					// Effectively mute the analyser while the assistant is speaking,
					// so its own audio is not picked up as user speech
					analyser.maxDecibels = 0;
					analyser.minDecibels = -1;
				} else {
					analyser.minDecibels = MIN_DECIBELS;
					analyser.maxDecibels = -30;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate RMS level from time domain data
				rmsLevel = calculateRMS(timeDomainData);

				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');

					if (!hasStartedSpeaking) {
						hasStartedSpeaking = true;
						stopAllAudio();
					}

					lastSoundTime = Date.now();
				}

				// Start silence detection only after initial speech/noise has been detected
				if (hasStartedSpeaking) {
					if (Date.now() - lastSoundTime > 2000) {
						confirmed = true;

						if (mediaRecorder) {
							console.log('%c%s', 'color: red; font-size: 20px;', '🔇 Silence detected');
							mediaRecorder.stop();
							return;
						}
					}
				}

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};
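	// TTS pipeline: chatEventHandler (below) pushes each assistant sentence onto a
	// per-message queue and kicks off fetchAudio to synthesize and cache it, while
	// monitorAndPlayAudio drains the queue in order, playing each clip as soon as
	// its audio becomes available.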
	let finishedMessages = {};
	let currentMessageId = null;
	let currentUtterance = null;

	const speakSpeechSynthesisHandler = (content) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				let voices = [];
				// getVoices() may return an empty list until voices have loaded; poll until ready
				const getVoicesLoop = setInterval(() => {
					voices = speechSynthesis.getVoices();
					if (voices.length > 0) {
						clearInterval(getVoicesLoop);

						const voice =
							voices
								?.filter(
									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
								)
								?.at(0) ?? undefined;

						currentUtterance = new SpeechSynthesisUtterance(content);
						currentUtterance.rate = $settings?.audio?.tts?.speedRate ?? 1;

						if (voice) {
							currentUtterance.voice = voice;
						}
						speechSynthesis.speak(currentUtterance);

						currentUtterance.onend = async (e) => {
							await new Promise((r) => setTimeout(r, 200));
							resolve(e);
						};
					}
				}, 100);
			});
		} else {
			return Promise.resolve();
		}
	};
	const playAudio = (audio) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				const audioElement = document.getElementById('audioElement') as HTMLAudioElement;

				if (audioElement) {
					audioElement.src = audio.src;
					audioElement.muted = true;
					audioElement.playbackRate = $settings?.audio?.tts?.speedRate ?? 1;

					audioElement
						.play()
						.then(() => {
							// Autoplay with sound may be blocked; start muted, then unmute
							audioElement.muted = false;
						})
						.catch((error) => {
							console.error(error);
						});

					audioElement.onended = async (e) => {
						await new Promise((r) => setTimeout(r, 100));
						resolve(e);
					};
				}
			});
		} else {
			return Promise.resolve();
		}
	};
	const stopAllAudio = async () => {
		assistantSpeaking = false;
		interrupted = true;

		if (chatStreaming) {
			stopResponse();
		}

		if (currentUtterance) {
			speechSynthesis.cancel();
			currentUtterance = null;
		}

		const audioElement = document.getElementById('audioElement') as HTMLAudioElement;
		if (audioElement) {
			audioElement.muted = true;
			audioElement.pause();
			audioElement.currentTime = 0;
		}
	};
	let audioAbortController = new AbortController();

	// Audio cache map where key is the content and value is the Audio object.
	const audioCache = new Map();
	const emojiCache = new Map();

	const fetchAudio = async (content) => {
		if (!audioCache.has(content)) {
			try {
				// Set the emoji for the content if needed
				// (local name avoids shadowing the component-level `emoji` state)
				if ($settings?.showEmojiInCall ?? false) {
					const generatedEmoji = await generateEmoji(localStorage.token, modelId, content, chatId);
					if (generatedEmoji) {
						emojiCache.set(content, generatedEmoji);
					}
				}

				if ($config.audio.tts.engine !== '') {
					const res = await synthesizeOpenAISpeech(
						localStorage.token,
						$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
						content
					).catch((error) => {
						console.error(error);
						return null;
					});

					if (res) {
						const blob = await res.blob();
						const blobUrl = URL.createObjectURL(blob);
						audioCache.set(content, new Audio(blobUrl));
					}
				} else {
					audioCache.set(content, true);
				}
			} catch (error) {
				console.error('Error synthesizing speech:', error);
			}
		}

		return audioCache.get(content);
	};
	let messages = {};

	const monitorAndPlayAudio = async (id, signal) => {
		while (!signal.aborted) {
			if (messages[id] && messages[id].length > 0) {
				// Retrieve the next content string from the queue
				const content = messages[id].shift(); // Dequeues the content for playing

				if (audioCache.has(content)) {
					// If content is available in the cache, play it

					// Set the emoji for the content if available
					if (($settings?.showEmojiInCall ?? false) && emojiCache.has(content)) {
						emoji = emojiCache.get(content);
					} else {
						emoji = null;
					}

					if ($config.audio.tts.engine !== '') {
						try {
							console.log(
								'%c%s',
								'color: red; font-size: 20px;',
								`Playing audio for content: ${content}`
							);

							const audio = audioCache.get(content);
							await playAudio(audio); // Resolves once playback has ended
							console.log(`Played audio for content: ${content}`);
							await new Promise((resolve) => setTimeout(resolve, 200)); // Brief pause between clips
						} catch (error) {
							console.error('Error playing audio:', error);
						}
					} else {
						await speakSpeechSynthesisHandler(content);
					}
				} else {
					// If not available in the cache, push it back to the queue and delay
					messages[id].unshift(content); // Re-queue the content at the start
					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to avoid a tight loop
				}
			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
				// If the message is finished and there are no more messages to process, break the loop
				assistantSpeaking = false;
				break;
			} else {
				// No messages to process, sleep for a bit
				await new Promise((resolve) => setTimeout(resolve, 200));
			}
		}

		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
	};
	onMount(async () => {
		const setWakeLock = async () => {
			try {
				wakeLock = await navigator.wakeLock.request('screen');
			} catch (err) {
				// The Wake Lock request has failed - usually system related, such as battery.
				console.log(err);
			}

			if (wakeLock) {
				// Log when the wake lock is released
				wakeLock.addEventListener('release', () => {
					console.log('Wake Lock released');
				});
			}
		};

		if ('wakeLock' in navigator) {
			await setWakeLock();

			document.addEventListener('visibilitychange', async () => {
				// Re-request the wake lock if the document becomes visible
				if (wakeLock !== null && document.visibilityState === 'visible') {
					await setWakeLock();
				}
			});
		}

		model = $models.find((m) => m.id === modelId);

		startRecording();

		const chatStartHandler = async (e) => {
			const { id } = e.detail;

			chatStreaming = true;

			if (currentMessageId !== id) {
				console.log(`Received chat start event for message ID ${id}`);

				currentMessageId = id;
				if (audioAbortController) {
					audioAbortController.abort();
				}
				audioAbortController = new AbortController();

				assistantSpeaking = true;
				// Start monitoring and playing audio for the message ID
				monitorAndPlayAudio(id, audioAbortController.signal);
			}
		};

		const chatEventHandler = async (e) => {
			const { id, content } = e.detail;
			// "id" here is the message id; if it differs from "currentMessageId", do not process.
			// "content" is a single sentence from the assistant; there will be many
			// sentences for the same "id".

			if (currentMessageId === id) {
				console.log(`Received chat event for message ID ${id}: ${content}`);

				try {
					if (messages[id] === undefined) {
						messages[id] = [content];
					} else {
						messages[id].push(content);
					}

					console.log(content);

					fetchAudio(content);
				} catch (error) {
					console.error('Failed to fetch or play audio:', error);
				}
			}
		};

		const chatFinishHandler = async (e) => {
			const { id, content } = e.detail;
			// "content" here is the entire message from the assistant
			finishedMessages[id] = true;

			chatStreaming = false;
		};
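		// Event contract with the chat view: 'chat:start' begins a new playback queue
		// for a message, 'chat' delivers one sentence at a time, and 'chat:finish'
		// marks the queue complete so monitorAndPlayAudio can exit.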
		eventTarget.addEventListener('chat:start', chatStartHandler);
		eventTarget.addEventListener('chat', chatEventHandler);
		eventTarget.addEventListener('chat:finish', chatFinishHandler);

		return async () => {
			eventTarget.removeEventListener('chat:start', chatStartHandler);
			eventTarget.removeEventListener('chat', chatEventHandler);
			eventTarget.removeEventListener('chat:finish', chatFinishHandler);

			audioAbortController.abort();
			await tick();

			await stopAllAudio();
			await stopRecordingCallback(false);
			await stopCamera();
		};
	});

	onDestroy(async () => {
		await stopAllAudio();
		await stopRecordingCallback(false);
		await stopCamera();
	});
</script>
{#if $showCallOverlay}
	<div class="max-w-lg w-full h-full max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
		{#if camera}
			<button
				type="button"
				class="flex justify-center items-center w-full h-20 min-h-20"
				on:click={() => {
					if (assistantSpeaking) {
						stopAllAudio();
					}
				}}
			>
				{#if emoji}
					<div
						class=" transition-all rounded-full"
						style="font-size:{rmsLevel * 100 > 4
							? '4.5'
							: rmsLevel * 100 > 2
								? '4.25'
								: rmsLevel * 100 > 1
									? '3.75'
									: '3.5'}rem;width: 100%; text-align:center;"
					>
						{emoji}
					</div>
				{:else if loading || assistantSpeaking}
					<svg
						class="size-12 text-gray-900 dark:text-gray-400"
						viewBox="0 0 24 24"
						fill="currentColor"
						xmlns="http://www.w3.org/2000/svg"
						><style>
							.spinner_qM83 {
								animation: spinner_8HQG 1.05s infinite;
							}
							.spinner_oXPr {
								animation-delay: 0.1s;
							}
							.spinner_ZTLf {
								animation-delay: 0.2s;
							}
							@keyframes spinner_8HQG {
								0%,
								57.14% {
									animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
									transform: translate(0);
								}
								28.57% {
									animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
									transform: translateY(-6px);
								}
								100% {
									transform: translate(0);
								}
							}
						</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
							class="spinner_qM83 spinner_oXPr"
							cx="12"
							cy="12"
							r="3"
						/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
					>
				{:else}
					<div
						class=" {rmsLevel * 100 > 4
							? ' size-[4.5rem]'
							: rmsLevel * 100 > 2
								? ' size-16'
								: rmsLevel * 100 > 1
									? 'size-14'
									: 'size-12'} transition-all rounded-full {(model?.info?.meta
							?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
							? ' bg-cover bg-center bg-no-repeat'
							: 'bg-black dark:bg-white'} bg-black dark:bg-white"
						style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
						'/static/favicon.png'
							? `background-image: url('${model?.info?.meta?.profile_image_url}');`
							: ''}
					/>
				{/if}
				<!-- navbar -->
			</button>
		{/if}
		<div class="flex justify-center items-center flex-1 h-full w-full max-h-full">
			{#if !camera}
				<button
					type="button"
					on:click={() => {
						if (assistantSpeaking) {
							stopAllAudio();
						}
					}}
				>
					{#if emoji}
						<div
							class=" transition-all rounded-full"
							style="font-size:{rmsLevel * 100 > 4
								? '13'
								: rmsLevel * 100 > 2
									? '12'
									: rmsLevel * 100 > 1
										? '11.5'
										: '11'}rem;width:100%;text-align:center;"
						>
							{emoji}
						</div>
					{:else if loading || assistantSpeaking}
						<svg
							class="size-44 text-gray-900 dark:text-gray-400"
							viewBox="0 0 24 24"
							fill="currentColor"
							xmlns="http://www.w3.org/2000/svg"
							><style>
								.spinner_qM83 {
									animation: spinner_8HQG 1.05s infinite;
								}
								.spinner_oXPr {
									animation-delay: 0.1s;
								}
								.spinner_ZTLf {
									animation-delay: 0.2s;
								}
								@keyframes spinner_8HQG {
									0%,
									57.14% {
										animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
										transform: translate(0);
									}
									28.57% {
										animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
										transform: translateY(-6px);
									}
									100% {
										transform: translate(0);
									}
								}
							</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
								class="spinner_qM83 spinner_oXPr"
								cx="12"
								cy="12"
								r="3"
							/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
						>
					{:else}
						<div
							class=" {rmsLevel * 100 > 4
								? ' size-52'
								: rmsLevel * 100 > 2
									? 'size-48'
									: rmsLevel * 100 > 1
										? 'size-44'
										: 'size-40'} transition-all rounded-full {(model?.info?.meta
								?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
								? ' bg-cover bg-center bg-no-repeat'
								: 'bg-black dark:bg-white'} "
							style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
							'/static/favicon.png'
								? `background-image: url('${model?.info?.meta?.profile_image_url}');`
								: ''}
						/>
					{/if}
				</button>
			{:else}
				<div class="relative flex video-container w-full max-h-full pt-2 pb-4 md:py-6 px-2 h-full">
					<video
						id="camera-feed"
						autoplay
						class="rounded-2xl h-full min-w-full object-cover object-center"
						playsinline
					/>

					<canvas id="camera-canvas" style="display:none;" />

					<div class=" absolute top-4 md:top-8 left-4">
						<button
							type="button"
							class="p-1.5 text-white cursor-pointer backdrop-blur-xl bg-black/10 rounded-full"
							on:click={() => {
								stopCamera();
							}}
						>
							<svg
								xmlns="http://www.w3.org/2000/svg"
								viewBox="0 0 16 16"
								fill="currentColor"
								class="size-6"
							>
								<path
									d="M5.28 4.22a.75.75 0 0 0-1.06 1.06L6.94 8l-2.72 2.72a.75.75 0 1 0 1.06 1.06L8 9.06l2.72 2.72a.75.75 0 1 0 1.06-1.06L9.06 8l2.72-2.72a.75.75 0 0 0-1.06-1.06L8 6.94 5.28 4.22Z"
								/>
							</svg>
						</button>
					</div>
				</div>
			{/if}
		</div>
		<div class="flex justify-between items-center pb-2 w-full">
			<div>
				{#if camera}
					<VideoInputMenu
						devices={videoInputDevices}
						on:change={async (e) => {
							console.log(e.detail);
							selectedVideoInputDeviceId = e.detail;
							await stopVideoStream();
							await startVideoStream();
						}}
					>
						<button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900" type="button">
							<svg
								xmlns="http://www.w3.org/2000/svg"
								viewBox="0 0 20 20"
								fill="currentColor"
								class="size-5"
							>
								<path
									fill-rule="evenodd"
									d="M15.312 11.424a5.5 5.5 0 0 1-9.201 2.466l-.312-.311h2.433a.75.75 0 0 0 0-1.5H3.989a.75.75 0 0 0-.75.75v4.242a.75.75 0 0 0 1.5 0v-2.43l.31.31a7 7 0 0 0 11.712-3.138.75.75 0 0 0-1.449-.39Zm1.23-3.723a.75.75 0 0 0 .219-.53V2.929a.75.75 0 0 0-1.5 0V5.36l-.31-.31A7 7 0 0 0 3.239 8.188a.75.75 0 1 0 1.448.389A5.5 5.5 0 0 1 13.89 6.11l.311.31h-2.432a.75.75 0 0 0 0 1.5h4.243a.75.75 0 0 0 .53-.219Z"
									clip-rule="evenodd"
								/>
							</svg>
						</button>
					</VideoInputMenu>
				{:else}
					<Tooltip content={$i18n.t('Camera')}>
						<button
							class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
							type="button"
							on:click={async () => {
								await navigator.mediaDevices.getUserMedia({ video: true });
								startCamera();
							}}
						>
							<svg
								xmlns="http://www.w3.org/2000/svg"
								fill="none"
								viewBox="0 0 24 24"
								stroke-width="1.5"
								stroke="currentColor"
								class="size-5"
							>
								<path
									stroke-linecap="round"
									stroke-linejoin="round"
									d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
								/>
								<path
									stroke-linecap="round"
									stroke-linejoin="round"
									d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
								/>
							</svg>
						</button>
					</Tooltip>
				{/if}
			</div>

			<div>
				<button
					type="button"
					on:click={() => {
						if (assistantSpeaking) {
							stopAllAudio();
						}
					}}
				>
					<div class=" line-clamp-1 text-sm font-medium">
						{#if loading}
							{$i18n.t('Thinking...')}
						{:else if assistantSpeaking}
							{$i18n.t('Tap to interrupt')}
						{:else}
							{$i18n.t('Listening...')}
						{/if}
					</div>
				</button>
			</div>

			<div>
				<button
					class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
					on:click={async () => {
						await stopAudioStream();
						await stopVideoStream();
						showCallOverlay.set(false);
						dispatch('close');
					}}
					type="button"
				>
					<svg
						xmlns="http://www.w3.org/2000/svg"
						viewBox="0 0 20 20"
						fill="currentColor"
						class="size-5"
					>
						<path
							d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
						/>
					</svg>
				</button>
			</div>
		</div>
	</div>
{/if}
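<!--
	Usage sketch (assumed parent wiring; the prop names match this component's
	exports, but the surrounding handlers are illustrative only):

	<CallOverlay
		bind:files
		{chatId}
		{modelId}
		{eventTarget}
		submitPrompt={(text, opts) => sendUserMessage(text, opts)}
		stopResponse={() => cancelStreaming()}
		on:close={() => showCallOverlay.set(false)}
	/>

	The parent is expected to dispatch 'chat:start', 'chat', and 'chat:finish'
	CustomEvents on `eventTarget` as the assistant response streams in, and the
	page must contain an <audio id="audioElement"> element for playAudio to use.
-->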