CallOverlay.svelte

<script lang="ts">
	import { config, models, settings, showCallOverlay } from '$lib/stores';
	import { onMount, tick, getContext, onDestroy, createEventDispatcher } from 'svelte';
	import { DropdownMenu } from 'bits-ui';
	import { toast } from 'svelte-sonner';

	import Dropdown from '$lib/components/common/Dropdown.svelte';
	import Tooltip from '$lib/components/common/Tooltip.svelte';
	import VideoInputMenu from './CallOverlay/VideoInputMenu.svelte';
	import { flyAndScale } from '$lib/utils/transitions';
	import { blobToFile } from '$lib/utils';
	import { generateEmoji } from '$lib/apis';
	import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';

	const dispatch = createEventDispatcher();
	const i18n = getContext('i18n');

	export let eventTarget: EventTarget;
	export let submitPrompt: Function;
	export let stopResponse: Function;
	export let files;
	export let chatId;
	export let modelId;

	let wakeLock = null;
	let model = null;

	let loading = false;
	let confirmed = false;
	let interrupted = false;
	let assistantSpeaking = false;

	let emoji = null;

	let camera = false;
	let cameraStream = null;

	let chatStreaming = false;
	let rmsLevel = 0;
	let hasStartedSpeaking = false;

	let mediaRecorder;
	let audioStream = null;
	let audioChunks = [];

	let videoInputDevices = [];
	let selectedVideoInputDeviceId = null;
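
	// Enumerate the available camera inputs; when the browser supports
	// getDisplayMedia, a synthetic "Screen Share" entry (deviceId: 'screen') is
	// appended so screen capture can be selected like any other camera.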
	const getVideoInputDevices = async () => {
		const devices = await navigator.mediaDevices.enumerateDevices();
		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');

		if (navigator.mediaDevices.getDisplayMedia) {
			videoInputDevices = [
				...videoInputDevices,
				{
					deviceId: 'screen',
					label: 'Screen Share'
				}
			];
		}

		console.log(videoInputDevices);

		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
		}
	};

	const startCamera = async () => {
		await getVideoInputDevices();

		if (cameraStream === null) {
			camera = true;
			await tick();

			try {
				await startVideoStream();
			} catch (err) {
				console.error('Error accessing webcam: ', err);
			}
		}
	};
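
	// Attach a stream to the #camera-feed element: the synthetic 'screen' device is
	// routed through getDisplayMedia, real cameras through getUserMedia with an
	// exact deviceId constraint.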
	const startVideoStream = async () => {
		const video = document.getElementById('camera-feed');
		if (video) {
			if (selectedVideoInputDeviceId === 'screen') {
				cameraStream = await navigator.mediaDevices.getDisplayMedia({
					video: {
						cursor: 'always'
					},
					audio: false
				});
			} else {
				cameraStream = await navigator.mediaDevices.getUserMedia({
					video: {
						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
					}
				});
			}

			if (cameraStream) {
				await getVideoInputDevices();
				video.srcObject = cameraStream;
				await video.play();
			}
		}
	};

	const stopVideoStream = async () => {
		if (cameraStream) {
			const tracks = cameraStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		cameraStream = null;
	};

	const takeScreenshot = () => {
		const video = document.getElementById('camera-feed');
		const canvas = document.getElementById('camera-canvas');

		if (!canvas) {
			return;
		}

		const context = canvas.getContext('2d');

		// Make the canvas match the video dimensions
		canvas.width = video.videoWidth;
		canvas.height = video.videoHeight;

		// Draw the current video frame onto the canvas
		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);

		// Convert the canvas to a base64 data URL and log it
		const dataURL = canvas.toDataURL('image/png');
		console.log(dataURL);

		return dataURL;
	};

	const stopCamera = async () => {
		await stopVideoStream();
		camera = false;
	};
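
	// Detection thresholds: sounds quieter than MIN_DECIBELS are ignored by the
	// analyser. VISUALIZER_BUFFER_LENGTH is not referenced in this component; it is
	// presumably kept for the waveform visualizer.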
	const MIN_DECIBELS = -55;
	const VISUALIZER_BUFFER_LENGTH = 300;

	const transcribeHandler = async (audioBlob) => {
		// Wrap the recorded audio in a File and send it for transcription
		await tick();

		const file = blobToFile(audioBlob, 'recording.wav');

		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
			toast.error(error);
			return null;
		});

		if (res) {
			console.log(res.text);

			if (res.text !== '') {
				const _responses = await submitPrompt(res.text, { _raw: true });
				console.log(_responses);
			}
		}
	};
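
	// Runs whenever the MediaRecorder stops. While the overlay is open, the recorder
	// is restarted right away (unless _continue is false); if the stop came from
	// silence detection (confirmed === true), the captured chunks are assembled into
	// a WAV blob and transcribed, together with a camera screenshot when the camera
	// is on.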
	const stopRecordingCallback = async (_continue = true) => {
		if ($showCallOverlay) {
			console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');

			// Copy the audio chunks before resetting the buffer
			const _audioChunks = audioChunks.slice(0);

			audioChunks = [];
			mediaRecorder = false;

			if (_continue) {
				startRecording();
			}

			if (confirmed) {
				loading = true;
				emoji = null;

				if (cameraStream) {
					const imageUrl = takeScreenshot();

					files = [
						{
							type: 'image',
							url: imageUrl
						}
					];
				}

				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
		} else {
			audioChunks = [];
			mediaRecorder = false;

			if (audioStream) {
				const tracks = audioStream.getTracks();
				tracks.forEach((track) => track.stop());
			}

			audioStream = null;
		}
	};
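
	// Open the microphone and wire up the MediaRecorder lifecycle: chunks are only
	// collected once speech has been detected, and onstop hands control back to
	// stopRecordingCallback.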
	const startRecording = async () => {
		audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
		mediaRecorder = new MediaRecorder(audioStream);

		mediaRecorder.onstart = () => {
			console.log('Recording started');
			audioChunks = [];
			analyseAudio(audioStream);
		};

		mediaRecorder.ondataavailable = (event) => {
			if (hasStartedSpeaking) {
				audioChunks.push(event.data);
			}
		};

		mediaRecorder.onstop = (e) => {
			console.log('Recording stopped', e);
			stopRecordingCallback();
		};

		mediaRecorder.start();
	};

	const stopAudioStream = async () => {
		if (audioStream) {
			const tracks = audioStream.getTracks();
			tracks.forEach((track) => track.stop());
		}

		audioStream = null;
	};

	// Function to calculate the RMS level from time domain data
	const calculateRMS = (data: Uint8Array) => {
		let sumSquares = 0;
		for (let i = 0; i < data.length; i++) {
			const normalizedValue = (data[i] - 128) / 128; // Normalize the byte to [-1, 1]
			sumSquares += normalizedValue * normalizedValue;
		}
		return Math.sqrt(sumSquares / data.length);
	};
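
	// Voice-activity detection driven by an AnalyserNode. Each animation frame updates
	// rmsLevel for the UI, interrupts assistant playback on the first detected sound,
	// and stops the recorder once two seconds of silence follow detected speech. While
	// the assistant is speaking and voice interruption is disabled, the decibel window
	// is narrowed so the assistant's own audio does not trigger detection.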
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);

		const analyser = audioContext.createAnalyser();
		analyser.minDecibels = MIN_DECIBELS;
		audioStreamSource.connect(analyser);

		const bufferLength = analyser.frequencyBinCount;
		const domainData = new Uint8Array(bufferLength);
		const timeDomainData = new Uint8Array(analyser.fftSize);

		let lastSoundTime = Date.now();
		hasStartedSpeaking = false;

		console.log('🔊 Sound detection started', lastSoundTime, hasStartedSpeaking);

		const detectSound = () => {
			const processFrame = () => {
				if (!mediaRecorder || !$showCallOverlay) {
					return;
				}

				if (assistantSpeaking && !($settings?.voiceInterruption ?? false)) {
					// Desensitize the analyser so the assistant's own speech is not detected
					analyser.maxDecibels = 0;
					analyser.minDecibels = -1;
				} else {
					analyser.minDecibels = MIN_DECIBELS;
					analyser.maxDecibels = -30;
				}

				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);

				// Calculate RMS level from time domain data
				rmsLevel = calculateRMS(timeDomainData);

				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
					// BIG RED TEXT
					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');

					if (!hasStartedSpeaking) {
						hasStartedSpeaking = true;
						stopAllAudio();
					}

					lastSoundTime = Date.now();
				}

				// Start silence detection only after initial speech/noise has been detected
				if (hasStartedSpeaking) {
					if (Date.now() - lastSoundTime > 2000) {
						confirmed = true;

						if (mediaRecorder) {
							console.log('%c%s', 'color: red; font-size: 20px;', '🔇 Silence detected');
							mediaRecorder.stop();
							return;
						}
					}
				}

				window.requestAnimationFrame(processFrame);
			};

			window.requestAnimationFrame(processFrame);
		};

		detectSound();
	};

	let finishedMessages = {};
	let currentMessageId = null;
	let currentUtterance = null;
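
	// Browser speechSynthesis fallback, used when no TTS engine is configured. Voices
	// load asynchronously in some browsers, so poll getVoices() until the list is
	// populated before speaking.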
	const speakSpeechSynthesisHandler = (content) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				let voices = [];
				const getVoicesLoop = setInterval(async () => {
					voices = await speechSynthesis.getVoices();
					if (voices.length > 0) {
						clearInterval(getVoicesLoop);

						const voice =
							voices
								?.filter(
									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
								)
								?.at(0) ?? undefined;

						currentUtterance = new SpeechSynthesisUtterance(content);
						currentUtterance.rate = speechRate;

						if (voice) {
							currentUtterance.voice = voice;
						}

						speechSynthesis.speak(currentUtterance);

						currentUtterance.onend = async (e) => {
							await new Promise((r) => setTimeout(r, 200));
							resolve(e);
						};
					}
				}, 100);
			});
		} else {
			return Promise.resolve();
		}
	};

	const playAudio = (audio) => {
		if ($showCallOverlay) {
			return new Promise((resolve) => {
				const audioElement = document.getElementById('audioElement') as HTMLAudioElement;

				if (audioElement) {
					audioElement.src = audio.src;
					audioElement.muted = true;
					audioElement.playbackRate = speechRate;

					audioElement
						.play()
						.then(() => {
							audioElement.muted = false;
						})
						.catch((error) => {
							console.error(error);
						});

					audioElement.onended = async (e) => {
						await new Promise((r) => setTimeout(r, 100));
						resolve(e);
					};
				}
			});
		} else {
			return Promise.resolve();
		}
	};

	const stopAllAudio = async () => {
		assistantSpeaking = false;
		interrupted = true;

		if (chatStreaming) {
			stopResponse();
		}

		if (currentUtterance) {
			speechSynthesis.cancel();
			currentUtterance = null;
		}

		const audioElement = document.getElementById('audioElement');
		if (audioElement) {
			audioElement.muted = true;
			audioElement.pause();
			audioElement.currentTime = 0;
		}
	};

	let audioAbortController = new AbortController();

	// Audio speed control
	let speechRate = 1;
	let showSpeedMenu = false;
	const speedOptions = [2, 1.75, 1.5, 1.25, 1, 0.75, 0.5];

	const setSpeedRate = (rate: number) => {
		speechRate = rate;
		showSpeedMenu = false;
		updateAudioSpeed();
	};

	const updateAudioSpeed = () => {
		if (currentUtterance) {
			currentUtterance.rate = speechRate;
		}

		const audioElement = document.getElementById('audioElement') as HTMLAudioElement;
		if (audioElement) {
			audioElement.playbackRate = speechRate;
		}
	};

	// Audio cache map where the key is the content and the value is the Audio object
	const audioCache = new Map();
	const emojiCache = new Map();
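
	// Synthesize and cache audio for a sentence ahead of playback. When no TTS engine
	// is configured the cache stores `true` as a marker and playback falls back to
	// speechSynthesis; otherwise the synthesized response is wrapped in an Audio object.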
	const fetchAudio = async (content) => {
		if (!audioCache.has(content)) {
			try {
				// Set the emoji for the content if needed
				if ($settings?.showEmojiInCall ?? false) {
					const emoji = await generateEmoji(localStorage.token, modelId, content, chatId);
					if (emoji) {
						emojiCache.set(content, emoji);
					}
				}

				if ($config.audio.tts.engine !== '') {
					const res = await synthesizeOpenAISpeech(
						localStorage.token,
						$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
						content
					).catch((error) => {
						console.error(error);
						return null;
					});

					if (res) {
						const blob = await res.blob();
						const blobUrl = URL.createObjectURL(blob);
						audioCache.set(content, new Audio(blobUrl));
					}
				} else {
					audioCache.set(content, true);
				}
			} catch (error) {
				console.error('Error synthesizing speech:', error);
			}
		}

		return audioCache.get(content);
	};

	let messages = {};
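
	// Consumer side of the per-message sentence queue: poll messages[id], play each
	// sentence in order once its audio is cached, and exit when the message is marked
	// finished or the abort signal fires. chatEventHandler (below) is the producer.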
	const monitorAndPlayAudio = async (id, signal) => {
		while (!signal.aborted) {
			if (messages[id] && messages[id].length > 0) {
				// Retrieve the next content string from the queue
				const content = messages[id].shift(); // Dequeue the content for playback

				if (audioCache.has(content)) {
					// The content is available in the cache, so play it

					// Set the emoji for the content if available
					if (($settings?.showEmojiInCall ?? false) && emojiCache.has(content)) {
						emoji = emojiCache.get(content);
					} else {
						emoji = null;
					}

					if ($config.audio.tts.engine !== '') {
						try {
							console.log(
								'%c%s',
								'color: red; font-size: 20px;',
								`Playing audio for content: ${content}`
							);

							const audio = audioCache.get(content);
							await playAudio(audio);
							console.log(`Played audio for content: ${content}`);
							await new Promise((resolve) => setTimeout(resolve, 200)); // Brief pause between clips
						} catch (error) {
							console.error('Error playing audio:', error);
						}
					} else {
						await speakSpeechSynthesisHandler(content);
					}
				} else {
					// Not yet in the cache: push it back onto the front of the queue and wait
					messages[id].unshift(content);
					console.log(`Audio for "${content}" not yet available in the cache, re-queued...`);
					await new Promise((resolve) => setTimeout(resolve, 200)); // Wait before retrying to avoid a tight loop
				}
			} else if (finishedMessages[id] && messages[id] && messages[id].length === 0) {
				// The message is finished and there is nothing left to play, so exit the loop
				assistantSpeaking = false;
				break;
			} else {
				// No messages to process; sleep for a bit
				await new Promise((resolve) => setTimeout(resolve, 200));
			}
		}
		console.log(`Audio monitoring and playing stopped for message ID ${id}`);
	};
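
	// On mount: keep the screen awake during the call where the Wake Lock API is
	// available, start recording, and subscribe to the chat event stream
	// (chat:start / chat / chat:finish) emitted via eventTarget.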
	onMount(async () => {
		const setWakeLock = async () => {
			try {
				wakeLock = await navigator.wakeLock.request('screen');
			} catch (err) {
				// The wake lock request failed - usually system-related, such as low battery
				console.log(err);
			}

			if (wakeLock) {
				// Log when the wake lock is released
				wakeLock.addEventListener('release', () => {
					console.log('Wake Lock released');
				});
			}
		};

		if ('wakeLock' in navigator) {
			await setWakeLock();

			document.addEventListener('visibilitychange', async () => {
				// Re-request the wake lock if the document becomes visible
				if (wakeLock !== null && document.visibilityState === 'visible') {
					await setWakeLock();
				}
			});
		}

		model = $models.find((m) => m.id === modelId);

		startRecording();

		const chatStartHandler = async (e) => {
			const { id } = e.detail;

			chatStreaming = true;

			if (currentMessageId !== id) {
				console.log(`Received chat start event for message ID ${id}`);

				currentMessageId = id;
				if (audioAbortController) {
					audioAbortController.abort();
				}
				audioAbortController = new AbortController();

				assistantSpeaking = true;
				// Start monitoring and playing audio for this message ID
				monitorAndPlayAudio(id, audioAbortController.signal);
			}
		};

		const chatEventHandler = async (e) => {
			const { id, content } = e.detail;
			// "id" here is the message ID.
			// If "id" is not the same as "currentMessageId", do not process the event.
			// "content" is a single sentence from the assistant;
			// there will be many sentences for the same "id".
			if (currentMessageId === id) {
				console.log(`Received chat event for message ID ${id}: ${content}`);

				try {
					if (messages[id] === undefined) {
						messages[id] = [content];
					} else {
						messages[id].push(content);
					}

					console.log(content);

					fetchAudio(content);
				} catch (error) {
					console.error('Failed to fetch or play audio:', error);
				}
			}
		};

		const chatFinishHandler = async (e) => {
			const { id, content } = e.detail;
			// "content" here is the entire message from the assistant
			finishedMessages[id] = true;

			chatStreaming = false;
		};

		eventTarget.addEventListener('chat:start', chatStartHandler);
		eventTarget.addEventListener('chat', chatEventHandler);
		eventTarget.addEventListener('chat:finish', chatFinishHandler);

		return async () => {
			eventTarget.removeEventListener('chat:start', chatStartHandler);
			eventTarget.removeEventListener('chat', chatEventHandler);
			eventTarget.removeEventListener('chat:finish', chatFinishHandler);

			audioAbortController.abort();
			await tick();

			await stopAllAudio();
			await stopRecordingCallback(false);
			await stopCamera();
		};
	});

	onDestroy(async () => {
		await stopAllAudio();
		await stopRecordingCallback(false);
		await stopCamera();
	});
</script>

{#if $showCallOverlay}
	<div class="max-w-lg w-full h-full max-h-[100dvh] flex flex-col justify-between p-3 md:p-6">
		{#if camera}
			<button
				type="button"
				class="flex justify-center items-center w-full h-20 min-h-20"
				on:click={() => {
					if (assistantSpeaking) {
						stopAllAudio();
					}
				}}
			>
				{#if emoji}
					<div
						class=" transition-all rounded-full"
						style="font-size:{rmsLevel * 100 > 4
							? '4.5'
							: rmsLevel * 100 > 2
								? '4.25'
								: rmsLevel * 100 > 1
									? '3.75'
									: '3.5'}rem;width: 100%; text-align:center;"
					>
						{emoji}
					</div>
				{:else if loading || assistantSpeaking}
					<svg
						class="size-12 text-gray-900 dark:text-gray-400"
						viewBox="0 0 24 24"
						fill="currentColor"
						xmlns="http://www.w3.org/2000/svg"
						><style>
							.spinner_qM83 {
								animation: spinner_8HQG 1.05s infinite;
							}
							.spinner_oXPr {
								animation-delay: 0.1s;
							}
							.spinner_ZTLf {
								animation-delay: 0.2s;
							}
							@keyframes spinner_8HQG {
								0%,
								57.14% {
									animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
									transform: translate(0);
								}
								28.57% {
									animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
									transform: translateY(-6px);
								}
								100% {
									transform: translate(0);
								}
							}
						</style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
							class="spinner_qM83 spinner_oXPr"
							cx="12"
							cy="12"
							r="3"
						/><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
					>
				{:else}
					<div
						class=" {rmsLevel * 100 > 4
							? ' size-[4.5rem]'
							: rmsLevel * 100 > 2
								? ' size-16'
								: rmsLevel * 100 > 1
									? 'size-14'
									: 'size-12'} transition-all rounded-full {(model?.info?.meta
							?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
							? ' bg-cover bg-center bg-no-repeat'
							: 'bg-black dark:bg-white'}"
						style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
						'/static/favicon.png'
							? `background-image: url('${model?.info?.meta?.profile_image_url}');`
							: ''}
					/>
				{/if}
				<!-- navbar -->
			</button>
		{/if}
  618. <div class="flex justify-center items-center flex-1 h-full w-full max-h-full">
  619. {#if !camera}
  620. <button
  621. type="button"
  622. on:click={() => {
  623. if (assistantSpeaking) {
  624. stopAllAudio();
  625. }
  626. }}
  627. >
  628. {#if emoji}
  629. <div
  630. class=" transition-all rounded-full"
  631. style="font-size:{rmsLevel * 100 > 4
  632. ? '13'
  633. : rmsLevel * 100 > 2
  634. ? '12'
  635. : rmsLevel * 100 > 1
  636. ? '11.5'
  637. : '11'}rem;width:100%;text-align:center;"
  638. >
  639. {emoji}
  640. </div>
  641. {:else if loading || assistantSpeaking}
  642. <svg
  643. class="size-44 text-gray-900 dark:text-gray-400"
  644. viewBox="0 0 24 24"
  645. fill="currentColor"
  646. xmlns="http://www.w3.org/2000/svg"
  647. ><style>
  648. .spinner_qM83 {
  649. animation: spinner_8HQG 1.05s infinite;
  650. }
  651. .spinner_oXPr {
  652. animation-delay: 0.1s;
  653. }
  654. .spinner_ZTLf {
  655. animation-delay: 0.2s;
  656. }
  657. @keyframes spinner_8HQG {
  658. 0%,
  659. 57.14% {
  660. animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
  661. transform: translate(0);
  662. }
  663. 28.57% {
  664. animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
  665. transform: translateY(-6px);
  666. }
  667. 100% {
  668. transform: translate(0);
  669. }
  670. }
  671. </style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
  672. class="spinner_qM83 spinner_oXPr"
  673. cx="12"
  674. cy="12"
  675. r="3"
  676. /><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
  677. >
  678. {:else}
  679. <div
  680. class=" {rmsLevel * 100 > 4
  681. ? ' size-52'
  682. : rmsLevel * 100 > 2
  683. ? 'size-48'
  684. : rmsLevel * 100 > 1
  685. ? 'size-44'
  686. : 'size-40'} transition-all rounded-full {(model?.info?.meta
  687. ?.profile_image_url ?? '/static/favicon.png') !== '/static/favicon.png'
  688. ? ' bg-cover bg-center bg-no-repeat'
  689. : 'bg-black dark:bg-white'} "
  690. style={(model?.info?.meta?.profile_image_url ?? '/static/favicon.png') !==
  691. '/static/favicon.png'
  692. ? `background-image: url('${model?.info?.meta?.profile_image_url}');`
  693. : ''}
  694. />
  695. {/if}
  696. </button>
  697. {:else}
  698. <div class="relative flex video-container w-full max-h-full pt-2 pb-4 md:py-6 px-2 h-full">
  699. <video
  700. id="camera-feed"
  701. autoplay
  702. class="rounded-2xl h-full min-w-full object-cover object-center"
  703. playsinline
  704. />
  705. <canvas id="camera-canvas" style="display:none;" />
  706. <div class=" absolute top-4 md:top-8 left-4">
  707. <button
  708. type="button"
  709. class="p-1.5 text-white cursor-pointer backdrop-blur-xl bg-black/10 rounded-full"
  710. on:click={() => {
  711. stopCamera();
  712. }}
  713. >
  714. <svg
  715. xmlns="http://www.w3.org/2000/svg"
  716. viewBox="0 0 16 16"
  717. fill="currentColor"
  718. class="size-6"
  719. >
  720. <path
  721. d="M5.28 4.22a.75.75 0 0 0-1.06 1.06L6.94 8l-2.72 2.72a.75.75 0 1 0 1.06 1.06L8 9.06l2.72 2.72a.75.75 0 1 0 1.06-1.06L9.06 8l2.72-2.72a.75.75 0 0 0-1.06-1.06L8 6.94 5.28 4.22Z"
  722. />
  723. </svg>
  724. </button>
  725. </div>
  726. </div>
  727. {/if}
  728. </div>
  729. <div class="flex justify-between items-center pb-2 w-full">
  730. <div>
  731. {#if camera}
  732. <VideoInputMenu
  733. devices={videoInputDevices}
  734. on:change={async (e) => {
  735. console.log(e.detail);
  736. selectedVideoInputDeviceId = e.detail;
  737. await stopVideoStream();
  738. await startVideoStream();
  739. }}
  740. >
  741. <button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900" type="button">
  742. <svg
  743. xmlns="http://www.w3.org/2000/svg"
  744. viewBox="0 0 20 20"
  745. fill="currentColor"
  746. class="size-5"
  747. >
  748. <path
  749. fill-rule="evenodd"
  750. d="M15.312 11.424a5.5 5.5 0 0 1-9.201 2.466l-.312-.311h2.433a.75.75 0 0 0 0-1.5H3.989a.75.75 0 0 0-.75.75v4.242a.75.75 0 0 0 1.5 0v-2.43l.31.31a7 7 0 0 0 11.712-3.138.75.75 0 0 0-1.449-.39Zm1.23-3.723a.75.75 0 0 0 .219-.53V2.929a.75.75 0 0 0-1.5 0V5.36l-.31-.31A7 7 0 0 0 3.239 8.188a.75.75 0 1 0 1.448.389A5.5 5.5 0 0 1 13.89 6.11l.311.31h-2.432a.75.75 0 0 0 0 1.5h4.243a.75.75 0 0 0 .53-.219Z"
  751. clip-rule="evenodd"
  752. />
  753. </svg>
  754. </button>
  755. </VideoInputMenu>
  756. {:else}
  757. <Tooltip content={$i18n.t('Camera')}>
  758. <button
  759. class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
  760. type="button"
  761. on:click={async () => {
  762. await navigator.mediaDevices.getUserMedia({ video: true });
  763. startCamera();
  764. }}
  765. >
  766. <svg
  767. xmlns="http://www.w3.org/2000/svg"
  768. fill="none"
  769. viewBox="0 0 24 24"
  770. stroke-width="1.5"
  771. stroke="currentColor"
  772. class="size-5"
  773. >
  774. <path
  775. stroke-linecap="round"
  776. stroke-linejoin="round"
  777. d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
  778. />
  779. <path
  780. stroke-linecap="round"
  781. stroke-linejoin="round"
  782. d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
  783. />
  784. </svg>
  785. </button>
  786. </Tooltip>
  787. {/if}
  788. </div>

			<div>
				<button
					type="button"
					on:click={() => {
						if (assistantSpeaking) {
							stopAllAudio();
						}
					}}
				>
					<div class=" line-clamp-1 text-sm font-medium">
						{#if loading}
							{$i18n.t('Thinking...')}
						{:else if assistantSpeaking}
							{$i18n.t('Tap to interrupt')}
						{:else}
							{$i18n.t('Listening...')}
						{/if}
					</div>
				</button>
			</div>
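
			<!-- Playback speed selector; setSpeedRate also applies the new rate to audio that is already playing -->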
  809. <div class="relative">
  810. <Dropdown bind:show={showSpeedMenu}>
  811. <button class="p-2 rounded-full bg-gray-50 dark:bg-gray-900">
  812. <svg width="24" height="24" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
  813. <polygon points="8,5 8,19 19,12" fill="currentColor"/>
  814. <path d="M12 2A10 10 0 0 0 12 22" fill="none" stroke="currentColor" stroke-width="2" stroke-dasharray="2,2"/>
  815. <path d="M12 2A10 10 0 0 1 12 22" fill="none" stroke="currentColor" stroke-width="2"/>
  816. </svg>
  817. </button>
  818. <div slot="content">
  819. <DropdownMenu.Content
  820. class="w-full max-w-[180px] rounded-lg px-1 py-1.5 border border-gray-300/30 dark:border-gray-700/50 z-[9999] bg-white dark:bg-gray-900 dark:text-white shadow-sm"
  821. sideOffset={6}
  822. side="top"
  823. align="start"
  824. transition={flyAndScale}
  825. >
  826. {#each speedOptions as speed}
  827. <DropdownMenu.Item
  828. class="flex gap-2 items-center px-3 py-2 text-sm cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-800 rounded-md {speechRate === speed ? 'bg-gray-200 dark:bg-gray-600' : ''}"
  829. on:click={() => setSpeedRate(speed)}
  830. >
  831. <div class="flex items-center">
  832. <div class="line-clamp-1">
  833. {speed}x
  834. </div>
  835. </div>
  836. </DropdownMenu.Item>
  837. {/each}
  838. </DropdownMenu.Content>
  839. </div>
  840. </Dropdown>
  841. </div>

			<div>
				<button
					class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
					on:click={async () => {
						await stopAudioStream();
						await stopVideoStream();

						showCallOverlay.set(false);
						dispatch('close');
					}}
					type="button"
				>
					<svg
						xmlns="http://www.w3.org/2000/svg"
						viewBox="0 0 20 20"
						fill="currentColor"
						class="size-5"
					>
						<path
							d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
						/>
					</svg>
				</button>
			</div>
		</div>
	</div>
{/if}