123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450 |
- <script lang="ts">
- import { settings, showCallOverlay } from '$lib/stores';
- import { onMount, tick, getContext } from 'svelte';
- import { blobToFile, calculateSHA256, extractSentences, findWordIndices } from '$lib/utils';
- import { synthesizeOpenAISpeech, transcribeAudio } from '$lib/apis/audio';
- import { toast } from 'svelte-sonner';
- import Tooltip from '$lib/components/common/Tooltip.svelte';
- const i18n = getContext('i18n');
- export let submitPrompt: Function;
- let loading = false;
- let confirmed = false;
- let assistantSpeaking = false;
- let assistantAudio = {};
- let assistantAudioIdx = null;
- let rmsLevel = 0;
- let hasStartedSpeaking = false;
- let audioContext;
- let analyser;
- let dataArray;
- let audioElement;
- let animationFrameId;
- let speechRecognition;
- let currentUtterance = null;
- let mediaRecorder;
- let audioChunks = [];
- const MIN_DECIBELS = -45;
- const VISUALIZER_BUFFER_LENGTH = 300;
- let visualizerData = Array(VISUALIZER_BUFFER_LENGTH).fill(0);
- const startAudio = () => {
- audioContext = new (window.AudioContext || window.webkitAudioContext)();
- analyser = audioContext.createAnalyser();
- const source = audioContext.createMediaElementSource(audioElement);
- source.connect(analyser);
- analyser.connect(audioContext.destination);
- analyser.fftSize = 32; // Adjust the fftSize
- dataArray = new Uint8Array(analyser.frequencyBinCount);
- visualize();
- };
- const visualize = () => {
- analyser.getByteFrequencyData(dataArray);
- div1Height = dataArray[1] / 2;
- div2Height = dataArray[3] / 2;
- div3Height = dataArray[5] / 2;
- div4Height = dataArray[7] / 2;
- animationFrameId = requestAnimationFrame(visualize);
- };
- // Function to calculate the RMS level from time domain data
- const calculateRMS = (data: Uint8Array) => {
- let sumSquares = 0;
- for (let i = 0; i < data.length; i++) {
- const normalizedValue = (data[i] - 128) / 128; // Normalize the data
- sumSquares += normalizedValue * normalizedValue;
- }
- return Math.sqrt(sumSquares / data.length);
- };
- const normalizeRMS = (rms) => {
- rms = rms * 10;
- const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
- const scaledRMS = Math.pow(rms, exp);
- // Scale between 0.01 (1%) and 1.0 (100%)
- return Math.min(1.0, Math.max(0.01, scaledRMS));
- };
- const analyseAudio = (stream) => {
- const audioContext = new AudioContext();
- const audioStreamSource = audioContext.createMediaStreamSource(stream);
- const analyser = audioContext.createAnalyser();
- analyser.minDecibels = MIN_DECIBELS;
- audioStreamSource.connect(analyser);
- const bufferLength = analyser.frequencyBinCount;
- const domainData = new Uint8Array(bufferLength);
- const timeDomainData = new Uint8Array(analyser.fftSize);
- let lastSoundTime = Date.now();
- hasStartedSpeaking = false;
- const detectSound = () => {
- const processFrame = () => {
- if (!mediaRecorder || !$showCallOverlay) {
- if (mediaRecorder) {
- mediaRecorder.stop();
- }
- return;
- }
- analyser.getByteTimeDomainData(timeDomainData);
- analyser.getByteFrequencyData(domainData);
- // Calculate RMS level from time domain data
- rmsLevel = calculateRMS(timeDomainData);
- // Check if initial speech/noise has started
- const hasSound = domainData.some((value) => value > 0);
- if (hasSound) {
- stopAllAudio();
- hasStartedSpeaking = true;
- lastSoundTime = Date.now();
- }
- // Start silence detection only after initial speech/noise has been detected
- if (hasStartedSpeaking) {
- if (Date.now() - lastSoundTime > 2000) {
- confirmed = true;
- if (mediaRecorder) {
- mediaRecorder.stop();
- }
- }
- }
- window.requestAnimationFrame(processFrame);
- };
- window.requestAnimationFrame(processFrame);
- };
- detectSound();
- };
- const stopAllAudio = () => {
- if (currentUtterance) {
- speechSynthesis.cancel();
- currentUtterance = null;
- }
- if (assistantAudio[assistantAudioIdx]) {
- assistantAudio[assistantAudioIdx].pause();
- assistantAudio[assistantAudioIdx].currentTime = 0;
- }
- const audioElement = document.getElementById('audioElement');
- audioElement.pause();
- audioElement.currentTime = 0;
- assistantSpeaking = false;
- };
- const playAudio = (idx) => {
- return new Promise((res) => {
- assistantAudioIdx = idx;
- const audioElement = document.getElementById('audioElement');
- const audio = assistantAudio[idx];
- audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property
- audioElement.play();
- audioElement.onended = async (e) => {
- await new Promise((r) => setTimeout(r, 300));
- if (Object.keys(assistantAudio).length - 1 === idx) {
- assistantSpeaking = false;
- }
- res(e);
- };
- });
- };
- const getOpenAISpeech = async (text) => {
- const res = await synthesizeOpenAISpeech(
- localStorage.token,
- $settings?.audio?.speaker ?? 'alloy',
- text,
- $settings?.audio?.model ?? 'tts-1'
- ).catch((error) => {
- toast.error(error);
- assistantSpeaking = false;
- return null;
- });
- if (res) {
- const blob = await res.blob();
- const blobUrl = URL.createObjectURL(blob);
- const audio = new Audio(blobUrl);
- assistantAudio = audio;
- }
- };
- const transcribeHandler = async (audioBlob) => {
- // Create a blob from the audio chunks
- await tick();
- const file = blobToFile(audioBlob, 'recording.wav');
- const res = await transcribeAudio(localStorage.token, file).catch((error) => {
- toast.error(error);
- return null;
- });
- if (res) {
- console.log(res.text);
- if (res.text !== '') {
- const _responses = await submitPrompt(res.text);
- console.log(_responses);
- if (_responses.at(0)) {
- const content = _responses[0];
- if (content) {
- assistantSpeakingHandler(content);
- }
- }
- }
- }
- };
- const assistantSpeakingHandler = async (content) => {
- assistantSpeaking = true;
- if (($settings?.audio?.TTSEngine ?? '') == '') {
- currentUtterance = new SpeechSynthesisUtterance(content);
- speechSynthesis.speak(currentUtterance);
- } else if ($settings?.audio?.TTSEngine === 'openai') {
- console.log('openai');
- const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
- const lastIndex = mergedTexts.length - 1;
- if (lastIndex >= 0) {
- const previousText = mergedTexts[lastIndex];
- const wordCount = previousText.split(/\s+/).length;
- if (wordCount < 2) {
- mergedTexts[lastIndex] = previousText + ' ' + currentText;
- } else {
- mergedTexts.push(currentText);
- }
- } else {
- mergedTexts.push(currentText);
- }
- return mergedTexts;
- }, []);
- console.log(sentences);
- let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
- for (const [idx, sentence] of sentences.entries()) {
- const res = await synthesizeOpenAISpeech(
- localStorage.token,
- $settings?.audio?.speaker,
- sentence,
- $settings?.audio?.model
- ).catch((error) => {
- toast.error(error);
- assistantSpeaking = false;
- return null;
- });
- if (res) {
- const blob = await res.blob();
- const blobUrl = URL.createObjectURL(blob);
- const audio = new Audio(blobUrl);
- assistantAudio[idx] = audio;
- lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
- }
- }
- }
- };
- const stopRecordingCallback = async () => {
- if ($showCallOverlay) {
- if (confirmed) {
- loading = true;
- const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
- await transcribeHandler(audioBlob);
- confirmed = false;
- loading = false;
- }
- audioChunks = [];
- mediaRecorder = false;
- startRecording();
- } else {
- audioChunks = [];
- mediaRecorder = false;
- }
- };
- const startRecording = async () => {
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- mediaRecorder = new MediaRecorder(stream);
- mediaRecorder.onstart = () => {
- console.log('Recording started');
- audioChunks = [];
- analyseAudio(stream);
- };
- mediaRecorder.ondataavailable = (event) => {
- if (hasStartedSpeaking) {
- audioChunks.push(event.data);
- }
- };
- mediaRecorder.onstop = async () => {
- console.log('Recording stopped');
- await stopRecordingCallback();
- };
- mediaRecorder.start();
- };
- $: if ($showCallOverlay) {
- startRecording();
- }
- </script>
- {#if $showCallOverlay}
- <audio id="audioElement" src="" style="display: none;" />
- <div class=" absolute w-full h-full flex z-[999]">
- <div
- class="absolute w-full h-full bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"
- >
- <div class="max-w-lg w-full h-screen flex flex-col justify-between p-6">
- <div>
- <!-- navbar -->
- </div>
- <div class="flex justify-center items-center w-ull">
- {#if loading}
- <svg
- class="size-44 text-gray-900 dark:text-gray-400"
- viewBox="0 0 24 24"
- fill="currentColor"
- xmlns="http://www.w3.org/2000/svg"
- ><style>
- .spinner_qM83 {
- animation: spinner_8HQG 1.05s infinite;
- }
- .spinner_oXPr {
- animation-delay: 0.1s;
- }
- .spinner_ZTLf {
- animation-delay: 0.2s;
- }
- @keyframes spinner_8HQG {
- 0%,
- 57.14% {
- animation-timing-function: cubic-bezier(0.33, 0.66, 0.66, 1);
- transform: translate(0);
- }
- 28.57% {
- animation-timing-function: cubic-bezier(0.33, 0, 0.66, 0.33);
- transform: translateY(-6px);
- }
- 100% {
- transform: translate(0);
- }
- }
- </style><circle class="spinner_qM83" cx="4" cy="12" r="3" /><circle
- class="spinner_qM83 spinner_oXPr"
- cx="12"
- cy="12"
- r="3"
- /><circle class="spinner_qM83 spinner_ZTLf" cx="20" cy="12" r="3" /></svg
- >
- {:else}
- <div
- class=" {rmsLevel * 100 > 4
- ? ' size-52'
- : rmsLevel * 100 > 2
- ? 'size-48'
- : rmsLevel * 100 > 1
- ? 'size-[11.5rem]'
- : 'size-44'} transition-all bg-black dark:bg-white rounded-full"
- />
- {/if}
- </div>
- <div class="flex justify-between items-center pb-2 w-full">
- <div>
- <Tooltip content="WIP 🚧">
- <button class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900">
- <svg
- xmlns="http://www.w3.org/2000/svg"
- fill="none"
- viewBox="0 0 24 24"
- stroke-width="1.5"
- stroke="currentColor"
- class="size-5"
- >
- <path
- stroke-linecap="round"
- stroke-linejoin="round"
- d="M6.827 6.175A2.31 2.31 0 0 1 5.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 0 0 2.25 2.25h15A2.25 2.25 0 0 0 21.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 0 0-1.134-.175 2.31 2.31 0 0 1-1.64-1.055l-.822-1.316a2.192 2.192 0 0 0-1.736-1.039 48.774 48.774 0 0 0-5.232 0 2.192 2.192 0 0 0-1.736 1.039l-.821 1.316Z"
- />
- <path
- stroke-linecap="round"
- stroke-linejoin="round"
- d="M16.5 12.75a4.5 4.5 0 1 1-9 0 4.5 4.5 0 0 1 9 0ZM18.75 10.5h.008v.008h-.008V10.5Z"
- />
- </svg>
- </button>
- </Tooltip>
- </div>
- <div>
- <button type="button">
- <div class=" line-clamp-1 text-sm font-medium">
- {#if loading}
- Thinking...
- {:else}
- Listening...
- {/if}
- </div>
- </button>
- </div>
- <div>
- <button
- class=" p-3 rounded-full bg-gray-50 dark:bg-gray-900"
- on:click={async () => {
- showCallOverlay.set(false);
- }}
- type="button"
- >
- <svg
- xmlns="http://www.w3.org/2000/svg"
- viewBox="0 0 20 20"
- fill="currentColor"
- class="size-5"
- >
- <path
- d="M6.28 5.22a.75.75 0 0 0-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 1 0 1.06 1.06L10 11.06l3.72 3.72a.75.75 0 1 0 1.06-1.06L11.06 10l3.72-3.72a.75.75 0 0 0-1.06-1.06L10 8.94 6.28 5.22Z"
- />
- </svg>
- </button>
- </div>
- </div>
- </div>
- </div>
- </div>
- {/if}
|