@@ -14,16 +14,18 @@

	const i18n = getContext('i18n');

	export let eventTarget: EventTarget;
+
	export let submitPrompt: Function;
+	export let stopResponse: Function;
+
	export let files;

	export let chatId;
	export let modelId;

-	let message = '';
-
	let loading = false;
	let confirmed = false;
+	let interrupted = false;

	let emoji = null;
@@ -31,17 +33,141 @@
	let cameraStream = null;

	let assistantSpeaking = false;
-	let assistantAudio = {};
-	let assistantAudioIdx = null;

-	let rmsLevel = 0;
-	let hasStartedSpeaking = false;
+	let chatStreaming = false;
+	let assistantMessage = '';
+	let assistantSentences = [];
+	let assistantSentenceAudios = {};
+	let assistantSentenceIdx = -1;
+
+	let audioQueue = [];
+
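+	// Merge any sentence fragment shorter than two words into the following sentence so TTS is not requested for tiny chunks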
+	$: assistantSentences = extractSentences(assistantMessage).reduce((mergedTexts, currentText) => {
+		const lastIndex = mergedTexts.length - 1;
+		if (lastIndex >= 0) {
+			const previousText = mergedTexts[lastIndex];
+			const wordCount = previousText.split(/\s+/).length;
+			if (wordCount < 2) {
+				mergedTexts[lastIndex] = previousText + ' ' + currentText;
+			} else {
+				mergedTexts.push(currentText);
+			}
+		} else {
+			mergedTexts.push(currentText);
+		}
+		return mergedTexts;
+	}, []);

	let currentUtterance = null;

+	let rmsLevel = 0;
+	let hasStartedSpeaking = false;
+
	let mediaRecorder;
	let audioChunks = [];

+	$: console.log('hasStartedSpeaking', hasStartedSpeaking);
+
+	let videoInputDevices = [];
+	let selectedVideoInputDeviceId = null;
+
+	const getVideoInputDevices = async () => {
+		const devices = await navigator.mediaDevices.enumerateDevices();
+		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
+
+		if (!!navigator.mediaDevices.getDisplayMedia) {
+			videoInputDevices = [
+				...videoInputDevices,
+				{
+					deviceId: 'screen',
+					label: 'Screen Share'
+				}
+			];
+		}
+
+		console.log(videoInputDevices);
+		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
+			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
+		}
+	};
+
+	const startCamera = async () => {
+		await getVideoInputDevices();
+
+		if (cameraStream === null) {
+			camera = true;
+			await tick();
+			try {
+				await startVideoStream();
+			} catch (err) {
+				console.error('Error accessing webcam: ', err);
+			}
+		}
+	};
+
+	const startVideoStream = async () => {
+		const video = document.getElementById('camera-feed');
+		if (video) {
+			if (selectedVideoInputDeviceId === 'screen') {
+				cameraStream = await navigator.mediaDevices.getDisplayMedia({
+					video: {
+						cursor: 'always'
+					},
+					audio: false
+				});
+			} else {
+				cameraStream = await navigator.mediaDevices.getUserMedia({
+					video: {
+						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
+					}
+				});
+			}
+
+			if (cameraStream) {
+				await getVideoInputDevices();
+				video.srcObject = cameraStream;
+				await video.play();
+			}
+		}
+	};
+
+	const stopVideoStream = async () => {
+		if (cameraStream) {
+			const tracks = cameraStream.getTracks();
+			tracks.forEach((track) => track.stop());
+		}
+
+		cameraStream = null;
+	};
+
+	const takeScreenshot = () => {
+		const video = document.getElementById('camera-feed');
+		const canvas = document.getElementById('camera-canvas');
+
+		if (!canvas) {
+			return;
+		}
+
+		const context = canvas.getContext('2d');
+
+		// Make the canvas match the video dimensions
+		canvas.width = video.videoWidth;
+		canvas.height = video.videoHeight;
+
+		// Draw the image from the video onto the canvas
+		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
+
+		// Convert the canvas to a data base64 URL and console log it
+		const dataURL = canvas.toDataURL('image/png');
+		console.log(dataURL);
+
+		return dataURL;
+	};
+
+	const stopCamera = async () => {
+		await stopVideoStream();
+		camera = false;
+	};
+
	const MIN_DECIBELS = -45;
	const VISUALIZER_BUFFER_LENGTH = 300;
@@ -55,15 +181,6 @@
		return Math.sqrt(sumSquares / data.length);
	};

-	const normalizeRMS = (rms) => {
-		rms = rms * 10;
-		const exp = 1.5; // Adjust exponent value; values greater than 1 expand larger numbers more and compress smaller numbers more
-		const scaledRMS = Math.pow(rms, exp);
-
-		// Scale between 0.01 (1%) and 1.0 (100%)
-		return Math.min(1.0, Math.max(0.01, scaledRMS));
-	};
-
	const analyseAudio = (stream) => {
		const audioContext = new AudioContext();
		const audioStreamSource = audioContext.createMediaStreamSource(stream);
@@ -83,12 +200,9 @@
		const detectSound = () => {
			const processFrame = () => {
				if (!mediaRecorder || !$showCallOverlay) {
-					if (mediaRecorder) {
-						mediaRecorder.stop();
-					}
-
					return;
				}
+
				analyser.getByteTimeDomainData(timeDomainData);
				analyser.getByteFrequencyData(domainData);
@@ -98,9 +212,12 @@
				// Check if initial speech/noise has started
				const hasSound = domainData.some((value) => value > 0);
				if (hasSound) {
-					stopAllAudio();
					hasStartedSpeaking = true;
					lastSoundTime = Date.now();
+
+					// BIG RED TEXT
+					console.log('%c%s', 'color: red; font-size: 20px;', '🔊 Sound detected');
+					stopAllAudio();
				}

				// Start silence detection only after initial speech/noise has been detected
@@ -123,35 +240,94 @@
		detectSound();
	};

-	const stopAllAudio = () => {
+	const transcribeHandler = async (audioBlob) => {
+		// Create a blob from the audio chunks
+
+		await tick();
+		const file = blobToFile(audioBlob, 'recording.wav');
+
+		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
+			toast.error(error);
+			return null;
+		});
+
+		if (res) {
+			console.log(res.text);
+
+			if (res.text !== '') {
+				const _responses = await submitPrompt(res.text, { _raw: true });
+				console.log(_responses);
+			}
+		}
+	};
+
+	const stopAllAudio = async () => {
+		interrupted = true;
+
+		if (chatStreaming) {
+			stopResponse();
+		}
+
		if (currentUtterance) {
			speechSynthesis.cancel();
			currentUtterance = null;
		}
-		if (assistantAudio[assistantAudioIdx]) {
-			assistantAudio[assistantAudioIdx].pause();
-			assistantAudio[assistantAudioIdx].currentTime = 0;
-		}

-		const audioElement = document.getElementById('audioElement');
+		await tick();
+		audioQueue = [];
+		await tick();

+		const audioElement = document.getElementById('audioElement');
		if (audioElement) {
			audioElement.pause();
			audioElement.currentTime = 0;
		}
+
		assistantSpeaking = false;
	};

-	const playAudio = (idx) => {
+	const speakSpeechSynthesisHandler = (content) => {
+		if ($showCallOverlay) {
+			return new Promise((resolve) => {
+				let voices = [];
+				const getVoicesLoop = setInterval(async () => {
+					voices = await speechSynthesis.getVoices();
+					if (voices.length > 0) {
+						clearInterval(getVoicesLoop);
+
+						const voice =
+							voices
+								?.filter(
+									(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+								)
+								?.at(0) ?? undefined;
+
+						currentUtterance = new SpeechSynthesisUtterance(content);
+
+						if (voice) {
+							currentUtterance.voice = voice;
+						}
+
+						speechSynthesis.speak(currentUtterance);
+						currentUtterance.onend = async (e) => {
+							await new Promise((r) => setTimeout(r, 100));
+							resolve(e);
+						};
+					}
+				}, 100);
+			});
+		} else {
+			return Promise.resolve();
+		}
+	};
+
+	const playAudio = (audio) => {
		if ($showCallOverlay) {
-			return new Promise((res) => {
-				assistantAudioIdx = idx;
+			return new Promise((resolve) => {
				const audioElement = document.getElementById('audioElement');
-				const audio = assistantAudio[idx];

				if (audioElement) {
-					audioElement.src = audio.src; // Assume `assistantAudio` has objects with a `src` property
-
+					audioElement.src = audio.src;
					audioElement.muted = true;

					audioElement
@@ -160,17 +336,12 @@
							audioElement.muted = false;
						})
						.catch((error) => {
-							toast.error(error);
+							console.error(error);
						});

					audioElement.onended = async (e) => {
-						await new Promise((r) => setTimeout(r, 300));
-
-						if (Object.keys(assistantAudio).length - 1 === idx) {
-							assistantSpeaking = false;
-						}
-
-						res(e);
+						await new Promise((r) => setTimeout(r, 100));
+						resolve(e);
					};
				}
			});
@@ -179,147 +350,57 @@
		}
	};

-	const getOpenAISpeech = async (text) => {
-		const res = await synthesizeOpenAISpeech(
-			localStorage.token,
-			$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-			text
-		).catch((error) => {
-			toast.error(error);
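+	// Play queued TTS audio one chunk at a time; playback is skipped once the user has interrupted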
+	const playAudioHandler = async () => {
+		console.log('playAudioHandler', audioQueue, assistantSpeaking, audioQueue.length > 0);
+		if (!assistantSpeaking && !interrupted && audioQueue.length > 0) {
+			assistantSpeaking = true;
+			const audioToPlay = audioQueue.shift(); // Shift the audio out from queue before playing.
+			audioQueue = audioQueue;
+			await playAudio(audioToPlay);
			assistantSpeaking = false;
-			return null;
-		});
-
-		if (res) {
-			const blob = await res.blob();
-			const blobUrl = URL.createObjectURL(blob);
-			const audio = new Audio(blobUrl);
-			assistantAudio = audio;
		}
	};

-	const transcribeHandler = async (audioBlob) => {
-		// Create a blob from the audio chunks
-
-		await tick();
-		const file = blobToFile(audioBlob, 'recording.wav');
-
-		const res = await transcribeAudio(localStorage.token, file).catch((error) => {
-			toast.error(error);
-			return null;
-		});
-
-		if (res) {
-			console.log(res.text);
-
-			if (res.text !== '') {
-				const _responses = await submitPrompt(res.text, { _raw: true });
-				console.log(_responses);
-			}
-		}
-	};
-
-	const assistantSpeakingHandler = async (content) => {
-		assistantSpeaking = true;
-
-		if (modelId && ($settings?.showEmojiInCall ?? false)) {
-			console.log('Generating emoji');
-			const res = await generateEmoji(localStorage.token, modelId, content, chatId).catch(
-				(error) => {
-					console.error(error);
-					return null;
-				}
-			);
+	const setContentAudio = async (content, idx) => {
+		if (assistantSentenceAudios[idx] === undefined) {
+			console.log('%c%s', 'color: red; font-size: 20px;', content);
+
+			assistantSentenceAudios[idx] = null;
+			const res = await synthesizeOpenAISpeech(
+				localStorage.token,
+				$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
+				content
+			).catch((error) => {
+				toast.error(error);
+				assistantSpeaking = false;
+				return null;
+			});

			if (res) {
-				console.log(res);
-				if (/\p{Extended_Pictographic}/u.test(res)) {
-					emoji = res.match(/\p{Extended_Pictographic}/gu)[0];
-				}
+				const blob = await res.blob();
+				const blobUrl = URL.createObjectURL(blob);
+				const audio = new Audio(blobUrl);
+				assistantSentenceAudios[idx] = audio;
+				audioQueue.push(audio);
+				audioQueue = audioQueue;
			}
		}
+	};

-		if (($config.audio.tts.engine ?? '') == '') {
-			let voices = [];
-			const getVoicesLoop = setInterval(async () => {
-				voices = await speechSynthesis.getVoices();
-				if (voices.length > 0) {
-					clearInterval(getVoicesLoop);
-
-					const voice =
-						voices
-							?.filter(
-								(v) => v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-							)
-							?.at(0) ?? undefined;
-
-					currentUtterance = new SpeechSynthesisUtterance(content);
-
-					if (voice) {
-						currentUtterance.voice = voice;
-					}
-
-					speechSynthesis.speak(currentUtterance);
-
-					currentUtterance.onend = async () => {
-						assistantSpeaking = false;
-					};
-				}
-			}, 100);
-		} else if ($config.audio.tts.engine === 'openai') {
-			console.log('openai');
-
-			const sentences = extractSentences(content).reduce((mergedTexts, currentText) => {
-				const lastIndex = mergedTexts.length - 1;
-				if (lastIndex >= 0) {
-					const previousText = mergedTexts[lastIndex];
-					const wordCount = previousText.split(/\s+/).length;
-					if (wordCount < 2) {
-						mergedTexts[lastIndex] = previousText + ' ' + currentText;
-					} else {
-						mergedTexts.push(currentText);
-					}
-				} else {
-					mergedTexts.push(currentText);
-				}
-				return mergedTexts;
-			}, []);
-
-			console.log(sentences);
-
-			let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-
-			for (const [idx, sentence] of sentences.entries()) {
-				const res = await synthesizeOpenAISpeech(
-					localStorage.token,
-					$settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice,
-					sentence
-				).catch((error) => {
-					toast.error(error);
+	const stopRecordingCallback = async (_continue = true) => {
+		console.log('%c%s', 'color: red; font-size: 20px;', '🚨 stopRecordingCallback 🚨');

-					assistantSpeaking = false;
-					return null;
-				});
+		if ($showCallOverlay) {
+			// deep copy the audioChunks array
+			const _audioChunks = audioChunks.slice(0);

-				if (res) {
-					const blob = await res.blob();
-					const blobUrl = URL.createObjectURL(blob);
-					const audio = new Audio(blobUrl);
-					assistantAudio[idx] = audio;
-					lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
+			audioChunks = [];
+			mediaRecorder = false;

-					if (idx === sentences.length - 1) {
-						lastPlayedAudioPromise.then(() => {
-							assistantSpeaking = false;
-						});
-					}
-				}
+			if (_continue) {
+				startRecording();
			}
-		}
-	};

-	const stopRecordingCallback = async (_continue = true) => {
-		if ($showCallOverlay) {
			if (confirmed) {
				loading = true;
				emoji = null;
@@ -335,18 +416,12 @@
					];
				}

-				const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
+				const audioBlob = new Blob(_audioChunks, { type: 'audio/wav' });
				await transcribeHandler(audioBlob);

				confirmed = false;
				loading = false;
			}
-			audioChunks = [];
-			mediaRecorder = false;
-
-			if (_continue) {
-				startRecording();
-			}
		} else {
			audioChunks = [];
			mediaRecorder = false;
@@ -368,113 +443,11 @@
		};
		mediaRecorder.onstop = async () => {
			console.log('Recording stopped');
-
			await stopRecordingCallback();
		};
		mediaRecorder.start();
	};

-	let videoInputDevices = [];
-	let selectedVideoInputDeviceId = null;
-
-	const getVideoInputDevices = async () => {
-		const devices = await navigator.mediaDevices.enumerateDevices();
-		videoInputDevices = devices.filter((device) => device.kind === 'videoinput');
-
-		if (!!navigator.mediaDevices.getDisplayMedia) {
-			videoInputDevices = [
-				...videoInputDevices,
-				{
-					deviceId: 'screen',
-					label: 'Screen Share'
-				}
-			];
-		}
-
-		console.log(videoInputDevices);
-		if (selectedVideoInputDeviceId === null && videoInputDevices.length > 0) {
-			selectedVideoInputDeviceId = videoInputDevices[0].deviceId;
-		}
-	};
-
-	const startCamera = async () => {
-		await getVideoInputDevices();
-
-		if (cameraStream === null) {
-			camera = true;
-			await tick();
-			try {
-				await startVideoStream();
-			} catch (err) {
-				console.error('Error accessing webcam: ', err);
-			}
-		}
-	};
-
-	const startVideoStream = async () => {
-		const video = document.getElementById('camera-feed');
-		if (video) {
-			if (selectedVideoInputDeviceId === 'screen') {
-				cameraStream = await navigator.mediaDevices.getDisplayMedia({
-					video: {
-						cursor: 'always'
-					},
-					audio: false
-				});
-			} else {
-				cameraStream = await navigator.mediaDevices.getUserMedia({
-					video: {
-						deviceId: selectedVideoInputDeviceId ? { exact: selectedVideoInputDeviceId } : undefined
-					}
-				});
-			}
-
-			if (cameraStream) {
-				await getVideoInputDevices();
-				video.srcObject = cameraStream;
-				await video.play();
-			}
-		}
-	};
-
-	const stopVideoStream = async () => {
-		if (cameraStream) {
-			const tracks = cameraStream.getTracks();
-			tracks.forEach((track) => track.stop());
-		}
-
-		cameraStream = null;
-	};
-
-	const takeScreenshot = () => {
-		const video = document.getElementById('camera-feed');
-		const canvas = document.getElementById('camera-canvas');
-
-		if (!canvas) {
-			return;
-		}
-
-		const context = canvas.getContext('2d');
-
-		// Make the canvas match the video dimensions
-		canvas.width = video.videoWidth;
-		canvas.height = video.videoHeight;
-
-		// Draw the image from the video onto the canvas
-		context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
-
-		// Convert the canvas to a data base64 URL and console log it
-		const dataURL = canvas.toDataURL('image/png');
-		console.log(dataURL);
-
-		return dataURL;
-	};
-
-	const stopCamera = async () => {
-		await stopVideoStream();
-		camera = false;
-	};
-
	$: if ($showCallOverlay) {
		startRecording();
	} else {
@@ -483,30 +456,73 @@
		stopRecordingCallback(false);
	}

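+	// Start playback whenever audio is waiting in the queue and nothing is currently being spoken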
+	$: {
+		if (audioQueue.length > 0 && !assistantSpeaking) {
+			playAudioHandler();
+		}
+	}
+
	onMount(() => {
		console.log(eventTarget);

		eventTarget.addEventListener('chat:start', async (e) => {
-			console.log('Chat start event:', e.detail);
-			message = '';
+			console.log('Chat start event:', e);
+			interrupted = false;
+
+			assistantMessage = '';
+			assistantSentenceIdx = -1;
+			assistantSentenceAudios = {}; // Reset audio tracking
+			audioQueue = []; // Clear the audio queue
+
+			chatStreaming = true;
		});

		eventTarget.addEventListener('chat', async (e) => {
			const { content } = e.detail;

+			assistantMessage += content;
+			await tick();
+
+			if (!interrupted) {
+				if ($config.audio.tts.engine !== '') {
+					assistantSentenceIdx = assistantSentences.length - 2;
+
+					if (assistantSentenceIdx >= 0 && !assistantSentenceAudios[assistantSentenceIdx]) {
+						await tick();
+						setContentAudio(assistantSentences[assistantSentenceIdx], assistantSentenceIdx);
+					}
+				}
+			}

-			message += content;
-			console.log('Chat event:', message);
+			chatStreaming = true;
		});

		eventTarget.addEventListener('chat:finish', async (e) => {
-			console.log('Chat finish event:', e.detail);
-			message = '';
+			chatStreaming = false;
+			loading = false;
+
+			console.log('Chat finish event:', e);
+			await tick();
+
+			if (!interrupted) {
+				if ($config.audio.tts.engine !== '') {
+					for (const [idx, sentence] of assistantSentences.entries()) {
+						if (!assistantSentenceAudios[idx]) {
+							await tick();
+							setContentAudio(sentence, idx);
+						}
+					}
+				} else {
+					emoji = generateEmoji(localStorage.token, modelId, assistantMessage);
+					speakSpeechSynthesisHandler(assistantMessage);
+				}
+			}
		});
	});
</script>

+<audio id="audioElement" src="" style="display: none;" />
+
{#if $showCallOverlay}
-	<audio id="audioElement" src="" style="display: none;" />
	<div class=" absolute w-full h-screen max-h-[100dvh] flex z-[999] overflow-hidden">
		<div
			class="absolute w-full h-screen max-h-[100dvh] bg-white text-gray-700 dark:bg-black dark:text-gray-300 flex justify-center"