Features/refine chat window #345

Merged: 2 commits, May 19, 2025

1 change: 1 addition & 0 deletions src/lib/helpers/types/conversationTypes.js
@@ -27,6 +27,7 @@
* @property {string} status - The conversation status.
* @property {Object} states - The conversation states.
* @property {string[]} tags - The conversation tags.
* @property {boolean?} is_realtime_enabled - Whether the realtime feature is enabled.
* @property {Date} updated_time - The conversation updated time.
* @property {Date} created_time - The conversation created time.
*/
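
For context, a minimal sketch of how this optional flag is meant to be consumed, mirroring the startListen change to chat-box.svelte later in this PR; the surrounding variable names are illustrative:

/** @type {import('$conversationTypes').ConversationModel} */
let conversation;

// Sketch: pick the voice-input backend based on the new flag —
// server-side realtime chat when enabled, browser Web Speech otherwise.
if (conversation?.is_realtime_enabled) {
  realtimeChat.start(agentId, conversationId);
} else {
  webSpeech.start({ continuous: true });
}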
11 changes: 0 additions & 11 deletions src/lib/services/conversation-service.js
@@ -32,17 +32,6 @@ export async function getConversation(id, isLoadStates = false) {
return response.data;
}

/**
* Get conversation user
* @param {string} id
* @returns {Promise<import('$userTypes').UserModel>}
*/
export async function getConversationUser(id) {
let url = replaceUrl(endpoints.conversationUserUrl, {conversationId: id});
const response = await axios.get(url);
return response.data;
}

/**
* Get conversation list
* @param {import('$conversationTypes').ConversationFilter} filter
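
The dedicated endpoint call is dropped because the conversation payload now carries the user directly; a minimal sketch of the caller-side change, where the user field name follows the chat-box.svelte update later in this PR:

// Before: a separate request per conversation
// const conversationUser = await getConversationUser(params.conversationId);

// After (sketch): read the user off the conversation that was already fetched
const conversation = await getConversation(params.conversationId, true);
const conversationUser = conversation?.user;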
151 changes: 96 additions & 55 deletions src/lib/services/web-speech.js
@@ -1,75 +1,116 @@
// // https://developer.mozilla.org/en-US/docs/Web/API/Web_Speech_API/Using_the_Web_Speech_API
// @ts-ignore
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
const SpeechRecognitionEvent = window.SpeechRecognitionEvent || window.webkitSpeechRecognitionEvent;

const recognition = !navigator.userAgent.includes('Firefox') ? new SpeechRecognition() : null;
if (recognition) {
recognition.continuous = false;
recognition.lang = "en-US";
recognition.interimResults = false;
recognition.maxAlternatives = 1;
}


const synth = window.speechSynthesis;

const utterThis = new SpeechSynthesisUtterance();
utterThis.pitch = 1;
utterThis.rate = 1;

export const SPEECH_VOICES = [
"Microsoft Michelle Online (Natural) - English (United States)",
"Google US English"
];

export const webSpeech = {
/** @type {import('$conversationTypes').OnSpeechToTextDetected} */
/** @type {SpeechRecognition | null} */
recognition: null,

/** @type {import('$conversationTypes').OnSpeechToTextDetected} */
onSpeechToTextDetected: () => {},

start() {
if (recognition) {
recognition.start();
console.log("Ready to receive a voice command.");
onRecognitionStarted: () => {},

onRecognitionEnded: () => {},

/** @param {{continuous?: boolean, lang?: string, interimResults?: boolean, maxAlternatives?: number}} options */
start(options = {
continuous: false,
lang: "en-US",
interimResults: false,
maxAlternatives: 1
}) {
this.recognition = !navigator.userAgent.includes('Firefox') ? new SpeechRecognition() : null;
if (this.recognition == null) return;

this.recognition.continuous = options.continuous || false;
this.recognition.lang = options.lang || "en-US";
this.recognition.interimResults = options.interimResults || false;
this.recognition.maxAlternatives = options.maxAlternatives || 1;

this.recognition.onstart = () => {
console.log('Recognition starts...');
this.onRecognitionStarted?.();
};

this.recognition.onresult = (/** @type {any} */ event) => {
const len = event.results.length;
const text = event.results[len-1][0].transcript;
console.log(`Confidence: ${text} ${event.results[len-1][0].confidence}`);
this.onSpeechToTextDetected?.(text);
};

this.recognition.onsoundstart = () => {
console.log('Recognition sound start...');
};

this.recognition.onaudiostart = () => {
console.log('Recognition audio start...');
};

this.recognition.onspeechstart = () => {
console.log('Recognition speech start...');
};

this.recognition.onnomatch = () => {
console.log("I didn't recognize the voice.");
};

this.recognition.onerror = (/** @type {any} */ event) => {
console.log(`Error occurred in recognition: ${event.error}`);
};

this.recognition.onend = () => {
console.log('Recognition is ended.');
this.onRecognitionEnded?.();
};

try {
this.recognition.start();
} catch (err) {
console.log('Error when starting speech recognition...');
setTimeout(() => {
this.recognition.start();
}, 500);
}
},

abort() {
if (this.recognition) {
this.recognition.abort();
}
}
};

export const webSpeaker = {
/** @type {SpeechSynthesisUtterance | null} */
utter: null,

synth: window.speechSynthesis,

/** @param {string} transcript */
utter(transcript) {
setVoiceSynthesis();
utterThis.text = transcript
synth.speak(utterThis);
speak(transcript) {
this.utter = new SpeechSynthesisUtterance();
this.utter.pitch = 1;
this.utter.rate = 1;
this.utter.text = transcript;

// set voice
if (this.utter.voice == null) {
this.utter.voice = this.synth.getVoices().find(x => SPEECH_VOICES.includes(x.name)) || null;
}

this.synth.speak(this.utter);
},

stop() {
synth.cancel();
}
}

function setVoiceSynthesis() {
if (utterThis.voice == null) {
const voices = synth.getVoices();
for (let i = 0; i < voices.length; i++) {
if (SPEECH_VOICES.includes(voices[i].name)) {
utterThis.voice = voices[i];
console.log(voices[i].name);
break;
}
}
this.synth.cancel();
this.utter = null;
}
}

if (recognition) {
recognition.onresult = (/** @type {any} */ event) => {
const text = event.results[0][0].transcript;
console.log(`Confidence: ${text} ${event.results[0][0].confidence}`);
webSpeech.onSpeechToTextDetected(text);
};

recognition.onnomatch = (/** @type {any} */ event) => {
console.log("I didn't recognize that color.");
};

recognition.onerror = (/** @type {any} */ event) => {
console.log(`Error occurred in recognition: ${event.error}`);
};
}
};
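
A minimal usage sketch of the refactored module, assembled from the new API above (callback wiring, per-call recognition options, and the speak/stop methods); the handler bodies are illustrative:

import { webSpeech, webSpeaker } from '$lib/services/web-speech';

// Wire callbacks before starting; the recognition instance is created lazily inside start().
webSpeech.onSpeechToTextDetected = (text) => console.log('Transcript:', text);
webSpeech.onRecognitionStarted = () => console.log('Listening...');
webSpeech.onRecognitionEnded = () => console.log('Stopped.');

// Options default to { continuous: false, lang: "en-US", interimResults: false, maxAlternatives: 1 }.
webSpeech.start({ continuous: true });

// Later: cancel recognition.
webSpeech.abort();

// Text-to-speech: speak() builds a fresh utterance and picks a voice from SPEECH_VOICES.
webSpeaker.speak('Hello from the chat window.');
webSpeaker.stop();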
44 changes: 32 additions & 12 deletions src/routes/chat/[agentId]/[conversationId]/chat-box.svelte
@@ -31,7 +31,6 @@
updateConversationMessage,
updateConversationTags,
getConversationFiles,
getConversationUser,
uploadConversationFiles,
getAddressOptions,
pinConversationToDashboard,
@@ -71,6 +70,7 @@
import InstantLog from './instant-log/instant-log.svelte';
import LocalStorageManager from '$lib/helpers/utils/storage-manager';
import { realtimeChat } from '$lib/services/realtime-chat-service';
import { webSpeech } from '$lib/services/web-speech';


const options = {
@@ -132,7 +132,6 @@

/** @type {any[]} */
let scrollbars = [];
let microphoneIcon = "microphone-off";

/** @type {import('$conversationTypes').ConversationModel} */
let conversation;
@@ -222,7 +221,7 @@
disableSpeech = navigator.userAgent.includes('Firefox');
conversation = await getConversation(params.conversationId, true);
dialogs = await getDialogs(params.conversationId, dialogCount);
conversationUser = await getConversationUser(params.conversationId);
conversationUser = conversation?.user;
selectedTags = conversation?.tags || [];
latestStateLog = conversation?.states;
initUserSentMessages(dialogs);
@@ -669,17 +668,38 @@
}
}

async function startListen() {
function startListen() {
if (disableSpeech) return;

if (!isListening) {
realtimeChat.start(params.agentId, params.conversationId);
isListening = true;
microphoneIcon = "microphone";
isListening = !isListening;
if (conversation?.is_realtime_enabled) {

if (isListening) {
realtimeChat.start(params.agentId, params.conversationId);
} else {
realtimeChat.stop();
}
} else {
realtimeChat.stop();
isListening = false;
microphoneIcon = "microphone-off";
webSpeech.onSpeechToTextDetected = (transcript) => {
if (!!!_.trim(transcript) || isSendingMsg) {
return;
}

sendChatMessage(transcript);
};
webSpeech.onRecognitionStarted = () => {
isListening = true;
};
webSpeech.onRecognitionEnded = () => {
isListening = false;
};

if (isListening) {
webSpeech.start({ continuous: true });
} else {
webSpeech.abort();
}

}
}

@@ -1819,7 +1839,7 @@
disabled={isSendingMsg || isThinking || disableAction}
on:click={() => startListen()}
>
<i class="mdi mdi-{microphoneIcon} md-36" />
<i class="mdi mdi-{isListening ? 'microphone' : 'microphone-off'} md-36" />
</button>
{/if}
</div>