From 0e6e568c4badd903a869f40186d07db1246f4c69 Mon Sep 17 00:00:00 2001 From: Luke Oliff Date: Wed, 6 Mar 2024 23:09:46 +0000 Subject: [PATCH 1/3] feat: create failsafe for incomplete speech final --- app/components/Conversation.tsx | 56 ++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/app/components/Conversation.tsx b/app/components/Conversation.tsx index 1199fcc..adb40fd 100644 --- a/app/components/Conversation.tsx +++ b/app/components/Conversation.tsx @@ -211,7 +211,9 @@ export default function Conversation(): JSX.Element { useEffect(() => { const onTranscript = (data: LiveTranscriptionEvent) => { let content = utteranceText(data); - if (content || data.speech_final) { + + // i only want an empty transcript part if it is speech_final + if (content !== "" || data.speech_final) { /** * use an outbound message queue to build up the unsent utterance */ @@ -240,7 +242,7 @@ export default function Conversation(): JSX.Element { }; }, [addTranscriptPart, connection]); - const [currentUtterance, setCurrentUtterance] = useState(""); + const [currentUtterance, setCurrentUtterance] = useState(); const getCurrentUtterance = useCallback(() => { return transcriptParts.filter(({ is_final, speech_final }, i, arr) => { @@ -248,6 +250,8 @@ export default function Conversation(): JSX.Element { }); }, [transcriptParts]); + const [lastUtterance, setLastUtterance] = useState(); + useEffect(() => { const parts = getCurrentUtterance(); const last = parts[parts.length - 1]; @@ -256,20 +260,64 @@ export default function Conversation(): JSX.Element { .join(" ") .trim(); - if (content === "") return; + /** + * if the entire utterance is empty, don't go any further + * for example, many many many empty transcription responses + */ + if (!content) return; + /** + * display the concatenated utterances + */ setCurrentUtterance(content); + /** + * record the last time we received a word + */ + if (last.text !== "") { + setLastUtterance(Date.now()); 
} + + /** + * if the last part of the utterance, empty or not, is speech_final, send to the LLM. + */ if (last && last.speech_final) { append({ role: "user", content, }); clearTranscriptParts(); - setCurrentUtterance(""); + setCurrentUtterance(undefined); } }, [getCurrentUtterance, clearTranscriptParts, append]); + /** + * incomplete speech final failsafe + */ + useEffect(() => { + if (!lastUtterance || !currentUtterance) return; + + const interval = setInterval(() => { + const timeLived = Date.now() - lastUtterance; + + // console.log(timeLived, timeLived > 2000, currentUtterance); + + if (currentUtterance !== "" && timeLived > 2000) { + append({ + role: "user", + content: currentUtterance, + }); + clearTranscriptParts(); + setCurrentUtterance(undefined); + } + }, 100); + + return () => { + clearInterval(interval); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [lastUtterance, currentUtterance]); + /** * barge-in */ From 7f2202e6bbb0d02636fc394dd81e31e9f757398c Mon Sep 17 00:00:00 2001 From: Luke Oliff Date: Wed, 6 Mar 2024 23:16:41 +0000 Subject: [PATCH 2/3] chore: add logging to failsafe --- app/components/Conversation.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/components/Conversation.tsx b/app/components/Conversation.tsx index adb40fd..bf1f37e 100644 --- a/app/components/Conversation.tsx +++ b/app/components/Conversation.tsx @@ -300,9 +300,11 @@ export default function Conversation(): JSX.Element { const interval = setInterval(() => { const timeLived = Date.now() - lastUtterance; - // console.log(timeLived, timeLived > 2000, currentUtterance); + console.log(timeLived, timeLived > 2000, currentUtterance); if (currentUtterance !== "" && timeLived > 2000) { + console.log("failsafe fires! 
pew pew!!"); + append({ role: "user", content: currentUtterance, From e22fba237b83ebfe860b275ff8266fe8c0ad42de Mon Sep 17 00:00:00 2001 From: Luke Oliff Date: Wed, 6 Mar 2024 23:54:53 +0000 Subject: [PATCH 3/3] chore: reduce failsafe timeout --- app/components/Conversation.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/components/Conversation.tsx b/app/components/Conversation.tsx index bf1f37e..2dc2faf 100644 --- a/app/components/Conversation.tsx +++ b/app/components/Conversation.tsx @@ -300,9 +300,9 @@ export default function Conversation(): JSX.Element { const interval = setInterval(() => { const timeLived = Date.now() - lastUtterance; - console.log(timeLived, timeLived > 2000, currentUtterance); + console.log(timeLived, timeLived > 1500, currentUtterance); - if (currentUtterance !== "" && timeLived > 2000) { + if (currentUtterance !== "" && timeLived > 1500) { console.log("failsafe fires! pew pew!!"); append({