Some sample code that does continuous recognition from a microphone. I can make a pull request if you're interested.
The intents are longer than in openWakeWord, so alternating between recording and detection will not work very well. This code uses the callback mode of PyAudio to put the input chunks in a queue and then keeps a two-second buffer for the detection.
# Continuous recognition from a microphone
import pyaudio
import numpy as np
import time
import queue
from openspeechtointent.model import CitrinetModel
# --- Audio capture settings -------------------------------------------------
# 16-bit samples, one channel, 16 kHz sample rate.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
# Samples delivered per callback: 1280 / 16000 Hz = 80 ms of audio.
CHUNK = 1280
# Number of chunks kept for detection: 25 * 1280 samples = 2 seconds.
nchunk = 25

# --- Intents to listen for --------------------------------------------------
intents = [
    "turn on the lights",
    "turn off the lights",
    "pause the music",
    "set a 5 minute timer",
    "set a 10 minute timer",
    "remind me to buy apples tomorrow",
    "remind me to buy pears tomorrow",
]

# --- Model and audio backend ------------------------------------------------
# Instantiating the model also downloads it when it is not present locally.
mdl = CitrinetModel()

# Shared PyAudio handle used to open the microphone stream.
audio_interface = pyaudio.PyAudio()
class MicrophoneBuffer:
    """Rolling window over the most recent microphone audio.

    A PyAudio input stream is opened in callback mode. Every chunk the
    stream delivers is pushed onto a ``queue.Queue``; ``get()`` drains that
    queue into a fixed-size ``int16`` NumPy buffer holding the latest
    ``nchunk`` chunks.

    Args:
        nchunk: Number of chunks the detection buffer holds.
        chunk: Samples per chunk. Defaults to the module-level ``CHUNK``
            when omitted.
    """

    def __init__(self, nchunk, chunk=None):
        self.nchunk = nchunk
        # Resolve the chunk size once so the buffer arithmetic below does not
        # depend on a module global at call time.
        self.chunk = CHUNK if chunk is None else chunk
        self.buffer = np.zeros(nchunk * self.chunk, dtype=np.int16)
        # NOTE: the original author remarks that a non-zero fill
        # (``self.buffer[:] = 2``) might be needed to avoid some checks for
        # an empty buffer; left disabled as in the original.
        # Number of chunks written since construction or the last reset();
        # stops increasing once the buffer is full.
        self.idx = 0
        self.buffer_queue = queue.Queue()
        self.mic_stream = audio_interface.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=self.chunk,
            stream_callback=self.callback,
        )

    # Using PyAudio callback mode, this function is called in a separate thread
    def callback(self, in_data, frame_count, time_info, status):
        """Queue one chunk of raw audio bytes and keep the stream running."""
        self.buffer_queue.put(in_data)
        return (None, pyaudio.paContinue)

    def get(self):
        """Fold all queued chunks into the buffer and return it.

        Until ``nchunk`` chunks have been received the buffer is filled
        front to back; afterwards the oldest chunk is dropped and the new
        one is appended at the end.

        Returns:
            The ``int16`` NumPy array holding the current audio window.
        """
        while True:
            try:
                raw = self.buffer_queue.get_nowait()
            except queue.Empty:
                break
            audio_data = np.frombuffer(raw, dtype=np.int16)
            if self.idx < self.nchunk:
                # Initial filling of the audio content buffer
                loc = self.idx * self.chunk
                self.buffer[loc : loc + self.chunk] = audio_data
                self.idx += 1
            else:
                # Shift left by one chunk, then overwrite the wrapped-around
                # tail (exactly the last ``chunk`` samples) with new audio.
                self.buffer = np.roll(self.buffer, -self.chunk)
                self.buffer[-self.chunk :] = audio_data
        return self.buffer

    def reset(self):
        """Zero the buffer in place and restart the initial fill."""
        self.buffer[:] = 0
        self.idx = 0
# Open the microphone stream; audio starts accumulating in the background.
mic = MicrophoneBuffer(nchunk)

# Keep matching the current audio window against the configured intents.
while True:
    buffer = mic.get()

    # Time only the model call.
    t_start = time.perf_counter_ns()
    matched_intents, scores, durations = mdl.match_intents(
        buffer, intents, softmax_scores=True
    )
    t_end = time.perf_counter_ns()
    elapsed_ms = (t_end - t_start) / 1e6

    for intent, score, duration in zip(matched_intents, scores, durations):
        # Only report intents whose score exceeds the 0.45 threshold.
        if not score > 0.45:
            continue
        print(f"Intent: {intent}, Score: {score}, Duration: {duration}")
        print("match time (ms)", elapsed_ms)
        # Clear the buffer after a match
        mic.reset()
Some sample code that does continuous recognition from a microphone. I can make a pull request if you're interested.
The intents are longer than in openWakeWord, so alternating between recording and detection will not work very well. This code uses the callback mode of PyAudio to put the input chunks in a queue and then keeps a two-second buffer for the detection.