Skip to content

Continuous recognition from microphone #3

@mdewing

Description

@mdewing

Some sample code that does continuous recognition from a microphone. I can make a pull request if you're interested.

The intents are longer than in openWakeWord so alternating between recording and detection will not work very well. This code uses the callback mode of PyAudio to put the input chunks in a queue and then keeps a buffer that is two seconds in length for the detection.

# Continuous recognition from a microphone

import pyaudio
import numpy as np
import time
import queue

from openspeechtointent.model import CitrinetModel

# Load model (this will also download the model if it is not already present)
mdl = CitrinetModel()

# Define some simple intents
intents = [
    "turn on the lights",
    "turn off the lights",
    "pause the music",
    "set a 5 minute timer",
    "set a 10 minute timer",
    "remind me to buy apples tomorrow",
    "remind me to buy pears tomorrow",
]


# PyAudio capture settings: 16-bit mono PCM at 16 kHz, delivered in
# 1280-sample chunks (80 ms per chunk at this sample rate).
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1280

# 2 second buffer
# nchunk * CHUNK / RATE = 25 * 1280 / 16000 = 2.0 seconds of audio
nchunk = 25

audio_interface = pyaudio.PyAudio()

class MicrophoneBuffer:
    """Rolling buffer holding the most recent ``nchunk`` chunks of mic audio.

    PyAudio's callback mode delivers audio on a background thread; the
    callback only enqueues the raw bytes, and ``get()`` (called from the
    main thread) drains the queue into a fixed-length int16 array.

    Parameters
    ----------
    nchunk : int
        Number of chunk-sized frames the buffer holds (25 -> 2 s at 16 kHz).
    chunk : int or None
        Samples per frame; defaults to the module-level ``CHUNK`` (1280).
    """

    def __init__(self, nchunk, chunk=None):
        self.nchunk = nchunk
        # Resolve the default at call time so the module-level CHUNK is used.
        self.chunk = CHUNK if chunk is None else chunk
        buf_len = self.nchunk * self.chunk
        self.buffer = np.zeros(buf_len, dtype=np.int16)
        # Number of chunks written so far during the initial fill phase.
        self.idx = 0
        self.buffer_queue = queue.Queue()
        self.mic_stream = audio_interface.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=self.chunk,
            stream_callback=self.callback,
        )

    # Using PyAudio callback mode, this function is called in a separate thread
    def callback(self, in_data, frame_count, time_info, status):
        """Enqueue the raw chunk bytes; keep the input-only stream running."""
        self.buffer_queue.put(in_data)
        # Input-only stream: no output data to return.
        return (None, pyaudio.paContinue)

    def get(self):
        """Drain queued chunks into the buffer and return it.

        Returns the internal int16 array of length ``nchunk * chunk``
        holding the most recent audio, oldest samples first.
        """
        while not self.buffer_queue.empty():
            audio_data = np.frombuffer(self.buffer_queue.get(), dtype=np.int16)
            if self.idx < self.nchunk:
                # Initial filling of the audio content buffer.
                loc = self.idx * self.chunk
                # Bug fix: original wrote to the bare name `buffer`, which
                # only worked through an accidental module-level alias.
                self.buffer[loc : loc + self.chunk] = audio_data
                self.idx += 1
            else:
                # Steady state: shift left one chunk, append the new chunk.
                self.buffer = np.roll(self.buffer, -self.chunk)
                # Bug fix: original slice [-CHUNK - 1 : -1] was off by one,
                # leaving the final sample stale; fill the last `chunk`.
                self.buffer[-self.chunk :] = audio_data
        return self.buffer

    def reset(self):
        """Zero the buffer and restart the initial-fill phase."""
        self.buffer[:] = 0
        self.idx = 0

# Continuously score the rolling 2-second buffer against the intent list
# and report any intent whose softmax score clears the threshold.
mic = MicrophoneBuffer(nchunk)

while True:
    buffer = mic.get()

    t_start = time.perf_counter_ns()
    matched_intents, scores, durations = mdl.match_intents(
        buffer, intents, softmax_scores=True
    )
    t_end = time.perf_counter_ns()

    for intent, score, duration in zip(matched_intents, scores, durations):
        if score <= 0.45:
            continue
        print(f"Intent: {intent}, Score: {score}, Duration: {duration}")
        print("match time (ms)", (t_end - t_start) / 1e6)
        # Clear the buffer after a match
        mic.reset()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions