LiveKit Turn Detector Plugin

1.5.2 · active · verified Sat Apr 11

livekit-plugins-turn-detector provides end-of-utterance detection for LiveKit Agents, leveraging machine learning to differentiate between genuine interruptions and incidental background noises. It is an integral part of LiveKit Agents' adaptive interruption handling introduced in v1.5.0. The current version is 1.5.2, and it typically releases in conjunction with major livekit-agents updates.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to initialize the `TurnDetector`, feed it simulated audio frames, and listen for `TurnStarted` and `TurnFinished` events. It illustrates the core API for integrating turn detection into an audio processing pipeline.

import asyncio
import numpy as np
from livekit.agents.utils import AudioFrame
from livekit.plugins.turn_detector import TurnDetector, TurnStarted, TurnFinished

async def quickstart_turn_detector():
    """Run a minimal end-to-end turn-detection demo.

    Creates a ``TurnDetector``, streams simulated audio into it
    (silence -> 440 Hz tone -> silence, paced in real time), and
    concurrently prints each ``TurnStarted`` / ``TurnFinished`` event
    the detector emits.
    """
    print("Initializing TurnDetector...")
    # TurnDetector.create() is an async factory method
    detector = await TurnDetector.create()

    # Simulated stream parameters: 16 kHz mono audio in 10 ms frames.
    sample_rate = 16000
    frame_size = int(sample_rate * 0.01)  # samples per 10 ms frame
    num_silent_frames = 50   # 500 ms of silence (50 * 10 ms frames)
    num_speech_frames = 100  # 1 second of "speech"

    def silent_frame():
        # One 10 ms frame of all-zero (silent) mono samples.
        return AudioFrame(np.zeros(frame_size, dtype=np.int16), sample_rate, 1)

    async def feed_frames():
        """Push silence, simulated speech, trailing silence, then flush."""
        # Leading silence
        for _ in range(num_silent_frames):
            await detector.push_frame(silent_frame())
            await asyncio.sleep(0.01)  # pace the stream at real time
        print("Simulated silence.")

        # Simulated speech: a 440 Hz sine tone stands in for voiced audio.
        # The time axis is loop-invariant, so compute it once.
        t = np.linspace(0, 0.01, frame_size, endpoint=False)
        for idx in range(num_speech_frames):
            samples = (np.sin(2 * np.pi * 440 * t) * 1000).astype(np.int16)
            await detector.push_frame(AudioFrame(samples, sample_rate, 1))
            if idx == 0:
                print("Simulating speech...")
            await asyncio.sleep(0.01)

        # Trailing silence so the detector can observe the end of the turn
        for _ in range(num_silent_frames):
            await detector.push_frame(silent_frame())
            await asyncio.sleep(0.01)
        print("Simulated post-speech silence. Closing detector.")

        # Signal end of stream
        await detector.flush()

    async def print_events():
        """Print each turn event as the detector yields it."""
        async for ev in detector.detect_turns():
            if isinstance(ev, TurnStarted):
                print(f"Turn Started at timestamp {ev.timestamp}")
            elif isinstance(ev, TurnFinished):
                print(f"Turn Finished at timestamp {ev.timestamp}, duration: {ev.duration}s")

    # Producer and consumer must run concurrently: the event iterator only
    # completes once the audio side has flushed the stream.
    await asyncio.gather(feed_frames(), print_events())
    print("Quickstart finished.")

if __name__ == "__main__":
    # Run the demo only when executed as a script, not on import.
    asyncio.run(quickstart_turn_detector())

view raw JSON →