Microsoft Azure Cognitive Services Speech SDK for JavaScript

1.49.0 · active · verified Sun Apr 19

The Microsoft Cognitive Services Speech SDK for JavaScript provides robust APIs for integrating speech-to-text, text-to-speech, and speech translation capabilities into JavaScript applications. It supports both browser and Node.js environments, making it versatile for various use cases. The current stable version is 1.49.0, and releases appear to ship roughly every one to two months, indicating active development and continuous feature enhancements. Key differentiators include official support for Azure Speech Services, a comprehensive feature set for speech AI, and first-class TypeScript type definitions, which enable a more robust development experience than generic WebSocket or REST API integrations.

Common errors

Warnings

Install

Imports

Quickstart

Demonstrates basic speech-to-text recognition from microphone input, showing how to configure the SDK, listen for speech, and process the recognized text or cancellation events.

import {
    AudioConfig,
    CancellationDetails,
    ResultReason,
    SpeechConfig,
    SpeechRecognizer,
} from 'microsoft-cognitiveservices-speech-sdk';

// Azure Speech resource credentials, read from the environment.
// Each one falls back to an empty string when the variable is not set.
const speechKey = process.env['SPEECH_KEY'] ?? '';
const speechRegion = process.env['SPEECH_REGION'] ?? '';

/**
 * Runs a single-shot speech-to-text recognition against the default microphone
 * and logs the outcome to the console.
 *
 * The subscription key and region come from the module-level `speechKey` and
 * `speechRegion` constants. When either is empty, an error is logged and the
 * function returns without creating any SDK objects.
 *
 * NOTE(review): `AudioConfig.fromDefaultMicrophoneInput()` is documented by
 * Microsoft as browser-only, while `process.env` suggests a Node.js/bundler
 * context — confirm the intended runtime.
 *
 * @returns A promise that settles only after the recognition attempt has
 *          finished and the recognizer has been closed. It rejects with an
 *          `Error` when the SDK reports a failure through its error callback.
 */
async function recognizeFromMicrophone(): Promise<void> {
    if (!speechKey || !speechRegion) {
        console.error('Please set the SPEECH_KEY and SPEECH_REGION environment variables.');
        return;
    }

    const speechConfig = SpeechConfig.fromSubscription(speechKey, speechRegion);
    speechConfig.speechRecognitionLanguage = 'en-US';

    const audioConfig = AudioConfig.fromDefaultMicrophoneInput();
    const recognizer = new SpeechRecognizer(speechConfig, audioConfig);

    console.log('Say something into your microphone...');

    try {
        // recognizeOnceAsync is callback-based; wrap it so this async function
        // actually waits for the recognition result before resolving.
        await new Promise<void>((resolve, reject) => {
            recognizer.recognizeOnceAsync(
                result => {
                    switch (result.reason) {
                        case ResultReason.RecognizedSpeech:
                            console.log(`RECOGNIZED: Text=${result.text}`);
                            break;
                        case ResultReason.NoMatch:
                            console.log('NOMATCH: Speech could not be recognized.');
                            break;
                        case ResultReason.Canceled: {
                            // The result object carries no cancellation details itself;
                            // the SDK derives them from the result via this factory.
                            const cancellation = CancellationDetails.fromResult(result);
                            console.log(`CANCELED: Reason=${cancellation.reason}`);
                            if (cancellation.errorDetails) {
                                console.log(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
                            }
                            break;
                        }
                    }
                    resolve();
                },
                // Surface SDK-level failures instead of dropping them silently.
                (error: string) => reject(new Error(error)),
            );
        });
    } finally {
        // Release the recognizer's resources whether recognition succeeded or failed.
        recognizer.close();
    }
}

// Entry point: start the recognition and report any rejection rather than
// leaving the returned promise floating with no handler.
recognizeFromMicrophone().catch((error: unknown) => {
    console.error('Speech recognition failed:', error);
});

view raw JSON →