SpeechRecognition

3.16.0 · active · verified Thu Apr 09

SpeechRecognition is a comprehensive Python library for performing speech recognition. It supports various engines and APIs, both online (e.g., Google Web Speech API, Google Cloud Speech, OpenAI Whisper API, AWS Transcribe, Microsoft Azure Speech, Cohere Transcribe) and offline (e.g., CMU Sphinx, Vosk, Whisper via local models). It is actively maintained with frequent minor and patch releases, currently at version 3.16.0.

Warnings

Install

Imports

Quickstart

This quickstart demonstrates how to transcribe audio using the SpeechRecognition library. It includes a runnable microphone example (which degrades gracefully if PyAudio is not installed) and an example that transcribes an audio file. For the audio file example, it creates a silent dummy WAV file with `pydub` when no file exists yet; if `pydub` is not installed, it skips the file example and asks you to supply a WAV file manually. Because the dummy file contains only silence, the recognizer is expected to report that it could not understand the audio; substitute a real recording to see an actual transcription. Both examples use the free Google Web Speech API. A third, optional example uses a commercial API (OpenAI Whisper); it runs only when the `OPENAI_API_KEY` environment variable is set and requires installing the `openai` package.

import speech_recognition as sr
import os

# Single recognizer instance shared by every example in this script.
r = sr.Recognizer()

# --- Option 1: Listen from Microphone (requires PyAudio and PortAudio) ---
# The PyAudio probe, the capture and the recognition call all live in one
# try block so that each failure mode maps to exactly one friendly message.
try:
    import pyaudio  # imported only to find out whether microphone support is available

    with sr.Microphone() as mic:
        print("Say something into the microphone!")
        # Listen to one second of background sound so the recognizer can
        # adapt to ambient noise before the real capture starts.
        r.adjust_for_ambient_noise(mic, duration=1)
        mic_audio = r.listen(mic, timeout=5, phrase_time_limit=10)

    print("Processing microphone input...")
    text = r.recognize_google(mic_audio)
    print(f"You said (Google Web Speech): {text}")
except ImportError:
    # PyAudio is missing: degrade gracefully instead of crashing.
    print("PyAudio not installed. Cannot use microphone. To enable, install with: pip install pyaudio")
except sr.WaitTimeoutError:
    # Nothing was heard before the listen() timeout elapsed.
    print("No speech detected within the timeout period for microphone.")
except sr.RequestError as e:
    # The recognition service could not be reached or rejected the request.
    print(f"Could not request results from Google Web Speech service for microphone; {e}")
except sr.UnknownValueError:
    # Audio was captured but no transcription could be produced.
    print("Google Web Speech Recognition could not understand microphone audio.")
except Exception as e:
    # Last-resort guard so the remaining examples still run.
    print(f"An unexpected error occurred with microphone input: {e}")

# --- Option 2: Transcribe an Audio File (e.g., using Google Web Speech API) ---
file_path = "dummy_audio.wav"
# If no file is present yet, synthesize a placeholder recording with pydub.
if not os.path.exists(file_path):
    try:
        from pydub import AudioSegment

        # One second (1000 ms) of silence at a 16 kHz frame rate.
        placeholder = AudioSegment.silent(duration=1000, frame_rate=16000)
        placeholder.export(file_path, format="wav")
        print(f"\nCreated a dummy WAV file: {file_path}")
    except ImportError:
        # Without pydub there is nothing to transcribe; resetting file_path to
        # None signals the file-based examples further down to skip themselves.
        print("\npydub not installed, cannot create dummy audio. Please provide a WAV file manually.")
        print("Skipping audio file transcription example.")
        file_path = None

# Runs only when a WAV file is available (file_path is None when the dummy
# file could not be created).
if file_path:
    try:
        # Read the whole file into a single audio clip before recognition.
        with sr.AudioFile(file_path) as wav_source:
            file_audio = r.record(wav_source)
        print(f"Transcribing '{file_path}'...")
        text = r.recognize_google(file_audio)
        print(f"Transcription (Google Web Speech): {text}")
    except sr.RequestError as e:
        # The recognition service could not be reached or rejected the request.
        print(f"Could not request results from Google Web Speech service for '{file_path}'; {e}")
    except sr.UnknownValueError:
        # The audio was read but no transcription could be produced.
        print(f"Google Web Speech Recognition could not understand audio from '{file_path}'.")
    except Exception as e:
        # Anything else (unreadable file, unsupported format, ...) is reported, not raised.
        print(f"An error occurred with audio file transcription: {e}")

# --- Option 3: Using a Commercial API (e.g., OpenAI Whisper API) ---
# Requires the `openai` package ('pip install openai', or
# 'pip install SpeechRecognition[openai]') and the OPENAI_API_KEY environment variable.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_API_KEY and file_path:
    print("\nAttempting transcription with OpenAI Whisper API...")
    try:
        with sr.AudioFile(file_path) as source:
            audio = r.record(source)
        # Current SpeechRecognition releases expose this recognizer as
        # `recognize_openai`; `recognize_whisper_api` is only kept as a
        # deprecated alias. Prefer the new name and fall back for older installs.
        recognize_openai = getattr(r, "recognize_openai", None) or r.recognize_whisper_api
        # Do not pass `api_key=`: current releases no longer accept that keyword.
        # The key is taken from the OPENAI_API_KEY environment variable, which
        # the guard above has already verified to be set.
        text = recognize_openai(audio)
        print(f"Transcription (OpenAI Whisper API): {text}")
    except sr.UnknownValueError:
        print(f"OpenAI Whisper API could not understand audio from '{file_path}'.")
    except sr.RequestError as e:
        print(f"Could not request results from OpenAI Whisper API service; {e}")
    except Exception as e:
        # Also covers setup problems such as the `openai` package not being installed.
        print(f"An error occurred with OpenAI Whisper API: {e}")
else:
    print("\nSkipping OpenAI Whisper API example (OPENAI_API_KEY not set or no audio file for transcription).")

view raw JSON →