{"id":27601,"library":"audiolm","title":"AudioLM","description":"AudioLM is a PyTorch-based implementation of a language modeling approach to audio generation, capable of generating coherent audio continuations given a short prompt. Current version is 0.0.1.dev0, with irregular releases.","status":"active","version":"0.0.1.dev0","language":"python","source_language":"en","source_url":"https://github.com/lucidrains/audiolm-pytorch","tags":["audio generation","language model","deep learning","PyTorch","music","speech"],"install":[{"cmd":"pip install audiolm","lang":"bash","label":"Standard install"}],"dependencies":[{"reason":"Core dependency for model operations","package":"torch","optional":false},{"reason":"Audio I/O and processing","package":"torchaudio","optional":false},{"reason":"HuggingFace models and tokenizers","package":"transformers","optional":false},{"reason":"Audio file reading/writing","package":"soundfile","optional":false},{"reason":"Tensor operations","package":"einops","optional":false}],"imports":[{"note":"Direct import from package top-level","wrong":"","symbol":"AudioLM","correct":"from audiolm import AudioLM"},{"note":"train is in submodule audiolm.train","wrong":"from audiolm import train","symbol":"train","correct":"from audiolm.train import train"},{"note":"decode is in submodule audiolm.decode","wrong":"from audiolm import decode","symbol":"decode","correct":"from audiolm.decode import decode"}],"quickstart":{"code":"import torch\nimport torchaudio\nfrom audiolm import AudioLM\nfrom audiolm.decode import decode\n\nmodel = AudioLM()\n# Load a prompt audio file\nwaveform, sample_rate = torchaudio.load('prompt.wav')\n# Generate continuation (requires GPU or CPU)\ngenerated = decode(model, waveform, sample_rate, max_new_tokens=256)\ntorchaudio.save('output.wav', generated[0].unsqueeze(0), sample_rate)","lang":"python","description":"Load a pre-trained AudioLM model and generate a continuation from a prompt audio file."},"warnings":[{"fix":"Pin to a specific commit or use only for experimentation.","message":"The library is in early development (0.0.1.dev0). API is unstable and may change without notice. Do not use in production.","severity":"breaking","affected_versions":"all"},{"fix":"Use a GPU with at least 16GB VRAM; reduce max_new_tokens if out-of-memory.","message":"AudioLM requires significant GPU memory (16GB+). CPU inference is extremely slow and may run out of memory.","severity":"gotcha","affected_versions":"all"},{"fix":"Refer to the GitHub README for the most up-to-date usage.","message":"The 'decode' function signature may change in future versions; current version uses (model, waveform, sample_rate, ...).","severity":"deprecated","affected_versions":"0.0.1.dev0"}],"env_vars":null,"last_verified":"2026-05-09T00:00:00.000Z","next_check":"2026-08-07T00:00:00.000Z","problems":[{"fix":"Run 'pip install audiolm' in the correct environment.","cause":"Library not installed or installed in wrong environment.","error":"ModuleNotFoundError: No module named 'audiolm'"},{"fix":"Move input tensors to the same device: waveform = waveform.to('cuda') after loading model.","cause":"Input tensor on CPU while model on GPU.","error":"RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}