{"library":"flash-linear-attention","type":"library","category":null,"description":"Flash Linear Attention (FLA) is a Python library providing efficient, Triton-based implementations for state-of-the-art linear attention models and emerging sequence modeling architectures. It aims for high-performance training and inference across NVIDIA, AMD, and Intel GPUs. As of version 0.4.2, the library is actively maintained with frequent releases, offering optimized kernels, fused modules, and integration-ready layers for PyTorch and Hugging Face models.","language":"python","status":"active","version":"0.4.2","tags":["attention","linear-attention","deep-learning","pytorch","triton","transformers","gpu-acceleration","sequence-modeling"],"install":[{"cmd":"pip install flash-linear-attention","imports":["from fla.layers import MultiScaleRetention","from fla.models import FlashMamba"]},{"cmd":"pip install torch triton einops transformers numpy\n# For AMD GPUs, ensure Triton ROCm backend is installed separately.\n# For Intel GPUs, ensure Triton XPU backend is installed separately.","imports":[]}],"homepage":null,"github":"https://github.com/fla-org/flash-linear-attention","docs":null,"changelog":null,"pypi":"https://pypi.org/project/flash-linear-attention/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":35,"avg_install_s":78.2,"avg_import_s":19.79,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/flash-linear-attention/compatibility"},"provenance":{"verified_status":"install_fail","verified_at":"Sun Jun 28","last_verified":"Sun Jun 28","next_check":"Thu Jul 09","install_tag":null}}