{"library":"flashinfer-cubin","type":"library","category":null,"description":"FlashInfer-cubin provides pre-compiled kernel binaries for FlashInfer, supporting a wide range of GPU architectures. This optional package for `flashinfer-python` eliminates JIT compilation and downloading overhead at runtime, leading to faster initialization and enabling offline usage. The FlashInfer project focuses on delivering high-performance LLM GPU kernels for serving and inference, maintaining an active development cycle with frequent nightly builds and regular patch releases.","language":"python","status":"active","version":"0.6.7.post3","tags":["cuda","llm","inference","gpu","optimization","kernels","pytorch"],"last_verified":"Fri May 22","install":[{"cmd":"pip install flashinfer-python flashinfer-cubin","imports":[]},{"cmd":"pip install flashinfer-cubin","imports":[]}],"homepage":"https://flashinfer.ai","github":"https://github.com/flashinfer-ai/flashinfer","docs":"https://github.com/flashinfer-ai/flashinfer","changelog":null,"pypi":"https://pypi.org/project/flashinfer-cubin/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":70,"avg_install_s":47,"avg_import_s":null,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/flashinfer-cubin/compatibility"}}