{"library":"tpu-inference","type":"library","category":null,"description":"tpu-inference is a hardware plugin for vLLM, designed to enable efficient inference of large language models (LLMs) on Google Cloud TPUs. It unifies JAX and PyTorch under a single lowering path, allowing PyTorch model definitions to run performantly on TPUs without additional code changes, while also extending native support to JAX. The library aims to push TPU hardware performance limits and retain vLLM's standardized user experience. It is actively maintained by the vLLM project and Google Cloud, with releases tied to vLLM development.","language":"python","status":"active","version":"0.13.3","tags":["TPU","inference","vLLM","JAX","PyTorch","LLM","Google Cloud","AI/ML acceleration"],"install":[{"cmd":"pip install vllm-tpu","imports":["import importlib.metadata\ntpu_version = importlib.metadata.version(\"tpu_inference\")"]},{"cmd":"uv pip install vllm-tpu","imports":[]}],"homepage":null,"github":null,"docs":null,"changelog":null,"pypi":"https://pypi.org/project/tpu-inference/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.9–3.10","success_rate":0,"avg_install_s":null,"avg_import_s":null,"wheel_type":null},"url":"https://checklist.day/v1/registry/tpu-inference/compatibility"},"provenance":{"verified_status":"passing","verified_at":"Sun Jun 28","last_verified":"Sun Jun 28","next_check":"Tue Jul 28","install_tag":null}}