{"library":"vllm","type":"library","category":null,"description":"vLLM is a high-throughput, memory-efficient inference and serving engine for large language models (LLMs). It uses optimization techniques such as PagedAttention to significantly improve LLM serving performance. The current version is 0.19.0; the project releases frequently and adds new features regularly.","language":"python","status":"active","version":"0.19.0","tags":["LLM","inference","GPU","serving","high-performance","deep-learning"],"last_verified":"Wed May 20","install":[{"cmd":"pip install vllm","imports":["from vllm import LLM","from vllm import SamplingParams"]},{"cmd":"pip install vllm --extra-index-url https://download.pytorch.org/whl/cu121","imports":[]},{"cmd":"uv pip install vllm --torch-backend=auto","imports":[]}],"homepage":"https://vllm.ai","github":"https://github.com/vllm-project/vllm","docs":"https://docs.vllm.ai/en/latest/","changelog":null,"pypi":"https://pypi.org/project/vllm/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":0,"avg_install_s":null,"avg_import_s":null,"wheel_type":null},"url":"https://checklist.day/v1/registry/vllm/compatibility"}}