{"library":"llama-cpp-python","type":"library","category":null,"description":"Python bindings for the `llama.cpp` library, enabling efficient local inference of large language models (LLMs) on various hardware, including CPUs and GPUs (NVIDIA, Apple Metal, AMD ROCm). It provides both a high-level API for easy model interaction and a low-level API for direct C API access. The library is actively maintained with frequent updates, often mirroring upstream `llama.cpp` changes, and currently stands at version 0.3.20.","language":"python","status":"active","version":"0.3.20","tags":["LLM","bindings","inference","NLP","AI","local-inference","GGUF","CUDA","Metal"],"last_verified":"Sat May 23","install":[{"cmd":"pip install llama-cpp-python","imports":["from llama_cpp import Llama","from llama_cpp import LlamaGrammar","from llama_cpp.llama_tokenizer import LlamaHFTokenizer"]},{"cmd":"pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu","imports":[]},{"cmd":"CMAKE_ARGS=\"-DGGML_CUDA=on\" pip install llama-cpp-python","imports":[]},{"cmd":"CMAKE_ARGS=\"-DGGML_METAL=on\" pip install llama-cpp-python","imports":[]},{"cmd":"CMAKE_ARGS=\"-DGGML_HIPBLAS=on\" pip install llama-cpp-python","imports":[]},{"cmd":"CMAKE_ARGS=\"-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS\" pip install llama-cpp-python","imports":[]}],"homepage":null,"github":"https://github.com/abetlen/llama-cpp-python","docs":"https://llama-cpp-python.readthedocs.io/en/latest/","changelog":"https://llama-cpp-python.readthedocs.io/en/latest/changelog/","pypi":"https://pypi.org/project/llama-cpp-python/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":5.6,"avg_import_s":0.55,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/llama-cpp-python/compatibility"}}