{"library":"ctranslate2","type":"library","category":null,"description":"CTranslate2 is a C++ and Python library for efficient inference with Transformer models. It implements a custom runtime with performance optimizations such as weight quantization, layer fusion, and batch reordering to accelerate Transformer models and reduce their memory usage on CPUs and GPUs. It currently supports a wide range of encoder-decoder, decoder-only, and encoder-only models from frameworks like OpenNMT, Fairseq, and Hugging Face Transformers. The library is actively maintained with frequent releases and is currently at version 4.7.1.","language":"python","status":"active","version":"4.7.1","tags":["NLP","inference","Transformer","machine translation","LLM","quantization","speech recognition","GPU","CPU"],"last_verified":"Wed May 20","install":[{"cmd":"pip install ctranslate2","imports":["import ctranslate2\ntranslator = ctranslate2.Translator(model_path)","import ctranslate2\ngenerator = ctranslate2.Generator(model_path)","ct2-transformers-converter --model facebook/m2m100_418M --output_dir ct2_model"]},{"cmd":"pip install ctranslate2 # Ensure CUDA 12.x and cuDNN 8/9 are installed separately for NVIDIA GPUs.","imports":[]},{"cmd":"pip install ctranslate2 --extra-index-url https://download.pytorch.org/whl/rocm6.0 # For AMD GPUs with ROCm 6.0+","imports":[]}],"homepage":"https://opennmt.net","github":"https://github.com/OpenNMT/CTranslate2","docs":"https://opennmt.net/CTranslate2","changelog":null,"pypi":"https://pypi.org/project/ctranslate2/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":50,"avg_install_s":5,"avg_import_s":0.66,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/ctranslate2/compatibility"}}