{"library":"trl","type":"library","category":null,"description":"Hugging Face library for post-training LLMs: SFT, DPO, GRPO, PPO, and reward modeling. Current version is 0.29.1 (Mar 2026). Requires Python >=3.10. Extremely high API churn, with major parameter renames across versions; for example, tokenizer= was renamed to processing_class= in 0.12. Still pre-1.0 (Development Status: Pre-Alpha).","language":"python","status":"active","version":"0.29.1","tags":["fine-tuning","rlhf","dpo","sft","grpo","llm","huggingface","post-training"],"last_verified":"Tue Jun 09","install":[{"cmd":"pip install trl","imports":["from trl import SFTConfig, SFTTrainer\nfrom datasets import load_dataset\n\ntrainer = SFTTrainer(\n    model='Qwen/Qwen2.5-0.5B',\n    args=SFTConfig(output_dir='output', max_length=512),\n    train_dataset=load_dataset('trl-lib/Capybara', split='train'),\n    processing_class=tokenizer,  # not tokenizer=\n)","from trl import DPOConfig, DPOTrainer\n\ntrainer = DPOTrainer(\n    model=model,\n    args=DPOConfig(output_dir='output', beta=0.1),\n    train_dataset=dataset,\n    processing_class=tokenizer,\n    # With PEFT: no ref_model needed — adapter is disabled to recover reference behavior\n)"]},{"cmd":"pip install trl[peft]","imports":[]},{"cmd":"pip install trl[quantization]","imports":[]},{"cmd":"pip install trl[vllm]","imports":[]}],"homepage":null,"github":"https://github.com/huggingface/trl","docs":null,"changelog":null,"pypi":"https://pypi.org/project/trl/","npm":null,"openapi_spec":null,"status_page":null,"smithery":null,"compatibility":{"summary":{"python_range":"3.10–3.9","success_rate":23,"avg_install_s":83.8,"avg_import_s":22.47,"wheel_type":"wheel"},"url":"https://checklist.day/v1/registry/trl/compatibility"}}