{"library":"trl","install":[{"cmd":"pip install trl","imports":["from trl import SFTConfig, SFTTrainer\nfrom datasets import load_dataset\n\ntrainer = SFTTrainer(\n    model='Qwen/Qwen2.5-0.5B',\n    args=SFTConfig(output_dir='output', max_length=512),\n    train_dataset=load_dataset('trl-lib/Capybara', split='train'),\n    processing_class=tokenizer,  # processing_class= replaces the older tokenizer= argument\n)","from trl import DPOConfig, DPOTrainer\n\ntrainer = DPOTrainer(\n    model=model,\n    args=DPOConfig(output_dir='output', beta=0.1),\n    train_dataset=dataset,\n    processing_class=tokenizer,\n    # With PEFT: no ref_model needed — adapter is disabled to recover reference behavior\n)"]},{"cmd":"pip install trl[peft]","imports":[]},{"cmd":"pip install trl[quantization]","imports":[]},{"cmd":"pip install trl[vllm]","imports":[]}]}