CONTRIBUTING.md
LICENSE
MANIFEST.in
README.md
VERSION
pyproject.toml
trl/__init__.py
trl/_compat.py
trl/_lazy_module.py
trl/chat_template_utils.py
trl/data_utils.py
trl/import_utils.py
trl/py.typed
trl.egg-info/PKG-INFO
trl.egg-info/SOURCES.txt
trl.egg-info/dependency_links.txt
trl.egg-info/entry_points.txt
trl.egg-info/requires.txt
trl.egg-info/top_level.txt
trl/accelerate_configs/fsdp1.yaml
trl/accelerate_configs/fsdp2.yaml
trl/accelerate_configs/multi_gpu.yaml
trl/accelerate_configs/single_gpu.yaml
trl/accelerate_configs/zero1.yaml
trl/accelerate_configs/zero2.yaml
trl/accelerate_configs/zero3.yaml
trl/chat_templates/README.md
trl/chat_templates/cohere.jinja
trl/chat_templates/cohere2.jinja
trl/chat_templates/cohere2_training.jinja
trl/chat_templates/cohere_training.jinja
trl/chat_templates/deepseekv3.jinja
trl/chat_templates/deepseekv3_training.jinja
trl/chat_templates/gemma.jinja
trl/chat_templates/gemma3.jinja
trl/chat_templates/gemma3_training.jinja
trl/chat_templates/gemma_training.jinja
trl/chat_templates/glm4moe.jinja
trl/chat_templates/glm4moe_training.jinja
trl/chat_templates/gptoss.jinja
trl/chat_templates/gptoss_training.jinja
trl/chat_templates/llama3.jinja
trl/chat_templates/llama3_1.jinja
trl/chat_templates/llama3_2.jinja
trl/chat_templates/llama3_training.jinja
trl/chat_templates/phi3.jinja
trl/chat_templates/phi3_training.jinja
trl/chat_templates/qwen2_5.jinja
trl/chat_templates/qwen2_5_training.jinja
trl/chat_templates/qwen3.jinja
trl/chat_templates/qwen3_5_2b_and_below.jinja
trl/chat_templates/qwen3_5_4b_and_above.jinja
trl/chat_templates/qwen3_6.jinja
trl/chat_templates/qwen3_6_training.jinja
trl/chat_templates/qwen3_instruct_2507.jinja
trl/chat_templates/qwen3_instruct_2507_training.jinja
trl/chat_templates/qwen3_training.jinja
trl/chat_templates/qwen3_vl.jinja
trl/cli/__init__.py
trl/cli/accelerate_config.py
trl/cli/accelerate_launcher.py
trl/cli/main.py
trl/cli/commands/__init__.py
trl/cli/commands/base.py
trl/cli/commands/env.py
trl/cli/commands/skills.py
trl/cli/commands/training.py
trl/cli/commands/vllm_serve.py
trl/experimental/__init__.py
trl/experimental/merge_model_callback.py
trl/experimental/utils.py
trl/experimental/async_grpo/__init__.py
trl/experimental/async_grpo/async_grpo_config.py
trl/experimental/async_grpo/async_grpo_trainer.py
trl/experimental/async_grpo/async_rollout_worker.py
trl/experimental/bco/__init__.py
trl/experimental/bco/bco_config.py
trl/experimental/bco/bco_trainer.py
trl/experimental/bema_for_ref_model/__init__.py
trl/experimental/bema_for_ref_model/callback.py
trl/experimental/bema_for_ref_model/dpo_trainer.py
trl/experimental/cpo/__init__.py
trl/experimental/cpo/cpo_config.py
trl/experimental/cpo/cpo_trainer.py
trl/experimental/distillation/__init__.py
trl/experimental/distillation/distillation.py
trl/experimental/distillation/distillation_config.py
trl/experimental/distillation/distillation_trainer.py
trl/experimental/dppo/__init__.py
trl/experimental/dppo/dppo_config.py
trl/experimental/dppo/dppo_trainer.py
trl/experimental/gfpo/__init__.py
trl/experimental/gfpo/gfpo_config.py
trl/experimental/gfpo/gfpo_trainer.py
trl/experimental/gkd/__init__.py
trl/experimental/gkd/gkd_config.py
trl/experimental/gkd/gkd_trainer.py
trl/experimental/gold/__init__.py
trl/experimental/gold/gold.py
trl/experimental/gold/gold_config.py
trl/experimental/gold/gold_trainer.py
trl/experimental/grpo_with_replay_buffer/__init__.py
trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_config.py
trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_trainer.py
trl/experimental/gspo_token/__init__.py
trl/experimental/gspo_token/grpo_trainer.py
trl/experimental/kto/__init__.py
trl/experimental/kto/kto_config.py
trl/experimental/kto/kto_trainer.py
trl/experimental/minillm/__init__.py
trl/experimental/minillm/minillm_config.py
trl/experimental/minillm/minillm_trainer.py
trl/experimental/nash_md/__init__.py
trl/experimental/nash_md/nash_md_config.py
trl/experimental/nash_md/nash_md_trainer.py
trl/experimental/online_dpo/__init__.py
trl/experimental/online_dpo/online_dpo_config.py
trl/experimental/online_dpo/online_dpo_trainer.py
trl/experimental/openenv/__init__.py
trl/experimental/openenv/utils.py
trl/experimental/openreward/__init__.py
trl/experimental/openreward/_spec.py
trl/experimental/openreward/environment.py
trl/experimental/orpo/__init__.py
trl/experimental/orpo/orpo_config.py
trl/experimental/orpo/orpo_trainer.py
trl/experimental/papo/__init__.py
trl/experimental/papo/papo_config.py
trl/experimental/papo/papo_trainer.py
trl/experimental/ppo/__init__.py
trl/experimental/ppo/modeling_value_head.py
trl/experimental/ppo/ppo_config.py
trl/experimental/ppo/ppo_trainer.py
trl/experimental/prm/__init__.py
trl/experimental/prm/prm_config.py
trl/experimental/prm/prm_trainer.py
trl/experimental/sdft/__init__.py
trl/experimental/sdft/sdft.py
trl/experimental/sdft/sdft_config.py
trl/experimental/sdft/sdft_trainer.py
trl/experimental/sdpo/__init__.py
trl/experimental/sdpo/sdpo.py
trl/experimental/sdpo/sdpo_config.py
trl/experimental/sdpo/sdpo_trainer.py
trl/experimental/self_distillation/__init__.py
trl/experimental/self_distillation/base_self_distillation_trainer.py
trl/experimental/self_distillation/online_rollout_mixin.py
trl/experimental/self_distillation/peft_adapter_ema_callback.py
trl/experimental/self_distillation/self_distillation_config.py
trl/experimental/self_distillation/self_distillation_mixin.py
trl/experimental/self_distillation/teacher_context.py
trl/experimental/ssd/__init__.py
trl/experimental/ssd/ssd.py
trl/experimental/ssd/ssd_config.py
trl/experimental/ssd/ssd_eval.py
trl/experimental/ssd/ssd_trainer.py
trl/experimental/tpo/__init__.py
trl/experimental/tpo/tpo.py
trl/experimental/tpo/tpo_config.py
trl/experimental/tpo/tpo_trainer.py
trl/experimental/xpo/__init__.py
trl/experimental/xpo/xpo_config.py
trl/experimental/xpo/xpo_trainer.py
trl/extras/__init__.py
trl/extras/dataset_formatting.py
trl/extras/profiling.py
trl/generation/__init__.py
trl/generation/vllm_client.py
trl/generation/vllm_generation.py
trl/models/__init__.py
trl/models/activation_offloading.py
trl/models/utils.py
trl/rewards/__init__.py
trl/rewards/accuracy_rewards.py
trl/rewards/format_rewards.py
trl/rewards/other_rewards.py
trl/scripts/__init__.py
trl/scripts/_hf_argparser.py
trl/scripts/dpo.py
trl/scripts/env.py
trl/scripts/grpo.py
trl/scripts/kto.py
trl/scripts/reward.py
trl/scripts/rloo.py
trl/scripts/sft.py
trl/scripts/utils.py
trl/scripts/vllm_serve.py
trl/skills/__init__.py
trl/skills/cli.py
trl/skills/skills.py
trl/skills/trl-training/SKILL.md
trl/templates/completions_dataset_card.md
trl/templates/lm_model_card.md
trl/templates/rm_model_card.md
trl/trainer/__init__.py
trl/trainer/base_config.py
trl/trainer/base_trainer.py
trl/trainer/callbacks.py
trl/trainer/dpo_config.py
trl/trainer/dpo_trainer.py
trl/trainer/grpo_config.py
trl/trainer/grpo_trainer.py
trl/trainer/kto_config.py
trl/trainer/kto_trainer.py
trl/trainer/model_config.py
trl/trainer/reward_config.py
trl/trainer/reward_trainer.py
trl/trainer/rloo_config.py
trl/trainer/rloo_trainer.py
trl/trainer/sft_config.py
trl/trainer/sft_trainer.py
trl/trainer/utils.py