CONTRIBUTING.md
LICENSE
MANIFEST.in
README.md
VERSION
pyproject.toml
trl/__init__.py
trl/cli.py
trl/core.py
trl/data_utils.py
trl/import_utils.py
trl/mergekit_utils.py
trl/py.typed
trl.egg-info/PKG-INFO
trl.egg-info/SOURCES.txt
trl.egg-info/dependency_links.txt
trl.egg-info/entry_points.txt
trl.egg-info/requires.txt
trl.egg-info/top_level.txt
trl/accelerate_configs/fsdp1.yaml
trl/accelerate_configs/fsdp2.yaml
trl/accelerate_configs/multi_gpu.yaml
trl/accelerate_configs/single_gpu.yaml
trl/accelerate_configs/zero1.yaml
trl/accelerate_configs/zero2.yaml
trl/accelerate_configs/zero3.yaml
trl/experimental/__init__.py
trl/experimental/bema_for_ref_model/__init__.py
trl/experimental/bema_for_ref_model/callback.py
trl/experimental/bema_for_ref_model/dpo_trainer.py
trl/experimental/gfpo/__init__.py
trl/experimental/gfpo/gfpo_config.py
trl/experimental/gfpo/gfpo_trainer.py
trl/experimental/grpo_with_replay_buffer/__init__.py
trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_config.py
trl/experimental/grpo_with_replay_buffer/grpo_with_replay_buffer_trainer.py
trl/experimental/gspo_token/__init__.py
trl/experimental/gspo_token/grpo_trainer.py
trl/extras/__init__.py
trl/extras/best_of_n_sampler.py
trl/extras/dataset_formatting.py
trl/extras/profiling.py
trl/extras/vllm_client.py
trl/models/__init__.py
trl/models/activation_offloading.py
trl/models/modeling_base.py
trl/models/modeling_value_head.py
trl/models/utils.py
trl/rewards/__init__.py
trl/rewards/accuracy_rewards.py
trl/rewards/format_rewards.py
trl/rewards/other_rewards.py
trl/scripts/__init__.py
trl/scripts/dpo.py
trl/scripts/env.py
trl/scripts/grpo.py
trl/scripts/kto.py
trl/scripts/reward.py
trl/scripts/rloo.py
trl/scripts/sft.py
trl/scripts/utils.py
trl/scripts/vllm_serve.py
trl/templates/lm_model_card.md
trl/templates/rm_model_card.md
trl/trainer/__init__.py
trl/trainer/base_trainer.py
trl/trainer/bco_config.py
trl/trainer/bco_trainer.py
trl/trainer/callbacks.py
trl/trainer/cpo_config.py
trl/trainer/cpo_trainer.py
trl/trainer/dpo_config.py
trl/trainer/dpo_trainer.py
trl/trainer/gkd_config.py
trl/trainer/gkd_trainer.py
trl/trainer/grpo_config.py
trl/trainer/grpo_trainer.py
trl/trainer/judges.py
trl/trainer/kto_config.py
trl/trainer/kto_trainer.py
trl/trainer/model_config.py
trl/trainer/nash_md_config.py
trl/trainer/nash_md_trainer.py
trl/trainer/online_dpo_config.py
trl/trainer/online_dpo_trainer.py
trl/trainer/orpo_config.py
trl/trainer/orpo_trainer.py
trl/trainer/ppo_config.py
trl/trainer/ppo_trainer.py
trl/trainer/prm_config.py
trl/trainer/prm_trainer.py
trl/trainer/reward_config.py
trl/trainer/reward_trainer.py
trl/trainer/rloo_config.py
trl/trainer/rloo_trainer.py
trl/trainer/sft_config.py
trl/trainer/sft_trainer.py
trl/trainer/utils.py
trl/trainer/xpo_config.py
trl/trainer/xpo_trainer.py