LICENSE
README.md
pyproject.toml
setup.py
oxrl/__init__.py
oxrl/cli.py
oxrl/main_rl.py
oxrl/main_sl.py
oxrl/trainer.py
oxrl.egg-info/PKG-INFO
oxrl.egg-info/SOURCES.txt
oxrl.egg-info/dependency_links.txt
oxrl.egg-info/entry_points.txt
oxrl.egg-info/requires.txt
oxrl.egg-info/top_level.txt
oxrl/algs/__init__.py
oxrl/algs/alpha_dpo.py
oxrl/algs/alphapo.py
oxrl/algs/aot.py
oxrl/algs/apo.py
oxrl/algs/base.py
oxrl/algs/bco.py
oxrl/algs/betadpo.py
oxrl/algs/bpo.py
oxrl/algs/c2dpo.py
oxrl/algs/caldpo.py
oxrl/algs/cdpo.py
oxrl/algs/chipo.py
oxrl/algs/cpo.py
oxrl/algs/cposimpo.py
oxrl/algs/cpt.py
oxrl/algs/discopop.py
oxrl/algs/dpnll.py
oxrl/algs/dpo.py
oxrl/algs/dpop.py
oxrl/algs/dposhift.py
oxrl/algs/drdpo.py
oxrl/algs/exo.py
oxrl/algs/fdpo.py
oxrl/algs/focalpo.py
oxrl/algs/gpo.py
oxrl/algs/grpo.py
oxrl/algs/hdpo.py
oxrl/algs/hinge.py
oxrl/algs/ipo.py
oxrl/algs/kd.py
oxrl/algs/kto.py
oxrl/algs/minor_dpo.py
oxrl/algs/nca.py
oxrl/algs/odpo.py
oxrl/algs/online_dpo.py
oxrl/algs/orpo.py
oxrl/algs/ppo.py
oxrl/algs/rdpo.py
oxrl/algs/rft.py
oxrl/algs/rm.py
oxrl/algs/robust_dpo.py
oxrl/algs/sampo.py
oxrl/algs/sft.py
oxrl/algs/simpo.py
oxrl/algs/spin.py
oxrl/algs/spo.py
oxrl/algs/sppo.py
oxrl/algs/wpo.py
oxrl/algs/losses/__init__.py
oxrl/algs/losses/cispo.py
oxrl/algs/losses/common.py
oxrl/algs/losses/gspo.py
oxrl/algs/losses/sgrpo.py
oxrl/configs/__init__.py
oxrl/configs/load.py
oxrl/configs/loader.py
oxrl/configs/schema.py
oxrl/configs/sync.py
oxrl/datasets/__init__.py
oxrl/datasets/mixed_ratio_sampler.py
oxrl/datasets/prompt_only.py
oxrl/datasets/prompt_preference.py
oxrl/datasets/prompt_response.py
oxrl/loops/__init__.py
oxrl/loops/checkpoint_phase.py
oxrl/loops/rollout_phase.py
oxrl/loops/train_phase.py
oxrl/preprocessing/__init__.py
oxrl/preprocessing/audio_dummy.py
oxrl/preprocessing/gpqa.py
oxrl/preprocessing/gsm8k.py
oxrl/preprocessing/math_hard.py
oxrl/preprocessing/mbpp.py
oxrl/preprocessing/openr1_math.py
oxrl/preprocessing/ultrafeedback.py
oxrl/preprocessing/vision_dummy.py
oxrl/rewards/__init__.py
oxrl/rewards/base.py
oxrl/rewards/code.py
oxrl/rewards/format.py
oxrl/rewards/math.py
oxrl/rewards/multimodal.py
oxrl/rewards/qa.py
oxrl/rewards/reasoning.py
oxrl/rewards/rm_reward.py
oxrl/rollouts/__init__.py
oxrl/rollouts/logprob_utils.py
oxrl/rollouts/replay_buffer.py
oxrl/rollouts/reward_scoring.py
oxrl/rollouts/sampling.py
oxrl/rollouts/vllm_engine.py
oxrl/setup/__init__.py
oxrl/setup/dataloader_factory.py
oxrl/setup/engine_factory.py
oxrl/setup/ray_setup.py
oxrl/swarm/__init__.py
oxrl/swarm/bug_reporter.py
oxrl/swarm/bugfixer.py
oxrl/swarm/config_generator.py
oxrl/swarm/model_registry.py
oxrl/swarm/orchestrator.py
oxrl/swarm/scout.py
oxrl/tools/__init__.py
oxrl/tools/checkpoint.py
oxrl/tools/lora_merge.py
oxrl/tools/tensor_utils.py
oxrl/utils/__init__.py
oxrl/utils/logging.py
oxrl/utils/setup.py
oxrl/utils/utils.py
tests/test_alpha_dpo.py
tests/test_alphapo.py
tests/test_aot.py
tests/test_apo.py
tests/test_bco.py
tests/test_betadpo.py
tests/test_bpo.py
tests/test_bugs.py
tests/test_c2dpo.py
tests/test_caldpo.py
tests/test_cdpo.py
tests/test_chipo.py
tests/test_cpo.py
tests/test_cposimpo.py
tests/test_discopop.py
tests/test_dpnll.py
tests/test_dpop.py
tests/test_dposhift.py
tests/test_drdpo.py
tests/test_exo.py
tests/test_fdpo.py
tests/test_focalpo.py
tests/test_gpo.py
tests/test_hdpo.py
tests/test_hinge.py
tests/test_lop_refactoring.py
tests/test_minor_dpo.py
tests/test_nca.py
tests/test_odpo.py
tests/test_rdpo.py
tests/test_robust_dpo.py
tests/test_sampo.py
tests/test_spo.py
tests/test_sppo.py
tests/test_wpo.py