LICENSE
README.md
pyproject.toml
setup.py
oxrl/__init__.py
oxrl/cli.py
oxrl/trainer.py
oxrl.egg-info/PKG-INFO
oxrl.egg-info/SOURCES.txt
oxrl.egg-info/dependency_links.txt
oxrl.egg-info/entry_points.txt
oxrl.egg-info/requires.txt
oxrl.egg-info/top_level.txt
oxrl/algs/__init__.py
oxrl/algs/base.py
oxrl/algs/cpt.py
oxrl/algs/dpo.py
oxrl/algs/grpo.py
oxrl/algs/ipo.py
oxrl/algs/kd.py
oxrl/algs/kto.py
oxrl/algs/online_dpo.py
oxrl/algs/orpo.py
oxrl/algs/ppo.py
oxrl/algs/rft.py
oxrl/algs/rm.py
oxrl/algs/sft.py
oxrl/algs/simpo.py
oxrl/algs/spin.py
oxrl/configs/__init__.py
oxrl/configs/load.py
oxrl/datasets/__init__.py
oxrl/datasets/mixed_ratio_sampler.py
oxrl/datasets/prompt_only.py
oxrl/datasets/prompt_preference.py
oxrl/datasets/prompt_response.py
oxrl/preprocessing/__init__.py
oxrl/preprocessing/audio_dummy.py
oxrl/preprocessing/gpqa.py
oxrl/preprocessing/gsm8k.py
oxrl/preprocessing/math_hard.py
oxrl/preprocessing/mbpp.py
oxrl/preprocessing/openr1_math.py
oxrl/preprocessing/ultrafeedback.py
oxrl/preprocessing/vision_dummy.py
oxrl/rewards/__init__.py
oxrl/rewards/base.py
oxrl/rewards/code.py
oxrl/rewards/format.py
oxrl/rewards/math.py
oxrl/rewards/multimodal.py
oxrl/rewards/qa.py
oxrl/rewards/reasoning.py
oxrl/rewards/rm_reward.py
oxrl/rollouts/__init__.py
oxrl/rollouts/replay_buffer.py
oxrl/rollouts/vllm_engine.py
oxrl/swarm/__init__.py
oxrl/swarm/bug_reporter.py
oxrl/swarm/bugfixer.py
oxrl/swarm/config_generator.py
oxrl/swarm/model_registry.py
oxrl/swarm/orchestrator.py
oxrl/swarm/scout.py
oxrl/utils/__init__.py
oxrl/utils/logging.py
oxrl/utils/setup.py
oxrl/utils/utils.py
tests/test_bugs.py