.pylintrc
.style.yapf
CHANGELOG.md
LICENSE
MANIFEST.in
Makefile
README.md
README_zh.md
pyproject.toml
requirements-dev.txt
requirements-doc.txt
requirements.txt
setup.py
assets/logo.png
docs/PYPI_PUBLISH_GUIDE.md
docs/source/conf.py
docs/source/index.rst
docs/source/api_doc/datasets/audio_alpaca.rst
docs/source/api_doc/datasets/grm_dataset.rst
docs/source/api_doc/datasets/hpdv3.rst
docs/source/api_doc/datasets/image_reward_db.rst
docs/source/api_doc/datasets/imagegen_cot_reward.rst
docs/source/api_doc/datasets/index.rst
docs/source/api_doc/datasets/omnirewardbench.rst
docs/source/api_doc/datasets/process_reward_dataset.rst
docs/source/api_doc/datasets/prompts_dataset.rst
docs/source/api_doc/datasets/prompts_dataset_vl.rst
docs/source/api_doc/datasets/rapidata.rst
docs/source/api_doc/datasets/sft_dataset.rst
docs/source/api_doc/datasets/sft_dataset_vl.rst
docs/source/api_doc/datasets/srm_dataset.rst
docs/source/api_doc/datasets/utils.rst
docs/source/api_doc/models/actor_al.rst
docs/source/api_doc/models/actor_language.rst
docs/source/api_doc/models/actor_vl.rst
docs/source/api_doc/models/grm_vl.rst
docs/source/api_doc/models/index.rst
docs/source/api_doc/models/loss.rst
docs/source/api_doc/models/srm_al.rst
docs/source/api_doc/models/srm_vl.rst
docs/source/api_doc/models/utils.rst
docs/source/api_doc/models/monkey_patch/apply.rst
docs/source/api_doc/models/monkey_patch/hf_generate_patch.rst
docs/source/api_doc/models/monkey_patch/index.rst
docs/source/api_doc/models/monkey_patch/llama.rst
docs/source/api_doc/models/monkey_patch/qwen.rst
docs/source/api_doc/strategy/config.rst
docs/source/api_doc/strategy/fake_strategy.rst
docs/source/api_doc/strategy/index.rst
docs/source/api_doc/strategy/strategy.rst
docs/source/api_doc/strategy/strategy_base.rst
docs/source/api_doc/strategy/deepspeed/deepspeed.rst
docs/source/api_doc/strategy/deepspeed/deepspeed_utils.rst
docs/source/api_doc/strategy/deepspeed/index.rst
docs/source/api_doc/strategy/fsdp/fsdp_optimizer.rst
docs/source/api_doc/strategy/fsdp/fsdp_utils.rst
docs/source/api_doc/strategy/fsdp/fsdpv2.rst
docs/source/api_doc/strategy/fsdp/index.rst
docs/source/api_doc/strategy/sglang_utils/index.rst
docs/source/api_doc/strategy/sglang_utils/sgl_model_saver.rst
docs/source/api_doc/strategy/sglang_utils/sglang_engine.rst
docs/source/api_doc/strategy/utils/broadcast_utils.rst
docs/source/api_doc/strategy/utils/ckpt_utils.rst
docs/source/api_doc/strategy/utils/data_utils.rst
docs/source/api_doc/strategy/utils/distributed_util.rst
docs/source/api_doc/strategy/utils/index.rst
docs/source/api_doc/strategy/utils/optimizer_utils.rst
docs/source/api_doc/strategy/utils/parallel_utils.rst
docs/source/api_doc/strategy/utils/statistic.rst
docs/source/api_doc/strategy/vllm_utils/index.rst
docs/source/api_doc/strategy/vllm_utils/vllm_worker_wrap_no_ray.rst
docs/source/api_doc/trainer/experience_maker.rst
docs/source/api_doc/trainer/experience_maker_vl.rst
docs/source/api_doc/trainer/fast_exp_maker.rst
docs/source/api_doc/trainer/grm_trainer_vl.rst
docs/source/api_doc/trainer/index.rst
docs/source/api_doc/trainer/kl_controller.rst
docs/source/api_doc/trainer/ppo_trainer.rst
docs/source/api_doc/trainer/ppo_trainer_vl.rst
docs/source/api_doc/trainer/replay_buffer.rst
docs/source/api_doc/trainer/replay_buffer_utils.rst
docs/source/api_doc/trainer/replay_buffer_vl.rst
docs/source/api_doc/trainer/spmd_ppo_trainer.rst
docs/source/api_doc/trainer/srm_trainer_al.rst
docs/source/api_doc/trainer/srm_trainer_vl.rst
docs/source/api_doc/trainer/utils.rst
docs/source/api_doc/utils/cli_args.rst
docs/source/api_doc/utils/distributed_sampler.rst
docs/source/api_doc/utils/index.rst
docs/source/api_doc/utils/logging_utils.rst
docs/source/api_doc/utils/processor.rst
docs/source/api_doc/utils/remote_rm_utils.rst
docs/source/api_doc/utils/timer.rst
docs/source/api_doc/utils/trajectory_saver.rst
docs/source/api_doc/utils/utils.rst
docs/source/best_practice/contributing.md
docs/source/best_practice/faq.md
docs/source/best_practice/index.rst
docs/source/best_practice/model_testing.md
docs/source/best_practice/models.md
docs/source/best_practice/reward_model_training_en.md
docs/source/best_practice/reward_model_training_zh.md
docs/source/best_practice/strategy_design_philosophy.md
docs/source/best_practice/strategy_usage.rst
docs/source/best_practice/strategy_usage_zh.md
docs/source/best_practice/troubleshooting.md
docs/source/installation/index.rst
docs/source/installation/index_cn.rst
docs/source/quick_start/algorithms.md
docs/source/quick_start/algorithms_cn.md
docs/source/quick_start/configuration.md
docs/source/quick_start/index.rst
docs/source/quick_start/project.rst
examples/chat/chat.sh
examples/chat/test_chat.py
examples/grm_training/run_grm_vl.sh
examples/grm_training/test_grm_vl.py
examples/grm_training/train_grm_vl.py
examples/gsm8k_geo3k/reward_models_utils.py
examples/gsm8k_geo3k/run_grpo_geo3k_qwen2.5_vl_7b.sh
examples/gsm8k_geo3k/run_grpo_geo3k_qwen2.5_vl_7b_2.sh
examples/gsm8k_geo3k/run_grpo_gsm8k_qwen2.5_0.5b.sh
examples/gsm8k_geo3k/train_colocate.py
examples/gsm8k_geo3k/data_preprocess/geo3k.py
examples/gsm8k_geo3k/data_preprocess/gsm8k.py
examples/srm_training/run_srm_al.sh
examples/srm_training/run_srm_vl.sh
examples/srm_training/test_srm_al.py
examples/srm_training/test_srm_vl.py
examples/srm_training/train_srm_al.py
examples/srm_training/train_srm_vl.py
lightrft/__init__.py