.gitignore
.pre-commit-config.yaml
.readthedocs.yaml
CONTRIBUTING.md
LICENSE
README-zh.md
README.md
pyproject.toml
requirements-npu.txt
requirements.txt
setup.py
./docs/conf.py
./examples/custom_pipeline_example/custom_grpo.py
./examples/custom_reward/rewardfunc_gsm8k.py
./examples/data_preprocess/deepscaler.py
./examples/data_preprocess/geo3k.py
./examples/data_preprocess/gsm8k.py
./examples/data_preprocess/math_dataset.py
./examples/data_preprocess/mm_eureka.py
./examples/experimental/marft/config/code_env.py
./examples/experimental/marft/config/math_env.py
./examples/experimental/marft/config/process.py
./siirl/__init__.py
./siirl/_version.py
./siirl/main_dag.py
./siirl/dag_worker/__init__.py
./siirl/dag_worker/checkpoint_manager.py
./siirl/dag_worker/constants.py
./siirl/dag_worker/core_algos.py
./siirl/dag_worker/dag_utils.py
./siirl/dag_worker/dagworker.py
./siirl/dag_worker/data_structures.py
./siirl/dag_worker/metric_aggregator.py
./siirl/dag_worker/metrics_collector.py
./siirl/dag_worker/validator.py
./siirl/data_coordinator/__init__.py
./siirl/data_coordinator/data_buffer.py
./siirl/data_coordinator/protocol.py
./siirl/data_coordinator/sample.py
./siirl/data_coordinator/dataloader/__init__.py
./siirl/data_coordinator/dataloader/data_loader_node.py
./siirl/data_coordinator/dataloader/embodied_preprocess.py
./siirl/data_coordinator/dataloader/partitioned_dataset.py
./siirl/data_coordinator/dataloader/vision_utils.py
./siirl/engine/__init__.py
./siirl/engine/fsdp_workers.py
./siirl/engine/megatron_workers.py
./siirl/engine/actor/__init__.py
./siirl/engine/actor/base.py
./siirl/engine/actor/dp_actor.py
./siirl/engine/actor/embodied_actor.py
./siirl/engine/actor/megatron_actor.py
./siirl/engine/base_worker/__init__.py
./siirl/engine/base_worker/resouce_pool.py
./siirl/engine/base_worker/base/__init__.py
./siirl/engine/base_worker/base/worker.py
./siirl/engine/base_worker/megatron/__init__.py
./siirl/engine/base_worker/megatron/npu_mbridge_patch.py
./siirl/engine/base_worker/megatron/worker.py
./siirl/engine/base_worker/register_center/__init__.py
./siirl/engine/base_worker/register_center/register_center.py
./siirl/engine/critic/__init__.py
./siirl/engine/critic/base.py
./siirl/engine/critic/dp_critic.py
./siirl/engine/critic/megatron_critic.py
./siirl/engine/reward_manager/__init__.py
./siirl/engine/reward_manager/dapo.py
./siirl/engine/reward_manager/embodied.py
./siirl/engine/reward_manager/naive.py
./siirl/engine/reward_manager/parallel.py
./siirl/engine/reward_model/__init__.py
./siirl/engine/reward_model/base.py
./siirl/engine/reward_model/megatron/__init__.py
./siirl/engine/reward_model/megatron/reward_model.py
./siirl/engine/rollout/__init__.py
./siirl/engine/rollout/async_server.py
./siirl/engine/rollout/base.py
./siirl/engine/rollout/embodied_rollout.py
./siirl/engine/rollout/hf_rollout.py
./siirl/engine/rollout/schemas.py
./siirl/engine/rollout/sglang_rollout/__init__.py
./siirl/engine/rollout/sglang_rollout/async_sglang_server.py
./siirl/engine/rollout/sglang_rollout/sglang_rollout.py
./siirl/engine/rollout/sglang_rollout/utils.py
./siirl/engine/rollout/vllm_rollout/__init__.py
./siirl/engine/rollout/vllm_rollout/vllm_async_server.py
./siirl/engine/rollout/vllm_rollout/vllm_rollout_spmd.py
./siirl/engine/sharding_manager/__init__.py
./siirl/engine/sharding_manager/base.py
./siirl/engine/sharding_manager/fsdp_hf.py
./siirl/engine/sharding_manager/fsdp_sglang.py
./siirl/engine/sharding_manager/fsdp_ulysses.py
./siirl/engine/sharding_manager/fsdp_vllm.py
./siirl/engine/sharding_manager/megatron_sglang.py
./siirl/engine/sharding_manager/megatron_vllm.py
./siirl/environment/embodied/__init__.py
./siirl/environment/embodied/base.py
./siirl/environment/embodied/venv.py
./siirl/environment/embodied/adapters/__init__.py
./siirl/environment/embodied/adapters/libero.py
./siirl/execution/dag/__init__.py
./siirl/execution/dag/builtin_pipelines.py
./siirl/execution/dag/config_loader.py
./siirl/execution/dag/node.py
./siirl/execution/dag/pipeline.py
./siirl/execution/dag/task_graph.py
./siirl/execution/dag/task_loader.py
./siirl/execution/metric_worker/metric_worker.py
./siirl/execution/metric_worker/utils.py
./siirl/execution/rollout_flow/multi_agent/multiagent_generate.py
./siirl/execution/rollout_flow/multi_agent/utils.py
./siirl/execution/rollout_flow/multiturn/__init__.py
./siirl/execution/rollout_flow/multiturn/agent_loop/__init__.py
./siirl/execution/rollout_flow/multiturn/agent_loop/agent_loop.py
./siirl/execution/rollout_flow/multiturn/agent_loop/single_turn_agent_loop.py
./siirl/execution/rollout_flow/multiturn/agent_loop/tool_agent_loop.py
./siirl/execution/rollout_flow/multiturn/interactions/__init__.py
./siirl/execution/rollout_flow/multiturn/interactions/base.py
./siirl/execution/rollout_flow/multiturn/interactions/gsm8k_interaction.py
./siirl/execution/rollout_flow/multiturn/interactions/utils/__init__.py
./siirl/execution/rollout_flow/multiturn/interactions/utils/interaction_registry.py
./siirl/execution/rollout_flow/multiturn/tools/__init__.py
./siirl/execution/rollout_flow/multiturn/tools/base_tool.py
./siirl/execution/rollout_flow/multiturn/tools/geo3k_tool.py
./siirl/execution/rollout_flow/multiturn/tools/gsm8k_tool.py
./siirl/execution/rollout_flow/multiturn/tools/mcp_base_tool.py
./siirl/execution/rollout_flow/multiturn/tools/mcp_search_tool.py
./siirl/execution/rollout_flow/multiturn/tools/sandbox_fusion_tools.py
./siirl/execution/rollout_flow/multiturn/tools/schemas.py
./siirl/execution/rollout_flow/multiturn/tools/search_tool.py
./siirl/execution/rollout_flow/multiturn/tools/utils/__init__.py
./siirl/execution/rollout_flow/multiturn/tools/utils/search_r1_like_utils.py
./siirl/execution/rollout_flow/multiturn/tools/utils/tool_registry.py
./siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/McpClientManager.py
./siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/__init__.py
./siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/utils.py
./siirl/execution/scheduler/__init__.py
./siirl/execution/scheduler/enums.py
./siirl/execution/scheduler/graph_updater.py
./siirl/execution/scheduler/launch.py
./siirl/execution/scheduler/process_group_manager.py
./siirl/execution/scheduler/ray_actor_manager.py
./siirl/execution/scheduler/resource_manager.py
./siirl/execution/scheduler/reward.py
./siirl/execution/scheduler/task_scheduler.py
./siirl/models/__init__.py
./siirl/models/loader.py
./siirl/models/patcher.py
./siirl/models/registry.py
./siirl/models/weight_loader_registry.py
./siirl/models/embodied/openvla/__init__.py
./siirl/models/embodied/openvla/configuration_prismatic.py
./siirl/models/embodied/openvla/modeling_prismatic.py
./siirl/models/embodied/openvla/processing_prismatic.py
./siirl/models/embodied/openvla_oft/__init__.py
./siirl/models/embodied/openvla_oft/configuration_prismatic.py
./siirl/models/embodied/openvla_oft/constants.py
./siirl/models/embodied/openvla_oft/modeling_prismatic.py
./siirl/models/embodied/openvla_oft/processing_prismatic.py
./siirl/models/embodied/openvla_oft/train_utils.py
./siirl/models/llama/__init__.py
./siirl/models/llama/megatron/__init__.py
./siirl/models/llama/megatron/modeling_llama_megatron.py
./siirl/models/llama/megatron/checkpoint_utils/__init__.py
./siirl/models/llama/megatron/checkpoint_utils/llama_loader.py
./siirl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
./siirl/models/llama/megatron/checkpoint_utils/llama_saver.py
./siirl/models/llama/megatron/layers/__init__.py
./siirl/models/llama/megatron/layers/parallel_attention.py
./siirl/models/llama/megatron/layers/parallel_decoder.py
./siirl/models/llama/megatron/layers/parallel_linear.py
./siirl/models/llama/megatron/layers/parallel_mlp.py
./siirl/models/llama/megatron/layers/parallel_rmsnorm.py
./siirl/models/mcore/__init__.py
./siirl/models/mcore/config_converter.py
./siirl/models/mcore/loader.py
./siirl/models/mcore/mbridge.py
./siirl/models/mcore/model_forward.py
./siirl/models/mcore/model_forward_fused.py
./siirl/models/mcore/model_initializer.py
./siirl/models/mcore/patch_v012.py
./siirl/models/mcore/registry.py
./siirl/models/mcore/saver.py
./siirl/models/mcore/util.py
./siirl/models/mcore/weight_converter.py
./siirl/models/model_utils/__init__.py
./siirl/models/model_utils/visual.py
./siirl/models/qwen2/__init__.py
./siirl/models/qwen2/megatron/__init__.py
./siirl/models/qwen2/megatron/modeling_qwen2_megatron.py
./siirl/models/qwen2/megatron/checkpoint_utils/__init__.py
./siirl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
./siirl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
./siirl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
./siirl/models/qwen2/megatron/layers/__init__.py
./siirl/models/qwen2/megatron/layers/parallel_attention.py
./siirl/models/qwen2/megatron/layers/parallel_decoder.py
./siirl/models/qwen2/megatron/layers/parallel_linear.py
./siirl/models/qwen2/megatron/layers/parallel_mlp.py
./siirl/models/qwen2/megatron/layers/parallel_rmsnorm.py
./siirl/models/transformers/__init__.py
./siirl/models/transformers/internvl.py
./siirl/models/transformers/kimi_vl.py
./siirl/models/transformers/llama.py
./siirl/models/transformers/monkey_patch.py
./siirl/models/transformers/npu_patch.py
./siirl/models/transformers/qwen2.py
./siirl/models/transformers/qwen2_5_vl.py
./siirl/models/transformers/qwen2_vl.py
./siirl/models/transformers/transformers_compat.py
./siirl/models/transformers/internvl_chat/__init__.py
./siirl/models/transformers/internvl_chat/configuration_intern_vit.py
./siirl/models/transformers/internvl_chat/configuration_internlm2.py
./siirl/models/transformers/internvl_chat/configuration_internvl_chat.py
./siirl/models/transformers/internvl_chat/modeling_intern_vit.py
./siirl/models/transformers/internvl_chat/modeling_internlm2.py
./siirl/models/transformers/internvl_chat/modeling_internvl_chat.py
./siirl/models/transformers/internvl_chat/tokenization_internlm2.py
./siirl/models/transformers/internvl_chat/tokenization_internlm2_fast.py
./siirl/params/__init__.py
./siirl/params/dag_args.py
./siirl/params/data_args.py
./siirl/params/display_dict.py
./siirl/params/embodied_args.py
./siirl/params/model_args.py
./siirl/params/parser.py
./siirl/params/profiler_args.py
./siirl/params/training_args.py
./siirl/third_party/__init__.py
./siirl/third_party/sglang/__init__.py
./siirl/third_party/sglang/parallel_state.py
./siirl/user_interface/filter_interface/__init__.py
./siirl/user_interface/filter_interface/dapo.py
./siirl/user_interface/filter_interface/embodied.py
./siirl/user_interface/rewards_interface/custom_gsm8k_reward.py
./siirl/utils/__init__.py
./siirl/utils/import_string.py
./siirl/utils/memory_utils.py
./siirl/utils/checkpoint/__init__.py
./siirl/utils/checkpoint/checkpoint_manager.py
./siirl/utils/checkpoint/fsdp_checkpoint_manager.py
./siirl/utils/checkpoint/megatron_checkpoint_manager.py
./siirl/utils/debug/__init__.py
./siirl/utils/debug/mstx_profile.py
./siirl/utils/debug/performance.py
./siirl/utils/debug/profile.py
./siirl/utils/embodied/__init__.py
./siirl/utils/embodied/libero_utils.py
./siirl/utils/embodied/openvla_utils.py
./siirl/utils/embodied/video_emb.py
./siirl/utils/experimental/__init__.py
./siirl/utils/experimental/torch_functional.py
./siirl/utils/extras/__init__.py
./siirl/utils/extras/device.py
./siirl/utils/extras/fs.py
./siirl/utils/extras/hdfs_io.py
./siirl/utils/extras/import_utils.py
./siirl/utils/extras/misc.py
./siirl/utils/extras/net_utils.py
./siirl/utils/extras/packages.py
./siirl/utils/extras/patch.py
./siirl/utils/extras/py_functional.py
./siirl/utils/extras/ray_utils.py
./siirl/utils/kernel/__init__.py
./siirl/utils/kernel/kernels.py
./siirl/utils/kernel/linear_cross_entropy.py
./siirl/utils/logger/__init__.py
./siirl/utils/logger/aggregate_logger.py
./siirl/utils/logger/logging_utils.py
./siirl/utils/logger/tracking.py
./siirl/utils/megatron/__init__.py
./siirl/utils/megatron/dist_checkpointing.py
./siirl/utils/megatron/megatron_utils.py
./siirl/utils/megatron/memory.py
./siirl/utils/megatron/memory_buffer.py
./siirl/utils/megatron/optimizer.py
./siirl/utils/megatron/pipeline_parallel.py
./siirl/utils/megatron/sequence_parallel.py
./siirl/utils/megatron/tensor_parallel.py
./siirl/utils/metrics/__init__.py
./siirl/utils/metrics/metric_utils.py
./siirl/utils/model_utils/__init__.py
./siirl/utils/model_utils/activation_offload.py
./siirl/utils/model_utils/attention_utils.py
./siirl/utils/model_utils/flops_counter.py
./siirl/utils/model_utils/fsdp_utils.py
./siirl/utils/model_utils/model.py
./siirl/utils/model_utils/npu_utils.py
./siirl/utils/model_utils/seqlen_balancing.py
./siirl/utils/model_utils/tensordict_utils.py
./siirl/utils/model_utils/torch_dtypes.py
./siirl/utils/model_utils/torch_functional.py
./siirl/utils/model_utils/ulysses.py
./siirl/utils/model_utils/vllm_utils.py
./siirl/utils/reward_score/__init__.py
./siirl/utils/reward_score/embodied.py
./siirl/utils/reward_score/geo3k.py
./siirl/utils/reward_score/gsm8k.py
./siirl/utils/reward_score/math.py
./siirl/utils/reward_score/math_batch.py
./siirl/utils/reward_score/math_dapo.py
./siirl/utils/reward_score/math_verify.py
./siirl/utils/reward_score/mm_eureka.py
./siirl/utils/reward_score/search_r1_like_qa_em.py
./siirl/utils/reward_score/prime_code/__init__.py
./siirl/utils/reward_score/prime_code/testing_util.py
./siirl/utils/reward_score/prime_code/utils.py
./siirl/utils/reward_score/prime_math/__init__.py
./siirl/utils/reward_score/prime_math/grader.py
./siirl/utils/reward_score/prime_math/math_normalize.py
./siirl/utils/reward_score/sandbox_fusion/__init__.py
./siirl/utils/reward_score/sandbox_fusion/utils.py
./tests/__init__.py
./tests/dag/test_config_loader.py
./tests/dag/test_node.py
./tests/dag/test_task_graph.py
./tests/dag/test_task_loader.py
./tests/dag_worker/test_dag_worker.py
./tests/dag_worker/test_dapo_merge.py
./tests/dag_worker/test_dapo_pipeline.py
./tests/data_buffer/detailed_put_performance_test.py
./tests/data_buffer/performance_test_data_buffer.py
./tests/data_buffer/test_data_buffer.py
./tests/scheduler/test_process_group_manager.py
./tests/scheduler/test_task_scheduler.py
asset/batch_size_total_throughput_final.png
asset/context_length_comparison_with_oom_label.png
asset/cube.jpg
asset/grpo_performance_comparison.png
asset/logo-feishu.png
asset/logo-wechat.png
asset/overview.png
asset/ppo_performance_comparison.png
asset/reward_and_entropy_comparison_final.png
asset/scaling_trend_new.png
asset/sii.png
asset/siiRL-feishu-group.png
asset/siiRL-wechat-group.png
asset/code_explained/dag_init.png
asset/code_explained/dag_worker.png
asset/code_explained/data_buffer_loop.png
asset/code_explained/data_loader.png
asset/code_explained/dist_pg.png
asset/code_explained/overview_diagram.png
asset/code_explained/pipeline.png
asset/code_explained/ray_trainer.png
asset/code_explained/siirl_arch.png
asset/code_explained/taskgraph_sched.png
asset/code_explained/train_init.png
docker/Dockerfile.cu124
docker/Dockerfile.cu126
docs/Makefile
docs/conf.py
docs/index.rst
docs/requirements-docs.txt
docs/_static/cube.jpg
docs/examples/config.rst
docs/examples/cpgd_example.rst
docs/examples/deepscaler_example.rst
docs/examples/embodied_srpo_example.rst
docs/examples/megatron_backend_example.rst
docs/examples/mm_eureka_example.rst
docs/hardware_tutorial/ascend_profiling_en.rst
docs/hardware_tutorial/ascend_quickstart.rst
docs/hardware_tutorial/metax_quickstart.rst
docs/preparation/prepare_data.rst
docs/preparation/reward_function.rst
docs/programming_guide/code_structure.rst
docs/programming_guide/siiRL_code_explained.rst
docs/programming_guide/siirl_architecture_guide.rst
docs/programming_guide/srpo_code_explained.rst
docs/start/install.rst
docs/start/quickstart.rst
docs/user_interface/filter_interface.rst
docs/user_interface/metrics_interface.rst
docs/user_interface/pipeline_interface.rst
docs/user_interface/reward_interface.rst
examples/cpgd_trainer/run_qwen2_5-7b.sh
examples/cpgd_trainer/run_qwen2_5_vl-72b.sh
examples/cpgd_trainer/run_qwen2_5_vl-7b.sh
examples/cpgd_trainer/run_qwen3-1.7b.sh
examples/cpgd_trainer/run_qwen3-8b.sh
examples/custom_pipeline_example/custom_grpo.py
examples/custom_reward/rewardfunc_gsm8k.py
examples/custom_reward/run_qwen2_5-7b-custom_reward.sh
examples/dapo_trainer/run_qwen2_5-7b.sh
examples/dapo_trainer/run_qwen3-235b-megatron-gspo.sh
examples/dapo_trainer/run_qwen3-8b.sh
examples/data_preprocess/deepscaler.py
examples/data_preprocess/geo3k.py
examples/data_preprocess/gsm8k.py
examples/data_preprocess/math_dataset.py
examples/data_preprocess/mm_eureka.py
examples/embodied_srpo_trainer/run_openvla_oft_libero_goal.sh
examples/embodied_srpo_trainer/run_openvla_oft_libero_long.sh
examples/embodied_srpo_trainer/run_openvla_oft_libero_object.sh
examples/embodied_srpo_trainer/run_openvla_oft_libero_spatial.sh
examples/experimental/marft/run_qwen2_5-3b_marft.sh
examples/experimental/marft/config/code_env.py
examples/experimental/marft/config/math_env.py
examples/experimental/marft/config/process.py
examples/experimental/marft/config/workflow_marft.yaml
examples/experimental/marft/config/workflow_marft_code.yaml
examples/experimental/multiturn_server/run_qwen2_5-3b_grpo_multiturn_vllm.sh
examples/grpo_trainer/run_qwen2_5-32b-metax.sh
examples/grpo_trainer/run_qwen2_5-32b-npu.sh
examples/grpo_trainer/run_qwen2_5-72b-npu.sh
examples/grpo_trainer/run_qwen2_5-7b-npu-e2e_prof.sh
examples/grpo_trainer/run_qwen2_5-7b-npu-mindspeed.sh
examples/grpo_trainer/run_qwen2_5-7b-npu.sh
examples/grpo_trainer/run_qwen2_5-7b.sh
examples/grpo_trainer/run_qwen2_5_vl-72b.sh
examples/grpo_trainer/run_qwen2_5_vl-7b-npu.sh
examples/grpo_trainer/run_qwen2_5_vl-7b.sh
examples/grpo_trainer/run_qwen3-235b-megatron.sh
examples/grpo_trainer/run_qwen3-235b-npu-mindspeed.sh
examples/grpo_trainer/run_qwen3-30b-npu-mindspeed.sh
examples/grpo_trainer/run_qwen3-8b-megatron.sh
examples/grpo_trainer/run_qwen3-8b.sh
examples/gspo_trainer/run_qwen3-1.7b.sh
examples/gspo_trainer/run_qwen3-235b-megatron.sh
examples/gspo_trainer/run_qwen3-30b-gspo-megatron.sh
examples/multi_turn/config/interaction_config/gsm8k_interaction_config.yaml
examples/multi_turn/config/tool_config/gsm8k_tool_config.yaml
examples/multi_turn/gsm8k/run_qwen2_5-3b_grpo_multiturn_sglang.sh
examples/ppo_trainer/run_qwen2_5-72b.sh
examples/ppo_trainer/run_qwen3-8b-megatron.sh
examples/ppo_trainer/run_qwen3-8b.sh
siirl/__init__.py
siirl/main_dag.py
siirl.egg-info/PKG-INFO
siirl.egg-info/SOURCES.txt
siirl.egg-info/dependency_links.txt
siirl.egg-info/requires.txt
siirl.egg-info/top_level.txt
siirl/dag_worker/__init__.py
siirl/dag_worker/checkpoint_manager.py
siirl/dag_worker/constants.py
siirl/dag_worker/core_algos.py
siirl/dag_worker/dag_utils.py
siirl/dag_worker/dagworker.py
siirl/dag_worker/data_structures.py
siirl/dag_worker/metric_aggregator.py
siirl/dag_worker/metrics_collector.py
siirl/dag_worker/validator.py
siirl/data_coordinator/__init__.py
siirl/data_coordinator/data_buffer.py
siirl/data_coordinator/protocol.py
siirl/data_coordinator/sample.py
siirl/data_coordinator/dataloader/__init__.py
siirl/data_coordinator/dataloader/data_loader_node.py
siirl/data_coordinator/dataloader/embodied_preprocess.py
siirl/data_coordinator/dataloader/partitioned_dataset.py
siirl/data_coordinator/dataloader/vision_utils.py
siirl/engine/__init__.py
siirl/engine/fsdp_workers.py
siirl/engine/megatron_workers.py
siirl/engine/actor/__init__.py
siirl/engine/actor/base.py
siirl/engine/actor/dp_actor.py
siirl/engine/actor/embodied_actor.py
siirl/engine/actor/megatron_actor.py
siirl/engine/base_worker/__init__.py
siirl/engine/base_worker/resouce_pool.py
siirl/engine/base_worker/base/__init__.py
siirl/engine/base_worker/base/worker.py
siirl/engine/base_worker/megatron/__init__.py
siirl/engine/base_worker/megatron/npu_mbridge_patch.py
siirl/engine/base_worker/megatron/worker.py
siirl/engine/base_worker/register_center/__init__.py
siirl/engine/base_worker/register_center/register_center.py
siirl/engine/critic/__init__.py
siirl/engine/critic/base.py
siirl/engine/critic/dp_critic.py
siirl/engine/critic/megatron_critic.py
siirl/engine/reward_manager/__init__.py
siirl/engine/reward_manager/dapo.py
siirl/engine/reward_manager/embodied.py
siirl/engine/reward_manager/naive.py
siirl/engine/reward_manager/parallel.py
siirl/engine/reward_model/__init__.py
siirl/engine/reward_model/base.py
siirl/engine/reward_model/megatron/__init__.py
siirl/engine/reward_model/megatron/reward_model.py
siirl/engine/rollout/__init__.py
siirl/engine/rollout/async_server.py
siirl/engine/rollout/base.py
siirl/engine/rollout/embodied_rollout.py
siirl/engine/rollout/hf_rollout.py
siirl/engine/rollout/schemas.py
siirl/engine/rollout/sglang_rollout/__init__.py
siirl/engine/rollout/sglang_rollout/async_sglang_server.py
siirl/engine/rollout/sglang_rollout/sglang_rollout.py
siirl/engine/rollout/sglang_rollout/utils.py
siirl/engine/rollout/vllm_rollout/__init__.py
siirl/engine/rollout/vllm_rollout/vllm_async_server.py
siirl/engine/rollout/vllm_rollout/vllm_rollout_spmd.py
siirl/engine/sharding_manager/__init__.py
siirl/engine/sharding_manager/base.py
siirl/engine/sharding_manager/fsdp_hf.py
siirl/engine/sharding_manager/fsdp_sglang.py
siirl/engine/sharding_manager/fsdp_ulysses.py
siirl/engine/sharding_manager/fsdp_vllm.py
siirl/engine/sharding_manager/megatron_sglang.py
siirl/engine/sharding_manager/megatron_vllm.py
siirl/environment/embodied/__init__.py
siirl/environment/embodied/base.py
siirl/environment/embodied/venv.py
siirl/environment/embodied/adapters/__init__.py
siirl/environment/embodied/adapters/libero.py
siirl/execution/dag/__init__.py
siirl/execution/dag/builtin_pipelines.py
siirl/execution/dag/config_loader.py
siirl/execution/dag/node.py
siirl/execution/dag/pipeline.py
siirl/execution/dag/task_graph.py
siirl/execution/dag/task_loader.py
siirl/execution/metric_worker/metric_worker.py
siirl/execution/metric_worker/utils.py
siirl/execution/rollout_flow/multi_agent/multiagent_generate.py
siirl/execution/rollout_flow/multi_agent/utils.py
siirl/execution/rollout_flow/multiturn/__init__.py
siirl/execution/rollout_flow/multiturn/agent_loop/__init__.py
siirl/execution/rollout_flow/multiturn/agent_loop/agent_loop.py
siirl/execution/rollout_flow/multiturn/agent_loop/single_turn_agent_loop.py
siirl/execution/rollout_flow/multiturn/agent_loop/tool_agent_loop.py
siirl/execution/rollout_flow/multiturn/interactions/__init__.py
siirl/execution/rollout_flow/multiturn/interactions/base.py
siirl/execution/rollout_flow/multiturn/interactions/gsm8k_interaction.py
siirl/execution/rollout_flow/multiturn/interactions/utils/__init__.py
siirl/execution/rollout_flow/multiturn/interactions/utils/interaction_registry.py
siirl/execution/rollout_flow/multiturn/tools/__init__.py
siirl/execution/rollout_flow/multiturn/tools/base_tool.py
siirl/execution/rollout_flow/multiturn/tools/geo3k_tool.py
siirl/execution/rollout_flow/multiturn/tools/gsm8k_tool.py
siirl/execution/rollout_flow/multiturn/tools/mcp_base_tool.py
siirl/execution/rollout_flow/multiturn/tools/mcp_search_tool.py
siirl/execution/rollout_flow/multiturn/tools/sandbox_fusion_tools.py
siirl/execution/rollout_flow/multiturn/tools/schemas.py
siirl/execution/rollout_flow/multiturn/tools/search_tool.py
siirl/execution/rollout_flow/multiturn/tools/utils/__init__.py
siirl/execution/rollout_flow/multiturn/tools/utils/search_r1_like_utils.py
siirl/execution/rollout_flow/multiturn/tools/utils/tool_registry.py
siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/McpClientManager.py
siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/__init__.py
siirl/execution/rollout_flow/multiturn/tools/utils/mcp_clients/utils.py
siirl/execution/scheduler/__init__.py
siirl/execution/scheduler/enums.py
siirl/execution/scheduler/graph_updater.py
siirl/execution/scheduler/launch.py
siirl/execution/scheduler/process_group_manager.py
siirl/execution/scheduler/ray_actor_manager.py
siirl/execution/scheduler/resource_manager.py
siirl/execution/scheduler/reward.py
siirl/execution/scheduler/task_scheduler.py
siirl/models/__init__.py
siirl/models/loader.py
siirl/models/patcher.py
siirl/models/registry.py
siirl/models/weight_loader_registry.py
siirl/models/embodied/openvla/__init__.py
siirl/models/embodied/openvla/configuration_prismatic.py
siirl/models/embodied/openvla/modeling_prismatic.py
siirl/models/embodied/openvla/processing_prismatic.py
siirl/models/embodied/openvla_oft/__init__.py
siirl/models/embodied/openvla_oft/configuration_prismatic.py
siirl/models/embodied/openvla_oft/constants.py
siirl/models/embodied/openvla_oft/modeling_prismatic.py
siirl/models/embodied/openvla_oft/processing_prismatic.py
siirl/models/embodied/openvla_oft/train_utils.py
siirl/models/llama/__init__.py
siirl/models/llama/megatron/__init__.py
siirl/models/llama/megatron/modeling_llama_megatron.py
siirl/models/llama/megatron/checkpoint_utils/__init__.py
siirl/models/llama/megatron/checkpoint_utils/llama_loader.py
siirl/models/llama/megatron/checkpoint_utils/llama_loader_depracated.py
siirl/models/llama/megatron/checkpoint_utils/llama_saver.py
siirl/models/llama/megatron/layers/__init__.py
siirl/models/llama/megatron/layers/parallel_attention.py
siirl/models/llama/megatron/layers/parallel_decoder.py
siirl/models/llama/megatron/layers/parallel_linear.py
siirl/models/llama/megatron/layers/parallel_mlp.py
siirl/models/llama/megatron/layers/parallel_rmsnorm.py
siirl/models/mcore/__init__.py
siirl/models/mcore/config_converter.py
siirl/models/mcore/loader.py
siirl/models/mcore/mbridge.py
siirl/models/mcore/model_forward.py
siirl/models/mcore/model_forward_fused.py
siirl/models/mcore/model_initializer.py
siirl/models/mcore/patch_v012.py
siirl/models/mcore/registry.py
siirl/models/mcore/saver.py
siirl/models/mcore/util.py
siirl/models/mcore/weight_converter.py
siirl/models/model_utils/__init__.py
siirl/models/model_utils/visual.py
siirl/models/qwen2/__init__.py
siirl/models/qwen2/megatron/__init__.py
siirl/models/qwen2/megatron/modeling_qwen2_megatron.py
siirl/models/qwen2/megatron/checkpoint_utils/__init__.py
siirl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py
siirl/models/qwen2/megatron/checkpoint_utils/qwen2_loader_depracated.py
siirl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py
siirl/models/qwen2/megatron/layers/__init__.py
siirl/models/qwen2/megatron/layers/parallel_attention.py
siirl/models/qwen2/megatron/layers/parallel_decoder.py
siirl/models/qwen2/megatron/layers/parallel_linear.py
siirl/models/qwen2/megatron/layers/parallel_mlp.py
siirl/models/qwen2/megatron/layers/parallel_rmsnorm.py
siirl/models/transformers/__init__.py
siirl/models/transformers/internvl.py
siirl/models/transformers/kimi_vl.py
siirl/models/transformers/llama.py
siirl/models/transformers/monkey_patch.py
siirl/models/transformers/npu_patch.py
siirl/models/transformers/qwen2.py
siirl/models/transformers/qwen2_5_vl.py
siirl/models/transformers/qwen2_vl.py
siirl/models/transformers/transformers_compat.py
siirl/models/transformers/internvl_chat/__init__.py
siirl/models/transformers/internvl_chat/configuration_intern_vit.py
siirl/models/transformers/internvl_chat/configuration_internlm2.py
siirl/models/transformers/internvl_chat/configuration_internvl_chat.py
siirl/models/transformers/internvl_chat/modeling_intern_vit.py
siirl/models/transformers/internvl_chat/modeling_internlm2.py
siirl/models/transformers/internvl_chat/modeling_internvl_chat.py
siirl/models/transformers/internvl_chat/tokenization_internlm2.py
siirl/models/transformers/internvl_chat/tokenization_internlm2_fast.py
siirl/params/__init__.py
siirl/params/dag_args.py
siirl/params/data_args.py
siirl/params/display_dict.py
siirl/params/embodied_args.py
siirl/params/model_args.py
siirl/params/parser.py
siirl/params/profiler_args.py
siirl/params/training_args.py
siirl/third_party/__init__.py
siirl/third_party/sglang/__init__.py
siirl/third_party/sglang/parallel_state.py
siirl/user_interface/filter_interface/__init__.py
siirl/user_interface/filter_interface/dapo.py
siirl/user_interface/filter_interface/embodied.py
siirl/user_interface/rewards_interface/custom_gsm8k_reward.py
siirl/utils/__init__.py
siirl/utils/import_string.py
siirl/utils/memory_utils.py
siirl/utils/checkpoint/__init__.py
siirl/utils/checkpoint/checkpoint_manager.py
siirl/utils/checkpoint/fsdp_checkpoint_manager.py
siirl/utils/checkpoint/megatron_checkpoint_manager.py
siirl/utils/debug/__init__.py
siirl/utils/debug/mstx_profile.py
siirl/utils/debug/performance.py
siirl/utils/debug/profile.py
siirl/utils/embodied/__init__.py
siirl/utils/embodied/libero_utils.py
siirl/utils/embodied/openvla_utils.py
siirl/utils/embodied/video_emb.py
siirl/utils/experimental/__init__.py
siirl/utils/experimental/torch_functional.py
siirl/utils/extras/__init__.py
siirl/utils/extras/device.py
siirl/utils/extras/fs.py
siirl/utils/extras/hdfs_io.py
siirl/utils/extras/import_utils.py
siirl/utils/extras/misc.py
siirl/utils/extras/net_utils.py
siirl/utils/extras/packages.py
siirl/utils/extras/patch.py
siirl/utils/extras/py_functional.py
siirl/utils/extras/ray_utils.py
siirl/utils/kernel/__init__.py
siirl/utils/kernel/kernels.py
siirl/utils/kernel/linear_cross_entropy.py
siirl/utils/logger/__init__.py
siirl/utils/logger/aggregate_logger.py
siirl/utils/logger/logging_utils.py
siirl/utils/logger/tracking.py
siirl/utils/megatron/__init__.py
siirl/utils/megatron/dist_checkpointing.py
siirl/utils/megatron/megatron_utils.py
siirl/utils/megatron/memory.py
siirl/utils/megatron/memory_buffer.py
siirl/utils/megatron/optimizer.py
siirl/utils/megatron/pipeline_parallel.py
siirl/utils/megatron/sequence_parallel.py
siirl/utils/megatron/tensor_parallel.py
siirl/utils/metrics/__init__.py
siirl/utils/metrics/metric_utils.py
siirl/utils/model_utils/__init__.py
siirl/utils/model_utils/activation_offload.py
siirl/utils/model_utils/attention_utils.py
siirl/utils/model_utils/flops_counter.py
siirl/utils/model_utils/fsdp_utils.py
siirl/utils/model_utils/model.py
siirl/utils/model_utils/npu_utils.py
siirl/utils/model_utils/seqlen_balancing.py
siirl/utils/model_utils/tensordict_utils.py
siirl/utils/model_utils/torch_dtypes.py
siirl/utils/model_utils/torch_functional.py
siirl/utils/model_utils/ulysses.py
siirl/utils/model_utils/vllm_utils.py
siirl/utils/reward_score/__init__.py
siirl/utils/reward_score/embodied.py
siirl/utils/reward_score/geo3k.py
siirl/utils/reward_score/gsm8k.py
siirl/utils/reward_score/math.py
siirl/utils/reward_score/math_batch.py
siirl/utils/reward_score/math_dapo.py
siirl/utils/reward_score/math_verify.py
siirl/utils/reward_score/mm_eureka.py
siirl/utils/reward_score/search_r1_like_qa_em.py
siirl/utils/reward_score/prime_code/__init__.py
siirl/utils/reward_score/prime_code/testing_util.py
siirl/utils/reward_score/prime_code/utils.py
siirl/utils/reward_score/prime_math/__init__.py
siirl/utils/reward_score/prime_math/grader.py
siirl/utils/reward_score/prime_math/math_normalize.py
siirl/utils/reward_score/sandbox_fusion/__init__.py
siirl/utils/reward_score/sandbox_fusion/utils.py
tests/__init__.py
tests/dag/test_config_loader.py
tests/dag/test_node.py
tests/dag/test_task_graph.py
tests/dag/test_task_loader.py
tests/dag_worker/test_dag_worker.py
tests/dag_worker/test_dapo_merge.py
tests/dag_worker/test_dapo_pipeline.py
tests/data_buffer/detailed_put_performance_test.py
tests/data_buffer/performance_test_data_buffer.py
tests/data_buffer/test_data_buffer.py
tests/scheduler/test_process_group_manager.py
tests/scheduler/test_task_scheduler.py