.gitignore
.pre-commit-config.yaml
CHANGELOG.md
CONTRIBUTING.md
LICENSE
Makefile
README.md
mkdocs.yml
pyproject.toml
.github/workflows/ci.yml
.github/workflows/publish.yml
.idea/.gitignore
.idea/RL.iml
.idea/deployment.xml
.idea/misc.xml
.idea/modules.xml
.idea/prettier.xml
.idea/vcs.xml
.idea/inspectionProfiles/Project_Default.xml
.idea/inspectionProfiles/profiles_settings.xml
configs/a2c/breakout_atari.yaml
configs/a2c/cartpole.yaml
configs/advantage_learning_dqn/cartpole.yaml
configs/agent57/breakout_atari.yaml
configs/agent57/tennis_atari.yaml
configs/apex_dqn/breakout_atari.yaml
configs/apex_dqn/tennis_atari.yaml
configs/apex_dqn/tennis_event_offense.yaml
configs/apex_dqn/tennis_event_offense_v2.yaml
configs/apex_dqn/tennis_event_offense_v3.yaml
configs/apex_dqn/tennis_event_offense_v4.yaml
configs/apex_dqn/tennis_event_offense_v5.yaml
configs/apex_dqn/tennis_event_offense_v5_1.yaml
configs/apex_dqn/tennis_event_offense_v5_1_resume_sparse_ckpt.yaml
configs/apex_dqn/tennis_event_shaped.yaml
configs/apex_dqn/tennis_event_v2.yaml
configs/apex_dqn/tennis_event_v3.yaml
configs/apex_dqn/tennis_event_v4.yaml
configs/apex_dqn/tennis_event_v5.yaml
configs/apex_dqn/tennis_event_v5_1.yaml
configs/apex_dqn/tennis_explore_tuned.yaml
configs/apex_dqn/tennis_reward_lite.yaml
configs/apex_dqn/tennis_stable_lr.yaml
configs/appo/cartpole.yaml
configs/ars/pendulum.yaml
configs/awac/pendulum.yaml
configs/awr/pendulum.yaml
configs/bc/pendulum.yaml
configs/bcq/pendulum.yaml
configs/bear/pendulum.yaml
configs/boltzmann_double_dqn/cartpole.yaml
configs/boltzmann_dqn/cartpole.yaml
configs/c51_dqn/breakout_atari.yaml
configs/c51_dqn/cartpole.yaml
configs/cal_ql/pendulum.yaml
configs/clipped_double_dqn/cartpole.yaml
configs/cql/pendulum.yaml
configs/cql_double_dqn/cartpole.yaml
configs/cql_dqn/cartpole.yaml
configs/crossq/pendulum.yaml
configs/crr/pendulum.yaml
configs/curl/pendulum_pixels.yaml
configs/d4pg/pendulum.yaml
configs/ddpg/pendulum.yaml
configs/decision_transformer/pendulum.yaml
configs/diamond/breakout_atari.yaml
configs/discrete_sac/cartpole.yaml
configs/double_dqn/breakout_atari.yaml
configs/double_dqn/cartpole.yaml
configs/dqn/breakout_atari.yaml
configs/dqn/cartpole.yaml
configs/dreamer/cartpole_pixels.yaml
configs/dreamerv3/breakout_atari.yaml
configs/drq/pendulum_pixels.yaml
configs/drqn/cartpole.yaml
configs/drqv2/pendulum_pixels.yaml
configs/dueling_dqn/breakout_atari.yaml
configs/dueling_dqn/cartpole.yaml
configs/eadream/breakout_atari.yaml
configs/edac/pendulum.yaml
configs/efficientzero/breakout_atari.yaml
configs/efficientzero/tennis_atari.yaml
configs/expected_double_dqn/cartpole.yaml
configs/expected_sarsa/cartpole.yaml
configs/fqf/breakout_atari.yaml
configs/fqf/cartpole.yaml
configs/gail/cartpole.yaml
configs/gumbel_muzero/breakout_atari.yaml
configs/her/point_goal.yaml
configs/horizon_imagination/breakout_atari.yaml
configs/hysteretic_dqn/cartpole.yaml
configs/impala/breakout_atari.yaml
configs/impala/cartpole.yaml
configs/impala/tennis_atari.yaml
configs/iql/pendulum.yaml
configs/iqn/breakout_atari.yaml
configs/iqn/cartpole.yaml
configs/jowa/breakout_atari.yaml
configs/marwil/pendulum.yaml
configs/mbpo/pendulum.yaml
configs/mellowmax_dqn/cartpole.yaml
configs/mopo/pendulum.yaml
configs/mow/breakout_atari.yaml
configs/munchausen_double_dqn/cartpole.yaml
configs/munchausen_dqn/cartpole.yaml
configs/muzero/breakout_atari.yaml
configs/n_step_dqn/breakout_atari.yaml
configs/n_step_dqn/cartpole.yaml
configs/naf/pendulum.yaml
configs/noisy_dqn/breakout_atari.yaml
configs/noisy_dqn/cartpole.yaml
configs/openai_es/pendulum.yaml
configs/persistent_advantage_learning_dqn/cartpole.yaml
configs/pets/pendulum.yaml
configs/po_dreamer/breakout_atari.yaml
configs/ppg/breakout_atari.yaml
configs/ppg/cartpole.yaml
configs/ppo/breakout_atari.yaml
configs/ppo/cartpole.yaml
configs/ppo/tennis_atari.yaml
configs/prioritized_dqn/breakout_atari.yaml
configs/prioritized_dqn/cartpole.yaml
configs/qr_dqn/breakout_atari.yaml
configs/qr_dqn/cartpole.yaml
configs/r2d2/breakout_atari.yaml
configs/r2d2/cartpole.yaml
configs/r2d2/tennis_atari.yaml
configs/rainbow_dqn/breakout_atari.yaml
configs/rainbow_dqn/cartpole.yaml
configs/rainbow_dqn/tennis_atari.yaml
configs/rainbow_dqn/tennis_event_offense.yaml
configs/rainbow_dqn/tennis_event_shaped.yaml
configs/rainbow_dqn/tennis_event_v2.yaml
configs/rainbow_dqn/tennis_no_early_stop.yaml
configs/rainbow_dqn/tennis_reward_lite.yaml
configs/rainbow_dqn/tennis_stable_lr.yaml
configs/rebrac/pendulum.yaml
configs/recurrent_ppo/breakout_atari.yaml
configs/redq/pendulum.yaml
configs/rlpd/pendulum.yaml
configs/sac/pendulum.yaml
configs/scalezero/breakout_atari.yaml
configs/soft_double_dqn/cartpole.yaml
configs/soft_dqn/cartpole.yaml
configs/spr/breakout_atari.yaml
configs/td3/pendulum.yaml
configs/td3_bc/pendulum.yaml
configs/tqc/pendulum.yaml
configs/trpo/cartpole.yaml
configs/twisted/breakout_atari.yaml
configs/xql/pendulum.yaml
docs/changelog.md
docs/compatibility.md
docs/config-schema.md
docs/development.md
docs/faq.md
docs/index.md
docs/run-artifacts.md
docs/algorithms/index.md
docs/algorithms/model-based.md
docs/algorithms/off-policy.md
docs/algorithms/offline.md
docs/algorithms/on-policy.md
docs/api/experimental.md
docs/api/index.md
docs/api/stable-core.md
docs/assets/algorithms.svg
docs/assets/architecture-dark.svg
docs/assets/architecture.svg
docs/assets/banner.svg
docs/assets/logo-dark.svg
docs/assets/logo.svg
docs/assets/css/custom.css
docs/assets/icons/algorithms.svg
docs/assets/icons/cli.svg
docs/assets/icons/modular.svg
docs/assets/icons/reproducible.svg
docs/assets/icons/tensorboard.svg
docs/assets/icons/unified-api.svg
docs/cli/index.md
docs/concepts/architecture.md
docs/concepts/index.md
docs/concepts/training-workflow.md
docs/configuration/index.md
docs/configuration/scheduling.md
docs/configuration/train-config.md
docs/developer/contributing.md
docs/developer/index.md
docs/developer/project-structure.md
docs/getting-started/index.md
docs/getting-started/installation.md
docs/getting-started/quickstart.md
docs/getting-started/requirements.md
docs/guide/checkpointing.md
docs/guide/evaluation.md
docs/guide/index.md
docs/guide/offline-rl.md
docs/guide/pixel-observations.md
docs/guide/training.md
docs/guide/zoo-benchmarks.md
docs/overrides/.gitkeep
docs/plans/2026-03-09-a2c-td3-expansion.md
docs/plans/2026-03-09-eval-resume-workflows.md
docs/plans/2026-03-09-experiment-manager.md
docs/plans/2026-03-09-phase1-bootstrap-closure.md
docs/plans/2026-03-09-phase11-dqn-foundation.md
docs/plans/2026-03-09-phase12-sac-algorithm.md
docs/plans/2026-03-09-phase12-sac-training.md
docs/plans/2026-03-09-rl-package-foundation-design.md
docs/plans/2026-03-09-rl-package-module-contracts.md
docs/plans/2026-03-09-rl-package-roadmap-design.md
docs/plans/2026-03-09-rl-training-package.md
docs/plans/2026-03-09-tensorboard-logging.md
docs/plans/2026-03-09-trainer-runtime-shared.md
docs/plans/2026-03-09-training-callbacks.md
docs/plans/2026-03-10-c51-dqn.md
docs/plans/2026-03-10-ddpg.md
docs/plans/2026-03-10-double-dueling-dqn.md
docs/plans/2026-03-10-n-step-dqn.md
docs/plans/2026-03-10-noisy-dqn.md
docs/plans/2026-03-10-prioritized-dqn.md
docs/plans/2026-03-10-qr-dqn.md
docs/plans/2026-03-10-rainbow-dqn.md
docs/plans/2026-03-11-cql.md
docs/plans/2026-03-11-iql.md
docs/plans/2026-03-11-iqn.md
docs/plans/2026-03-11-redq.md
docs/plans/2026-03-11-td3-bc.md
docs/plans/2026-03-11-tqc.md
docs/plans/2026-03-12-atari-recurrent-ppo-phase1.md
docs/plans/2026-03-12-awac-online-controls-phase3.md
docs/plans/2026-03-12-awr-phase14.md
docs/plans/2026-03-12-bcq-bear-offline-wave-phase5.md
docs/plans/2026-03-12-bcq-bear-phase5.md
docs/plans/2026-03-12-calql-phase10.md
docs/plans/2026-03-12-crr-phase8.md
docs/plans/2026-03-12-data-rewards-controls-phase2.md
docs/plans/2026-03-12-drqv2-phase7.md
docs/plans/2026-03-12-edac-phase12.md
docs/plans/2026-03-12-her-goal-replay-design.md
docs/plans/2026-03-12-her-goal-replay-phase4.md
docs/plans/2026-03-12-mainstream-rl-package-design.md
docs/plans/2026-03-12-marwil-phase15.md
docs/plans/2026-03-12-rebrac-phase9.md
docs/plans/2026-03-12-rl-expansion-roadmap-design.md
docs/plans/2026-03-12-rl-yearly-sourcebook-design.md
docs/plans/2026-03-12-rlpd-phase13.md
docs/plans/2026-03-12-trpo-discrete-sac-crossq-phase6.md
docs/plans/2026-03-12-xql-phase11.md
docs/plans/2026-03-13-appo-v1.md
docs/plans/2026-03-13-ars-v1.md
docs/plans/2026-03-13-curl-v1.md
docs/plans/2026-03-13-decision-transformer-v1.md
docs/plans/2026-03-13-drq-v1.md
docs/plans/2026-03-13-drqn-v1.md
docs/plans/2026-03-13-impala-v1.md
docs/plans/2026-03-13-mopo-v1.md
docs/plans/2026-03-13-openai-es-v1.md
docs/plans/2026-03-13-pets-v1.md
docs/plans/2026-03-13-ppg-v1.md
docs/plans/2026-03-13-r2d2-v1.md
docs/plans/2026-03-13-yearly-gap-batch-naf-d4pg.md
docs/plans/2026-03-16-fqf-mbpo-dreamer-gail.md
docs/plans/2026-03-17-atari-reward-strategy-defaults.md
docs/plans/2026-03-17-benchmark-normalization-best-checkpoint.md
docs/plans/2026-03-17-clip-coef-schedule.md
docs/plans/2026-03-17-entropy-coef-schedule.md
docs/plans/2026-03-17-env-mode-overrides-eval-protocol.md
docs/plans/2026-03-17-evaluation-video-wrapper.md
docs/plans/2026-03-17-num-simulations-schedule.md
docs/plans/2026-03-17-reward-strategy-config.md
docs/plans/2026-03-17-root-exploration-fraction-schedule.md
docs/plans/2026-03-17-temperature-schedule.md
docs/plans/2026-03-17-zoo-benchmark-report.md
docs/plans/2026-03-17-zoo-manifest-config-plumbing.md
docs/plans/2026-03-18-zoo-baseline-summary.md
docs/plans/2026-03-18-zoo-fail-on-manifest-drift-severity.md
docs/plans/2026-03-18-zoo-fail-on-manifest-drift-types.md
docs/plans/2026-03-18-zoo-fail-on-manifest-drift.md
docs/plans/2026-03-18-zoo-leaderboard-baseline.md
docs/plans/2026-03-18-zoo-leaderboard-compare-to.md
docs/plans/2026-03-18-zoo-leaderboard-confidence.md
docs/plans/2026-03-18-zoo-leaderboard-metric-modes.md
docs/plans/2026-03-18-zoo-leaderboard-ranks-and-seed-count.md
docs/plans/2026-03-18-zoo-leaderboard-robustness.md
docs/plans/2026-03-18-zoo-leaderboard-score-view.md
docs/plans/2026-03-18-zoo-leaderboard-stability.md
docs/plans/2026-03-18-zoo-manifest-drift-severity.md
docs/plans/2026-03-18-zoo-manifest-drift.md
docs/plans/2026-03-18-zoo-manifest-fail-reasons.md
docs/plans/2026-03-18-zoo-manifest-fingerprint.md
docs/plans/2026-03-18-zoo-manifest-source.md
docs/plans/2026-03-18-zoo-min-seeds-and-normalized-leaderboard.md
docs/plans/2026-03-18-zoo-protocol-metadata.md
docs/plans/2026-03-18-zoo-report-deltas-and-leaderboard.md
docs/plans/2026-03-18-zoo-report-exports-and-filters.md
docs/plans/2026-03-18-zoo-report-grouping-and-topk.md
docs/plans/2026-03-18-zoo-report-output-file.md
docs/plans/2026-03-19-multi-seed-benchmark-runner.md
docs/plans/2026-03-19-test-env-registration-unification.md
docs/plans/2026-03-20-axiomrl-training-engineering-optimization.md
docs/plans/2026-04-04-tennis-atari-preset-expansion-design.md
docs/plans/2026-04-04-tennis-atari-preset-expansion.md
docs/plans/2026-04-05-apex-dqn-tennis-preset.md
docs/plans/2026-04-06-tennis-specialized-tuning-design.md
docs/plans/2026-04-06-tennis-specialized-tuning-plan.md
docs/plans/2026-04-06-tennis-stage1-comparison-template.md
docs/plans/2026-04-07-tennis-event-shaping-design.md
docs/plans/2026-04-07-tennis-event-shaping-plan.md
docs/plans/2026-04-07-tennis-offense-shaping-design.md
docs/plans/2026-04-07-tennis-offense-shaping-plan.md
docs/plans/2026-04-07-tennis-resource-focus-design.md
docs/plans/2026-04-07-tennis-resource-focus-plan.md
docs/research/2026-03-09-rl-package-survey.md
docs/research/2026-03-16-atari-discrete-popular-algorithms-2015-2026.md
docs/research/2026-04-15-tennis-event-experiment-log.md
examples/a2c_cartpole_reference.py
examples/appo_cartpole_reference.py
examples/ars_pendulum_reference.py
examples/c51_dqn_cartpole_reference.py
examples/cql_pendulum_reference.py
examples/curl_pendulum_reference.py
examples/d4pg_pendulum_reference.py
examples/ddpg_pendulum_reference.py
examples/decision_transformer_pendulum_reference.py
examples/double_dqn_cartpole_reference.py
examples/dqn_breakout_atari_reference.py
examples/dqn_cartpole_reference.py
examples/dreamer_cartpole_pixels_reference.py
examples/drq_pendulum_reference.py
examples/drqn_cartpole_reference.py
examples/dueling_dqn_cartpole_reference.py
examples/fqf_cartpole_reference.py
examples/gail_cartpole_reference.py
examples/impala_cartpole_reference.py
examples/iql_pendulum_npz_reference.py
examples/iql_pendulum_reference.py
examples/iqn_cartpole_reference.py
examples/mbpo_pendulum_reference.py
examples/mopo_pendulum_reference.py
examples/n_step_dqn_cartpole_reference.py
examples/naf_pendulum_reference.py
examples/noisy_dqn_cartpole_reference.py
examples/openai_es_pendulum_reference.py
examples/pets_pendulum_reference.py
examples/ppg_cartpole_reference.py
examples/ppo_breakout_atari_reference.py
examples/ppo_cartpole_reference.py
examples/prioritized_dqn_cartpole_reference.py
examples/qr_dqn_cartpole_reference.py
examples/r2d2_cartpole_reference.py
examples/rainbow_dqn_cartpole_reference.py
examples/recurrent_ppo_breakout_atari_reference.py
examples/redq_pendulum_reference.py
examples/sac_pendulum_reference.py
examples/td3_bc_pendulum_reference.py
examples/td3_pendulum_reference.py
examples/tqc_pendulum_reference.py
scripts/benchmark_zoo.py
scripts/tennis_status.py
scripts/train.py
site/404.html
site/index.html
site/sitemap.xml
site/sitemap.xml.gz
site/algorithms/index.html
site/algorithms/model-based/index.html
site/algorithms/off-policy/index.html
site/algorithms/offline/index.html
site/algorithms/on-policy/index.html
site/api/index.html
site/api/experimental/index.html
site/api/stable-core/index.html
site/assets/algorithms.svg
site/assets/architecture.svg
site/assets/banner.svg
site/assets/css/custom.css
site/assets/images/favicon.png
site/assets/javascripts/bundle.79ae519e.min.js
site/assets/javascripts/bundle.79ae519e.min.js.map
site/assets/javascripts/lunr/tinyseg.js
site/assets/javascripts/lunr/wordcut.js
site/assets/javascripts/lunr/min/lunr.ar.min.js
site/assets/javascripts/lunr/min/lunr.da.min.js
site/assets/javascripts/lunr/min/lunr.de.min.js
site/assets/javascripts/lunr/min/lunr.du.min.js
site/assets/javascripts/lunr/min/lunr.el.min.js
site/assets/javascripts/lunr/min/lunr.es.min.js
site/assets/javascripts/lunr/min/lunr.fi.min.js
site/assets/javascripts/lunr/min/lunr.fr.min.js
site/assets/javascripts/lunr/min/lunr.he.min.js
site/assets/javascripts/lunr/min/lunr.hi.min.js
site/assets/javascripts/lunr/min/lunr.hu.min.js
site/assets/javascripts/lunr/min/lunr.hy.min.js
site/assets/javascripts/lunr/min/lunr.it.min.js
site/assets/javascripts/lunr/min/lunr.ja.min.js
site/assets/javascripts/lunr/min/lunr.jp.min.js
site/assets/javascripts/lunr/min/lunr.kn.min.js
site/assets/javascripts/lunr/min/lunr.ko.min.js
site/assets/javascripts/lunr/min/lunr.multi.min.js
site/assets/javascripts/lunr/min/lunr.nl.min.js
site/assets/javascripts/lunr/min/lunr.no.min.js
site/assets/javascripts/lunr/min/lunr.pt.min.js
site/assets/javascripts/lunr/min/lunr.ro.min.js
site/assets/javascripts/lunr/min/lunr.ru.min.js
site/assets/javascripts/lunr/min/lunr.sa.min.js
site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js
site/assets/javascripts/lunr/min/lunr.sv.min.js
site/assets/javascripts/lunr/min/lunr.ta.min.js
site/assets/javascripts/lunr/min/lunr.te.min.js
site/assets/javascripts/lunr/min/lunr.th.min.js
site/assets/javascripts/lunr/min/lunr.tr.min.js
site/assets/javascripts/lunr/min/lunr.vi.min.js
site/assets/javascripts/lunr/min/lunr.zh.min.js
site/assets/javascripts/workers/search.2c215733.min.js
site/assets/javascripts/workers/search.2c215733.min.js.map
site/assets/stylesheets/main.484c7ddc.min.css
site/assets/stylesheets/main.484c7ddc.min.css.map
site/assets/stylesheets/palette.ab4e12ef.min.css
site/assets/stylesheets/palette.ab4e12ef.min.css.map
site/changelog/index.html
site/cli/index.html
site/compatibility/index.html
site/concepts/index.html
site/concepts/architecture/index.html
site/concepts/training-workflow/index.html
site/config-schema/index.html
site/configuration/index.html
site/configuration/scheduling/index.html
site/configuration/train-config/index.html
site/developer/index.html
site/developer/contributing/index.html
site/developer/project-structure/index.html
site/development/index.html
site/faq/index.html
site/getting-started/index.html
site/getting-started/installation/index.html
site/getting-started/quickstart/index.html
site/getting-started/requirements/index.html
site/guide/index.html
site/guide/checkpointing/index.html
site/guide/evaluation/index.html
site/guide/offline-rl/index.html
site/guide/pixel-observations/index.html
site/guide/training/index.html
site/guide/zoo-benchmarks/index.html
site/plans/2026-03-09-a2c-td3-expansion/index.html
site/plans/2026-03-09-eval-resume-workflows/index.html
site/plans/2026-03-09-experiment-manager/index.html
site/plans/2026-03-09-phase1-bootstrap-closure/index.html
site/plans/2026-03-09-phase11-dqn-foundation/index.html
site/plans/2026-03-09-phase12-sac-algorithm/index.html
site/plans/2026-03-09-phase12-sac-training/index.html
site/plans/2026-03-09-rl-package-foundation-design/index.html
site/plans/2026-03-09-rl-package-module-contracts/index.html
site/plans/2026-03-09-rl-package-roadmap-design/index.html
site/plans/2026-03-09-rl-training-package/index.html
site/plans/2026-03-09-tensorboard-logging/index.html
site/plans/2026-03-09-trainer-runtime-shared/index.html
site/plans/2026-03-09-training-callbacks/index.html
site/plans/2026-03-10-c51-dqn/index.html
site/plans/2026-03-10-ddpg/index.html
site/plans/2026-03-10-double-dueling-dqn/index.html
site/plans/2026-03-10-n-step-dqn/index.html
site/plans/2026-03-10-noisy-dqn/index.html
site/plans/2026-03-10-prioritized-dqn/index.html
site/plans/2026-03-10-qr-dqn/index.html
site/plans/2026-03-10-rainbow-dqn/index.html
site/plans/2026-03-11-cql/index.html
site/plans/2026-03-11-iql/index.html
site/plans/2026-03-11-iqn/index.html
site/plans/2026-03-11-redq/index.html
site/plans/2026-03-11-td3-bc/index.html
site/plans/2026-03-11-tqc/index.html
site/plans/2026-03-12-atari-recurrent-ppo-phase1/index.html
site/plans/2026-03-12-awac-online-controls-phase3/index.html
site/plans/2026-03-12-awr-phase14/index.html
site/plans/2026-03-12-bcq-bear-offline-wave-phase5/index.html
site/plans/2026-03-12-bcq-bear-phase5/index.html
site/plans/2026-03-12-calql-phase10/index.html
site/plans/2026-03-12-crr-phase8/index.html
site/plans/2026-03-12-data-rewards-controls-phase2/index.html
site/plans/2026-03-12-drqv2-phase7/index.html
site/plans/2026-03-12-edac-phase12/index.html
site/plans/2026-03-12-her-goal-replay-design/index.html
site/plans/2026-03-12-her-goal-replay-phase4/index.html
site/plans/2026-03-12-mainstream-rl-package-design/index.html
site/plans/2026-03-12-marwil-phase15/index.html
site/plans/2026-03-12-rebrac-phase9/index.html
site/plans/2026-03-12-rl-expansion-roadmap-design/index.html
site/plans/2026-03-12-rl-yearly-sourcebook-design/index.html
site/plans/2026-03-12-rlpd-phase13/index.html
site/plans/2026-03-12-trpo-discrete-sac-crossq-phase6/index.html
site/plans/2026-03-12-xql-phase11/index.html
site/plans/2026-03-13-appo-v1/index.html
site/plans/2026-03-13-ars-v1/index.html
site/plans/2026-03-13-curl-v1/index.html
site/plans/2026-03-13-decision-transformer-v1/index.html
site/plans/2026-03-13-drq-v1/index.html
site/plans/2026-03-13-drqn-v1/index.html
site/plans/2026-03-13-impala-v1/index.html
site/plans/2026-03-13-mopo-v1/index.html
site/plans/2026-03-13-openai-es-v1/index.html
site/plans/2026-03-13-pets-v1/index.html
site/plans/2026-03-13-ppg-v1/index.html
site/plans/2026-03-13-r2d2-v1/index.html
site/plans/2026-03-13-yearly-gap-batch-naf-d4pg/index.html
site/plans/2026-03-16-fqf-mbpo-dreamer-gail/index.html
site/plans/2026-03-17-atari-reward-strategy-defaults/index.html
site/plans/2026-03-17-benchmark-normalization-best-checkpoint/index.html
site/plans/2026-03-17-clip-coef-schedule/index.html
site/plans/2026-03-17-entropy-coef-schedule/index.html
site/plans/2026-03-17-env-mode-overrides-eval-protocol/index.html
site/plans/2026-03-17-evaluation-video-wrapper/index.html
site/plans/2026-03-17-num-simulations-schedule/index.html
site/plans/2026-03-17-reward-strategy-config/index.html
site/plans/2026-03-17-root-exploration-fraction-schedule/index.html
site/plans/2026-03-17-temperature-schedule/index.html
site/plans/2026-03-17-zoo-benchmark-report/index.html
site/plans/2026-03-17-zoo-manifest-config-plumbing/index.html
site/plans/2026-03-18-zoo-baseline-summary/index.html
site/plans/2026-03-18-zoo-fail-on-manifest-drift/index.html
site/plans/2026-03-18-zoo-fail-on-manifest-drift-severity/index.html
site/plans/2026-03-18-zoo-fail-on-manifest-drift-types/index.html
site/plans/2026-03-18-zoo-leaderboard-baseline/index.html
site/plans/2026-03-18-zoo-leaderboard-compare-to/index.html
site/plans/2026-03-18-zoo-leaderboard-confidence/index.html
site/plans/2026-03-18-zoo-leaderboard-metric-modes/index.html
site/plans/2026-03-18-zoo-leaderboard-ranks-and-seed-count/index.html
site/plans/2026-03-18-zoo-leaderboard-robustness/index.html
site/plans/2026-03-18-zoo-leaderboard-score-view/index.html
site/plans/2026-03-18-zoo-leaderboard-stability/index.html
site/plans/2026-03-18-zoo-manifest-drift/index.html
site/plans/2026-03-18-zoo-manifest-drift-severity/index.html
site/plans/2026-03-18-zoo-manifest-fail-reasons/index.html
site/plans/2026-03-18-zoo-manifest-fingerprint/index.html
site/plans/2026-03-18-zoo-manifest-source/index.html
site/plans/2026-03-18-zoo-min-seeds-and-normalized-leaderboard/index.html
site/plans/2026-03-18-zoo-protocol-metadata/index.html
site/plans/2026-03-18-zoo-report-deltas-and-leaderboard/index.html
site/plans/2026-03-18-zoo-report-exports-and-filters/index.html
site/plans/2026-03-18-zoo-report-grouping-and-topk/index.html
site/plans/2026-03-18-zoo-report-output-file/index.html
site/plans/2026-03-19-multi-seed-benchmark-runner/index.html
site/plans/2026-03-19-test-env-registration-unification/index.html
site/plans/2026-03-20-axiomrl-training-engineering-optimization/index.html
site/research/2026-03-09-rl-package-survey/index.html
site/research/2026-03-16-atari-discrete-popular-algorithms-2015-2026/index.html
site/run-artifacts/index.html
site/search/search_index.json
src/axiomrl/__init__.py
src/axiomrl/__main__.py
src/axiomrl/_version.py
src/axiomrl/cli.py
src/axiomrl/cli_config.py
src/axiomrl/cli_doctor.py
src/axiomrl/cli_zoo.py
src/axiomrl/core.py
src/axiomrl/experimental.py
src/axiomrl/resources.py
src/axiomrl/version.py
src/axiomrl/zoo_cli.py
src/axiomrl.egg-info/PKG-INFO
src/axiomrl.egg-info/SOURCES.txt
src/axiomrl.egg-info/dependency_links.txt
src/axiomrl.egg-info/entry_points.txt
src/axiomrl.egg-info/requires.txt
src/axiomrl.egg-info/top_level.txt
src/axiomrl/algorithms/__init__.py
src/axiomrl/algorithms/_advantage_utils.py
src/axiomrl/algorithms/a2c.py
src/axiomrl/algorithms/agent57.py
src/axiomrl/algorithms/appo.py
src/axiomrl/algorithms/ars.py
src/axiomrl/algorithms/awac.py
src/axiomrl/algorithms/awr.py
src/axiomrl/algorithms/base.py
src/axiomrl/algorithms/bc.py
src/axiomrl/algorithms/bcq.py
src/axiomrl/algorithms/bear.py
src/axiomrl/algorithms/c51_dqn.py
src/axiomrl/algorithms/cal_ql.py
src/axiomrl/algorithms/cql.py
src/axiomrl/algorithms/crossq.py
src/axiomrl/algorithms/crr.py
src/axiomrl/algorithms/curl.py
src/axiomrl/algorithms/d4pg.py
src/axiomrl/algorithms/ddpg.py
src/axiomrl/algorithms/decision_transformer.py
src/axiomrl/algorithms/diamond.py
src/axiomrl/algorithms/discrete_sac.py
src/axiomrl/algorithms/dqn.py
src/axiomrl/algorithms/dreamer.py
src/axiomrl/algorithms/dreamerv3.py
src/axiomrl/algorithms/drq.py
src/axiomrl/algorithms/drqn.py
src/axiomrl/algorithms/drqv2.py
src/axiomrl/algorithms/eadream.py
src/axiomrl/algorithms/edac.py
src/axiomrl/algorithms/efficientzero.py
src/axiomrl/algorithms/fqf.py
src/axiomrl/algorithms/gail.py
src/axiomrl/algorithms/gumbel_muzero.py
src/axiomrl/algorithms/her.py
src/axiomrl/algorithms/horizon_imagination.py
src/axiomrl/algorithms/impala.py
src/axiomrl/algorithms/iql.py
src/axiomrl/algorithms/iqn.py
src/axiomrl/algorithms/jowa.py
src/axiomrl/algorithms/marwil.py
src/axiomrl/algorithms/mbpo.py
src/axiomrl/algorithms/mopo.py
src/axiomrl/algorithms/mow.py
src/axiomrl/algorithms/muzero.py
src/axiomrl/algorithms/naf.py
src/axiomrl/algorithms/openai_es.py
src/axiomrl/algorithms/pets.py
src/axiomrl/algorithms/po_dreamer.py
src/axiomrl/algorithms/ppg.py
src/axiomrl/algorithms/ppo.py
src/axiomrl/algorithms/qr_dqn.py
src/axiomrl/algorithms/r2d2.py
src/axiomrl/algorithms/rebrac.py
src/axiomrl/algorithms/redq.py
src/axiomrl/algorithms/rlpd.py
src/axiomrl/algorithms/sac.py
src/axiomrl/algorithms/scalezero.py
src/axiomrl/algorithms/spr.py
src/axiomrl/algorithms/td3.py
src/axiomrl/algorithms/td3_bc.py
src/axiomrl/algorithms/tqc.py
src/axiomrl/algorithms/trpo.py
src/axiomrl/algorithms/twisted.py
src/axiomrl/algorithms/xql.py
src/axiomrl/api/__init__.py
src/axiomrl/api/algorithms.py
src/axiomrl/assets/configs/a2c/breakout_atari.yaml
src/axiomrl/assets/configs/a2c/cartpole.yaml
src/axiomrl/assets/configs/advantage_learning_dqn/cartpole.yaml
src/axiomrl/assets/configs/agent57/breakout_atari.yaml
src/axiomrl/assets/configs/agent57/tennis_atari.yaml
src/axiomrl/assets/configs/apex_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_atari.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense_v2.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense_v3.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense_v4.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense_v5.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_offense_v5_1.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_shaped.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_v2.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_v3.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_v4.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_v5.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_event_v5_1.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_explore_tuned.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_reward_lite.yaml
src/axiomrl/assets/configs/apex_dqn/tennis_stable_lr.yaml
src/axiomrl/assets/configs/appo/cartpole.yaml
src/axiomrl/assets/configs/ars/pendulum.yaml
src/axiomrl/assets/configs/awac/pendulum.yaml
src/axiomrl/assets/configs/awr/pendulum.yaml
src/axiomrl/assets/configs/bc/pendulum.yaml
src/axiomrl/assets/configs/bcq/pendulum.yaml
src/axiomrl/assets/configs/bear/pendulum.yaml
src/axiomrl/assets/configs/boltzmann_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/boltzmann_dqn/cartpole.yaml
src/axiomrl/assets/configs/c51_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/c51_dqn/cartpole.yaml
src/axiomrl/assets/configs/cal_ql/pendulum.yaml
src/axiomrl/assets/configs/clipped_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/cql/pendulum.yaml
src/axiomrl/assets/configs/cql_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/cql_dqn/cartpole.yaml
src/axiomrl/assets/configs/crossq/pendulum.yaml
src/axiomrl/assets/configs/crr/pendulum.yaml
src/axiomrl/assets/configs/curl/pendulum_pixels.yaml
src/axiomrl/assets/configs/d4pg/pendulum.yaml
src/axiomrl/assets/configs/ddpg/pendulum.yaml
src/axiomrl/assets/configs/decision_transformer/pendulum.yaml
src/axiomrl/assets/configs/diamond/breakout_atari.yaml
src/axiomrl/assets/configs/discrete_sac/cartpole.yaml
src/axiomrl/assets/configs/double_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/double_dqn/cartpole.yaml
src/axiomrl/assets/configs/dqn/breakout_atari.yaml
src/axiomrl/assets/configs/dqn/cartpole.yaml
src/axiomrl/assets/configs/dreamer/cartpole_pixels.yaml
src/axiomrl/assets/configs/dreamerv3/breakout_atari.yaml
src/axiomrl/assets/configs/drq/pendulum_pixels.yaml
src/axiomrl/assets/configs/drqn/cartpole.yaml
src/axiomrl/assets/configs/drqv2/pendulum_pixels.yaml
src/axiomrl/assets/configs/dueling_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/dueling_dqn/cartpole.yaml
src/axiomrl/assets/configs/eadream/breakout_atari.yaml
src/axiomrl/assets/configs/edac/pendulum.yaml
src/axiomrl/assets/configs/efficientzero/breakout_atari.yaml
src/axiomrl/assets/configs/efficientzero/tennis_atari.yaml
src/axiomrl/assets/configs/expected_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/expected_sarsa/cartpole.yaml
src/axiomrl/assets/configs/fqf/breakout_atari.yaml
src/axiomrl/assets/configs/fqf/cartpole.yaml
src/axiomrl/assets/configs/gail/cartpole.yaml
src/axiomrl/assets/configs/gumbel_muzero/breakout_atari.yaml
src/axiomrl/assets/configs/her/point_goal.yaml
src/axiomrl/assets/configs/horizon_imagination/breakout_atari.yaml
src/axiomrl/assets/configs/hysteretic_dqn/cartpole.yaml
src/axiomrl/assets/configs/impala/breakout_atari.yaml
src/axiomrl/assets/configs/impala/cartpole.yaml
src/axiomrl/assets/configs/impala/tennis_atari.yaml
src/axiomrl/assets/configs/iql/pendulum.yaml
src/axiomrl/assets/configs/iqn/breakout_atari.yaml
src/axiomrl/assets/configs/iqn/cartpole.yaml
src/axiomrl/assets/configs/jowa/breakout_atari.yaml
src/axiomrl/assets/configs/marwil/pendulum.yaml
src/axiomrl/assets/configs/mbpo/pendulum.yaml
src/axiomrl/assets/configs/mellowmax_dqn/cartpole.yaml
src/axiomrl/assets/configs/mopo/pendulum.yaml
src/axiomrl/assets/configs/mow/breakout_atari.yaml
src/axiomrl/assets/configs/munchausen_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/munchausen_dqn/cartpole.yaml
src/axiomrl/assets/configs/muzero/breakout_atari.yaml
src/axiomrl/assets/configs/n_step_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/n_step_dqn/cartpole.yaml
src/axiomrl/assets/configs/naf/pendulum.yaml
src/axiomrl/assets/configs/noisy_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/noisy_dqn/cartpole.yaml
src/axiomrl/assets/configs/openai_es/pendulum.yaml
src/axiomrl/assets/configs/persistent_advantage_learning_dqn/cartpole.yaml
src/axiomrl/assets/configs/pets/pendulum.yaml
src/axiomrl/assets/configs/po_dreamer/breakout_atari.yaml
src/axiomrl/assets/configs/ppg/breakout_atari.yaml
src/axiomrl/assets/configs/ppg/cartpole.yaml
src/axiomrl/assets/configs/ppo/breakout_atari.yaml
src/axiomrl/assets/configs/ppo/cartpole.yaml
src/axiomrl/assets/configs/ppo/tennis_atari.yaml
src/axiomrl/assets/configs/prioritized_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/prioritized_dqn/cartpole.yaml
src/axiomrl/assets/configs/qr_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/qr_dqn/cartpole.yaml
src/axiomrl/assets/configs/r2d2/breakout_atari.yaml
src/axiomrl/assets/configs/r2d2/cartpole.yaml
src/axiomrl/assets/configs/r2d2/tennis_atari.yaml
src/axiomrl/assets/configs/rainbow_dqn/breakout_atari.yaml
src/axiomrl/assets/configs/rainbow_dqn/cartpole.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_atari.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_event_offense.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_event_shaped.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_event_v2.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_no_early_stop.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_reward_lite.yaml
src/axiomrl/assets/configs/rainbow_dqn/tennis_stable_lr.yaml
src/axiomrl/assets/configs/rebrac/pendulum.yaml
src/axiomrl/assets/configs/recurrent_ppo/breakout_atari.yaml
src/axiomrl/assets/configs/redq/pendulum.yaml
src/axiomrl/assets/configs/rlpd/pendulum.yaml
src/axiomrl/assets/configs/sac/pendulum.yaml
src/axiomrl/assets/configs/scalezero/breakout_atari.yaml
src/axiomrl/assets/configs/soft_double_dqn/cartpole.yaml
src/axiomrl/assets/configs/soft_dqn/cartpole.yaml
src/axiomrl/assets/configs/spr/breakout_atari.yaml
src/axiomrl/assets/configs/td3/pendulum.yaml
src/axiomrl/assets/configs/td3_bc/pendulum.yaml
src/axiomrl/assets/configs/tqc/pendulum.yaml
src/axiomrl/assets/configs/trpo/cartpole.yaml
src/axiomrl/assets/configs/twisted/breakout_atari.yaml
src/axiomrl/assets/configs/xql/pendulum.yaml
src/axiomrl/assets/zoo/README.md
src/axiomrl/assets/zoo/atari/a2c_breakout.yaml
src/axiomrl/assets/zoo/atari/agent57_breakout.yaml
src/axiomrl/assets/zoo/atari/agent57_tennis.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense_v2.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense_v3.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense_v4.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense_v5.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_offense_v5_1.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_shaped.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_v2.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_v3.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_v4.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_v5.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_event_v5_1.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_explore_tuned.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_reward_lite.yaml
src/axiomrl/assets/zoo/atari/apex_dqn_tennis_stable_lr.yaml
src/axiomrl/assets/zoo/atari/benchmark.yaml
src/axiomrl/assets/zoo/atari/c51_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/diamond_breakout.yaml
src/axiomrl/assets/zoo/atari/double_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/dreamerv3_breakout.yaml
src/axiomrl/assets/zoo/atari/dueling_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/eadream_breakout.yaml
src/axiomrl/assets/zoo/atari/efficientzero_breakout.yaml
src/axiomrl/assets/zoo/atari/efficientzero_tennis.yaml
src/axiomrl/assets/zoo/atari/fqf_breakout.yaml
src/axiomrl/assets/zoo/atari/gumbel_muzero_breakout.yaml
src/axiomrl/assets/zoo/atari/horizon_imagination_breakout.yaml
src/axiomrl/assets/zoo/atari/impala_breakout.yaml
src/axiomrl/assets/zoo/atari/iqn_breakout.yaml
src/axiomrl/assets/zoo/atari/jowa_breakout.yaml
src/axiomrl/assets/zoo/atari/mow_breakout.yaml
src/axiomrl/assets/zoo/atari/muzero_breakout.yaml
src/axiomrl/assets/zoo/atari/n_step_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/noisy_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/po_dreamer_breakout.yaml
src/axiomrl/assets/zoo/atari/ppg_breakout.yaml
src/axiomrl/assets/zoo/atari/ppo_breakout.yaml
src/axiomrl/assets/zoo/atari/prioritized_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/qr_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/r2d2_breakout.yaml
src/axiomrl/assets/zoo/atari/r2d2_tennis.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_breakout.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_event_offense.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_event_shaped.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_event_v2.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_no_early_stop.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_reward_lite.yaml
src/axiomrl/assets/zoo/atari/rainbow_dqn_tennis_stable_lr.yaml
src/axiomrl/assets/zoo/atari/recurrent_ppo_breakout.yaml
src/axiomrl/assets/zoo/atari/scalezero_breakout.yaml
src/axiomrl/assets/zoo/atari/spr_breakout.yaml
src/axiomrl/assets/zoo/atari/tennis_benchmark.yaml
src/axiomrl/assets/zoo/atari/tennis_focus.yaml
src/axiomrl/assets/zoo/atari/tennis_focus_v2.yaml
src/axiomrl/assets/zoo/atari/tennis_offense_focus.yaml
src/axiomrl/assets/zoo/atari/tennis_tuning_stage1.yaml
src/axiomrl/assets/zoo/atari/twisted_breakout.yaml
src/axiomrl/contrib/__init__.py
src/axiomrl/contrib/api.py
src/axiomrl/contrib/recurrent_ppo.py
src/axiomrl/data/__init__.py
src/axiomrl/data/dataset_loaders.py
src/axiomrl/data/her_replay_buffer.py
src/axiomrl/data/muzero_replay_buffer.py
src/axiomrl/data/n_step.py
src/axiomrl/data/offline_dataset.py
src/axiomrl/data/offline_mixers.py
src/axiomrl/data/prioritized_recurrent_replay_buffer.py
src/axiomrl/data/prioritized_replay_buffer.py
src/axiomrl/data/recurrent_replay_buffer.py
src/axiomrl/data/recurrent_rollout_buffer.py
src/axiomrl/data/replay_buffer.py
src/axiomrl/data/rollout_buffer.py
src/axiomrl/data/rollout_export.py
src/axiomrl/data/running_mean_std.py
src/axiomrl/data/trajectory_windows.py
src/axiomrl/envs/__init__.py
src/axiomrl/envs/atari.py
src/axiomrl/envs/factory.py
src/axiomrl/envs/goals.py
src/axiomrl/envs/pixels.py
src/axiomrl/envs/rewards.py
src/axiomrl/envs/tennis_events.py
src/axiomrl/envs/video.py
src/axiomrl/examples/__init__.py
src/axiomrl/examples/dqn_breakout_atari_reference.py
src/axiomrl/examples/ppo_breakout_atari_reference.py
src/axiomrl/examples/recurrent_ppo_breakout_atari_reference.py
src/axiomrl/experiment/__init__.py
src/axiomrl/experiment/benchmarking.py
src/axiomrl/experiment/checkpointing.py
src/axiomrl/experiment/config.py
src/axiomrl/experiment/default_manager.py
src/axiomrl/experiment/logging.py
src/axiomrl/experiment/manager.py
src/axiomrl/experiment/registry.py
src/axiomrl/experiment/registry_actor_critic_specs.py
src/axiomrl/experiment/registry_continuous_loaders.py
src/axiomrl/experiment/registry_core.py
src/axiomrl/experiment/registry_dqn_loaders.py
src/axiomrl/experiment/registry_evaluators.py
src/axiomrl/experiment/registry_offline_loaders.py
src/axiomrl/experiment/registry_offline_specs.py
src/axiomrl/experiment/registry_on_policy_specs.py
src/axiomrl/experiment/registry_policy_loaders.py
src/axiomrl/experiment/registry_predictors.py
src/axiomrl/experiment/registry_recurrent_loaders.py
src/axiomrl/experiment/registry_specialized_loaders.py
src/axiomrl/experiment/registry_support.py
src/axiomrl/experiment/registry_types.py
src/axiomrl/experiment/registry_value_based_specs.py
src/axiomrl/experiment/registry_world_model_specs.py
src/axiomrl/experiment/runs.py
src/axiomrl/experiment/sweeps.py
src/axiomrl/experiment/registry_providers/__init__.py
src/axiomrl/experiment/registry_providers/actor_critic.py
src/axiomrl/experiment/registry_providers/contrib.py
src/axiomrl/experiment/registry_providers/goal_conditioned.py
src/axiomrl/experiment/registry_providers/offline.py
src/axiomrl/experiment/registry_providers/on_policy.py
src/axiomrl/experiment/registry_providers/value_based.py
src/axiomrl/experiment/registry_providers/world_model.py
src/axiomrl/models/__init__.py
src/axiomrl/models/decision_transformer.py
src/axiomrl/models/dreamer.py
src/axiomrl/models/eadream.py
src/axiomrl/models/mlp_actor_critic.py
src/axiomrl/models/mlp_ars.py
src/axiomrl/models/mlp_bc.py
src/axiomrl/models/mlp_bcq.py
src/axiomrl/models/mlp_bear.py
src/axiomrl/models/mlp_c51_q_network.py
src/axiomrl/models/mlp_crossq.py
src/axiomrl/models/mlp_d4pg.py
src/axiomrl/models/mlp_ddpg.py
src/axiomrl/models/mlp_discrete_sac.py
src/axiomrl/models/mlp_dueling_noisy_q_network.py
src/axiomrl/models/mlp_dueling_q_network.py
src/axiomrl/models/mlp_fqf_network.py
src/axiomrl/models/mlp_gail_discriminator.py
src/axiomrl/models/mlp_iql.py
src/axiomrl/models/mlp_iqn_network.py
src/axiomrl/models/mlp_mopo.py
src/axiomrl/models/mlp_naf.py
src/axiomrl/models/mlp_noisy_q_network.py
src/axiomrl/models/mlp_ppg.py
src/axiomrl/models/mlp_q_network.py
src/axiomrl/models/mlp_qr_q_network.py
src/axiomrl/models/mlp_redq.py
src/axiomrl/models/mlp_sac.py
src/axiomrl/models/mlp_td3.py
src/axiomrl/models/mlp_tqc.py
src/axiomrl/models/mow.py
src/axiomrl/models/muzero.py
src/axiomrl/models/po_dreamer.py
src/axiomrl/models/rnd.py
src/axiomrl/models/scalezero.py
src/axiomrl/models/cnn/__init__.py
src/axiomrl/models/cnn/actor_critic.py
src/axiomrl/models/cnn/c51_q_network.py
src/axiomrl/models/cnn/curl.py
src/axiomrl/models/cnn/drq.py
src/axiomrl/models/cnn/drqv2.py
src/axiomrl/models/cnn/dueling_noisy_q_network.py
src/axiomrl/models/cnn/dueling_q_network.py
src/axiomrl/models/cnn/fqf_network.py
src/axiomrl/models/cnn/iqn_network.py
src/axiomrl/models/cnn/jowa_q_network.py
src/axiomrl/models/cnn/nature.py
src/axiomrl/models/cnn/noisy_q_network.py
src/axiomrl/models/cnn/ppg.py
src/axiomrl/models/cnn/q_network.py
src/axiomrl/models/cnn/qr_q_network.py
src/axiomrl/models/cnn/spr_q_network.py
src/axiomrl/models/recurrent/__init__.py
src/axiomrl/models/recurrent/lstm_actor_critic.py
src/axiomrl/models/recurrent/lstm_q_network.py
src/axiomrl/policies/__init__.py
src/axiomrl/policies/base.py
src/axiomrl/runtime/__init__.py
src/axiomrl/runtime/a2c_trainer.py
src/axiomrl/runtime/agent57_trainer.py
src/axiomrl/runtime/apex_dqn_trainer.py
src/axiomrl/runtime/appo_trainer.py
src/axiomrl/runtime/ars_trainer.py
src/axiomrl/runtime/awac_trainer.py
src/axiomrl/runtime/awr_trainer.py
src/axiomrl/runtime/bc_trainer.py
src/axiomrl/runtime/bcq_trainer.py
src/axiomrl/runtime/bear_trainer.py
src/axiomrl/runtime/cal_ql_trainer.py
src/axiomrl/runtime/callbacks.py
src/axiomrl/runtime/collector.py
src/axiomrl/runtime/controls.py
src/axiomrl/runtime/cql_trainer.py
src/axiomrl/runtime/crossq_trainer.py
src/axiomrl/runtime/crr_trainer.py
src/axiomrl/runtime/curl_trainer.py
src/axiomrl/runtime/d4pg_trainer.py
src/axiomrl/runtime/ddpg_trainer.py
src/axiomrl/runtime/decision_transformer_trainer.py
src/axiomrl/runtime/discrete_sac_trainer.py
src/axiomrl/runtime/dqn_trainer.py
src/axiomrl/runtime/dreamer_trainer.py
src/axiomrl/runtime/drq_trainer.py
src/axiomrl/runtime/drqn_trainer.py
src/axiomrl/runtime/drqv2_trainer.py
src/axiomrl/runtime/edac_trainer.py
src/axiomrl/runtime/efficientzero_trainer.py
src/axiomrl/runtime/evaluation_runner.py
src/axiomrl/runtime/evaluation_support.py
src/axiomrl/runtime/evaluator.py
src/axiomrl/runtime/gail_trainer.py
src/axiomrl/runtime/her_trainer.py
src/axiomrl/runtime/impala_trainer.py
src/axiomrl/runtime/iql_trainer.py
src/axiomrl/runtime/marwil_trainer.py
src/axiomrl/runtime/mbpo_trainer.py
src/axiomrl/runtime/mopo_trainer.py
src/axiomrl/runtime/muzero_trainer.py
src/axiomrl/runtime/naf_trainer.py
src/axiomrl/runtime/off_policy_trainer_utils.py
src/axiomrl/runtime/openai_es_trainer.py
src/axiomrl/runtime/pets_trainer.py
src/axiomrl/runtime/ppg_trainer.py
src/axiomrl/runtime/ppo_trainer.py
src/axiomrl/runtime/r2d2_trainer.py
src/axiomrl/runtime/rebrac_trainer.py
src/axiomrl/runtime/recurrent_ppo_trainer.py
src/axiomrl/runtime/redq_trainer.py
src/axiomrl/runtime/resume_state.py
src/axiomrl/runtime/rlpd_trainer.py
src/axiomrl/runtime/run_utils.py
src/axiomrl/runtime/runner.py
src/axiomrl/runtime/sac_trainer.py
src/axiomrl/runtime/schedules.py
src/axiomrl/runtime/session.py
src/axiomrl/runtime/td3_bc_trainer.py
src/axiomrl/runtime/td3_trainer.py
src/axiomrl/runtime/tqc_trainer.py
src/axiomrl/runtime/trainer.py
src/axiomrl/runtime/trpo_trainer.py
src/axiomrl/runtime/types.py
src/axiomrl/runtime/vector_envs.py
src/axiomrl/runtime/workflows.py
src/axiomrl/runtime/xql_trainer.py
src/axiomrl/tuning/__init__.py
src/axiomrl/tuning/config.py
src/axiomrl/tuning/optuna_backend.py
src/axiomrl/tuning/study.py
src/axiomrl/zoo/__init__.py
src/axiomrl/zoo/app.py
src/axiomrl/zoo/core.py
src/axiomrl/zoo/leaderboard.py
src/axiomrl/zoo/manifests.py
src/axiomrl/zoo/reporting.py
src/axiomrl/zoo/reporting_render.py
src/axiomrl/zoo/reporting_runs.py
src/axiomrl/zoo/reporting_stats.py
tests/__init__.py
tests/conftest.py
tests/test_a2c_reference_script.py
tests/test_a2c_trainer_smoke.py
tests/test_a2c_update.py
tests/test_agent57_trainer_smoke.py
tests/test_algorithm_registry_contracts.py
tests/test_apex_dqn_trainer_smoke.py
tests/test_appo_reference_script.py
tests/test_appo_trainer_smoke.py
tests/test_appo_update.py
tests/test_ars_reference_script.py
tests/test_ars_trainer_smoke.py
tests/test_ars_update.py
tests/test_atari_dqn_trainer_smoke.py
tests/test_atari_envs.py
tests/test_atari_onpolicy_pixel_trainers_smoke.py
tests/test_atari_ppo_trainer_smoke.py
tests/test_atari_reference_scripts.py
tests/test_awac_trainer_smoke.py
tests/test_awac_update.py
tests/test_awr_trainer_smoke.py
tests/test_awr_update.py
tests/test_bc_trainer_smoke.py
tests/test_bc_update.py
tests/test_bcq_trainer_smoke.py
tests/test_bcq_update.py
tests/test_bear_trainer_smoke.py
tests/test_bear_update.py
tests/test_benchmarking.py
tests/test_c51_dqn_reference_script.py
tests/test_c51_dqn_update.py
tests/test_cal_ql_trainer_smoke.py
tests/test_cal_ql_update.py
tests/test_callbacks.py
tests/test_checkpoint_evaluate.py
tests/test_checkpoint_resume.py
tests/test_cli_config.py
tests/test_cli_workflows.py
tests/test_cli_zoo_leaderboard.py
tests/test_cli_zoo_report.py
tests/test_cql_reference_script.py
tests/test_cql_trainer_smoke.py
tests/test_cql_update.py
tests/test_crossq_trainer_smoke.py
tests/test_crossq_update.py
tests/test_crr_trainer_smoke.py
tests/test_crr_update.py
tests/test_curl_reference_script.py
tests/test_curl_trainer_smoke.py
tests/test_curl_update.py
tests/test_d4pg_reference_script.py
tests/test_d4pg_trainer_smoke.py
tests/test_d4pg_update.py
tests/test_dataset_loaders.py
tests/test_ddpg_reference_script.py
tests/test_ddpg_trainer_smoke.py
tests/test_ddpg_update.py
tests/test_decision_transformer_reference_script.py
tests/test_decision_transformer_trainer_smoke.py
tests/test_decision_transformer_update.py
tests/test_diamond_trainer_smoke.py
tests/test_discrete_sac_trainer_smoke.py
tests/test_discrete_sac_update.py
tests/test_doctor_cli.py
tests/test_double_dqn_reference_script.py
tests/test_dqn_reference_script.py
tests/test_dqn_trainer_image_observations.py
tests/test_dqn_trainer_smoke.py
tests/test_dqn_update.py
tests/test_dreamer_reference_script.py
tests/test_dreamer_trainer_smoke.py
tests/test_dreamerv3_trainer_smoke.py
tests/test_drq_reference_script.py
tests/test_drq_trainer_smoke.py
tests/test_drq_update.py
tests/test_drqn_reference_script.py
tests/test_drqn_trainer_smoke.py
tests/test_drqn_update.py
tests/test_drqv2_trainer_smoke.py
tests/test_drqv2_update.py
tests/test_dueling_dqn_reference_script.py
tests/test_eadream_trainer_smoke.py
tests/test_edac_trainer_smoke.py
tests/test_edac_update.py
tests/test_efficientzero_trainer_smoke.py
tests/test_envs.py
tests/test_evaluation_runner.py
tests/test_experiment_contracts.py
tests/test_experiment_manager_workflows.py
tests/test_fqf_reference_script.py
tests/test_fqf_trainer_smoke.py
tests/test_fqf_update.py
tests/test_gail_reference_script.py
tests/test_gail_trainer_smoke.py
tests/test_goal_envs.py
tests/test_gumbel_muzero_trainer_smoke.py
tests/test_her_replay_buffer.py
tests/test_her_trainer_smoke.py
tests/test_horizon_imagination_trainer_smoke.py
tests/test_impala_reference_script.py
tests/test_impala_trainer_smoke.py
tests/test_impala_update.py
tests/test_iql_npz_reference_script.py
tests/test_iql_reference_script.py
tests/test_iql_trainer_smoke.py
tests/test_iql_update.py
tests/test_iqn_reference_script.py
tests/test_iqn_update.py
tests/test_jowa_trainer_smoke.py
tests/test_marwil_trainer_smoke.py
tests/test_marwil_update.py
tests/test_mbpo_reference_script.py
tests/test_mbpo_trainer_smoke.py
tests/test_mbpo_update.py
tests/test_module_contracts.py
tests/test_mopo_dynamics_model.py
tests/test_mopo_reference_script.py
tests/test_mopo_trainer_smoke.py
tests/test_mopo_update.py
tests/test_mow_trainer_smoke.py
tests/test_muzero_trainer_smoke.py
tests/test_n_step_accumulator.py
tests/test_n_step_dqn_reference_script.py
tests/test_naf_reference_script.py
tests/test_naf_trainer_smoke.py
tests/test_naf_update.py
tests/test_nature_cnn.py
tests/test_noisy_dqn_reference_script.py
tests/test_offline_dataset.py
tests/test_openai_es_reference_script.py
tests/test_openai_es_trainer_smoke.py
tests/test_openai_es_update.py
tests/test_package_api_exports.py
tests/test_package_namespace.py
tests/test_package_smoke.py
tests/test_pets_reference_script.py
tests/test_pets_trainer_smoke.py
tests/test_pets_update.py
tests/test_po_dreamer_trainer_smoke.py
tests/test_ppg_reference_script.py
tests/test_ppg_trainer_smoke.py
tests/test_ppg_update.py
tests/test_ppo_update.py
tests/test_prioritized_dqn_reference_script.py
tests/test_prioritized_recurrent_replay_buffer.py
tests/test_prioritized_replay_buffer.py
tests/test_public_api_continuous_control.py
tests/test_public_api_off_policy_suite.py
tests/test_public_api_policy_gradient.py
tests/test_public_api_visual_control.py
tests/test_qr_dqn_reference_script.py
tests/test_qr_dqn_update.py
tests/test_r2d2_pixel_trainer_smoke.py
tests/test_r2d2_reference_script.py
tests/test_r2d2_trainer_smoke.py
tests/test_r2d2_update.py
tests/test_rainbow_dqn_reference_script.py
tests/test_real_end_to_end_workflows.py
tests/test_rebrac_trainer_smoke.py
tests/test_rebrac_update.py
tests/test_recurrent_models.py
tests/test_recurrent_ppo_reference_script.py
tests/test_recurrent_ppo_trainer_smoke.py
tests/test_recurrent_ppo_update.py
tests/test_recurrent_replay_buffer.py
tests/test_recurrent_rollout_buffer.py
tests/test_redq_reference_script.py
tests/test_redq_trainer_smoke.py
tests/test_redq_update.py
tests/test_reference_script.py
tests/test_registry_internal_split.py
tests/test_registry_providers.py
tests/test_release_contracts.py
tests/test_replay_buffer.py
tests/test_reward_wrappers.py
tests/test_rlpd_trainer_smoke.py
tests/test_rlpd_update.py
tests/test_rollout_buffer.py
tests/test_rollout_dataset_export.py
tests/test_run_utils.py
tests/test_runner.py
tests/test_running_mean_std.py
tests/test_runtime_evaluation_support_integration.py
tests/test_runtime_training_session_integration.py
tests/test_sac_reference_script.py
tests/test_sac_trainer_smoke.py
tests/test_sac_update.py
tests/test_scalezero_trainer_smoke.py
tests/test_schedules.py
tests/test_spr_trainer_smoke.py
tests/test_sweeps.py
tests/test_td3_bc_reference_script.py
tests/test_td3_bc_trainer_smoke.py
tests/test_td3_bc_update.py
tests/test_td3_reference_script.py
tests/test_td3_trainer_smoke.py
tests/test_td3_update.py
tests/test_test_markers.py
tests/test_tqc_reference_script.py
tests/test_tqc_trainer_smoke.py
tests/test_tqc_update.py
tests/test_trainer_smoke.py
tests/test_training_controls.py
tests/test_trajectory_window_dataset.py
tests/test_trpo_trainer_smoke.py
tests/test_trpo_update.py
tests/test_tuning_cli.py
tests/test_tuning_config.py
tests/test_tuning_optuna.py
tests/test_tuning_study.py
tests/test_twisted_trainer_smoke.py
tests/test_xql_trainer_smoke.py
tests/test_xql_update.py
tests/test_zoo_modules.py
tests/test_zoo_presets.py
tests/test_zoo_reporting_split.py
tests/support/__init__.py
tests/support/checkpoint_workflows.py
tests/support/envs.py
tests/support/markers.py
tests/support/public_api.py
tests/support/runtime_foundation.py
zoo/README.md
zoo/atari/a2c_breakout.yaml
zoo/atari/agent57_breakout.yaml
zoo/atari/agent57_tennis.yaml
zoo/atari/apex_dqn_breakout.yaml
zoo/atari/apex_dqn_tennis.yaml
zoo/atari/apex_dqn_tennis_event_offense.yaml
zoo/atari/apex_dqn_tennis_event_offense_v2.yaml
zoo/atari/apex_dqn_tennis_event_offense_v3.yaml
zoo/atari/apex_dqn_tennis_event_offense_v4.yaml
zoo/atari/apex_dqn_tennis_event_offense_v5.yaml
zoo/atari/apex_dqn_tennis_event_offense_v5_1.yaml
zoo/atari/apex_dqn_tennis_event_shaped.yaml
zoo/atari/apex_dqn_tennis_event_v2.yaml
zoo/atari/apex_dqn_tennis_event_v3.yaml
zoo/atari/apex_dqn_tennis_event_v4.yaml
zoo/atari/apex_dqn_tennis_event_v5.yaml
zoo/atari/apex_dqn_tennis_event_v5_1.yaml
zoo/atari/apex_dqn_tennis_explore_tuned.yaml
zoo/atari/apex_dqn_tennis_reward_lite.yaml
zoo/atari/apex_dqn_tennis_stable_lr.yaml
zoo/atari/benchmark.yaml
zoo/atari/c51_dqn_breakout.yaml
zoo/atari/diamond_breakout.yaml
zoo/atari/double_dqn_breakout.yaml
zoo/atari/dqn_breakout.yaml
zoo/atari/dreamerv3_breakout.yaml
zoo/atari/dueling_dqn_breakout.yaml
zoo/atari/eadream_breakout.yaml
zoo/atari/efficientzero_breakout.yaml
zoo/atari/efficientzero_tennis.yaml
zoo/atari/fqf_breakout.yaml
zoo/atari/gumbel_muzero_breakout.yaml
zoo/atari/horizon_imagination_breakout.yaml
zoo/atari/impala_breakout.yaml
zoo/atari/iqn_breakout.yaml
zoo/atari/jowa_breakout.yaml
zoo/atari/mow_breakout.yaml
zoo/atari/muzero_breakout.yaml
zoo/atari/n_step_dqn_breakout.yaml
zoo/atari/noisy_dqn_breakout.yaml
zoo/atari/po_dreamer_breakout.yaml
zoo/atari/ppg_breakout.yaml
zoo/atari/ppo_breakout.yaml
zoo/atari/prioritized_dqn_breakout.yaml
zoo/atari/qr_dqn_breakout.yaml
zoo/atari/r2d2_breakout.yaml
zoo/atari/r2d2_tennis.yaml
zoo/atari/rainbow_dqn_breakout.yaml
zoo/atari/rainbow_dqn_tennis.yaml
zoo/atari/rainbow_dqn_tennis_event_offense.yaml
zoo/atari/rainbow_dqn_tennis_event_shaped.yaml
zoo/atari/rainbow_dqn_tennis_event_v2.yaml
zoo/atari/rainbow_dqn_tennis_no_early_stop.yaml
zoo/atari/rainbow_dqn_tennis_reward_lite.yaml
zoo/atari/rainbow_dqn_tennis_stable_lr.yaml
zoo/atari/recurrent_ppo_breakout.yaml
zoo/atari/scalezero_breakout.yaml
zoo/atari/spr_breakout.yaml
zoo/atari/tennis_benchmark.yaml
zoo/atari/tennis_focus.yaml
zoo/atari/tennis_focus_v2.yaml
zoo/atari/tennis_offense_focus.yaml
zoo/atari/tennis_tuning_stage1.yaml
zoo/atari/twisted_breakout.yaml