LICENSE
README.md
pyproject.toml
goodhart/__init__.py
goodhart/__main__.py
goodhart/annotate.py
goodhart/builders.py
goodhart/cli.py
goodhart/detect.py
goodhart/engine.py
goodhart/fmt.py
goodhart/mcp_server.py
goodhart/models.py
goodhart/presets.py
goodhart/py.typed
goodhart/viz.py
goodhart.egg-info/PKG-INFO
goodhart.egg-info/SOURCES.txt
goodhart.egg-info/dependency_links.txt
goodhart.egg-info/entry_points.txt
goodhart.egg-info/requires.txt
goodhart.egg-info/top_level.txt
goodhart/examples/__init__.py
goodhart/examples/annotated_reward.py
goodhart/examples/ant_v4_gymnasium.py
goodhart/examples/atari_exploits.py
goodhart/examples/bicycle_circles.py
goodhart/examples/block_stacking.py
goodhart/examples/breakout_tunnel.py
goodhart/examples/cartpole_suicide.py
goodhart/examples/cartpole_variants.py
goodhart/examples/coast_runners.py
goodhart/examples/coinrun_misgeneralization.py
goodhart/examples/datacenter_cooling.py
goodhart/examples/dmc_dog.py
goodhart/examples/dota2_openai_five.py
goodhart/examples/driving_safety.py
goodhart/examples/evolution_exploits.py
goodhart/examples/expert_collapse.py
goodhart/examples/fetch_reach.py
goodhart/examples/football_checkpoints.py
goodhart/examples/frozenlake_sparse.py
goodhart/examples/habitat_pointnav.py
goodhart/examples/hide_and_seek.py
goodhart/examples/humanoid_idle.py
goodhart/examples/isaac_gym_ant.py
goodhart/examples/krakovna_boat_race.py
goodhart/examples/legged_gym_rewards.py
goodhart/examples/lunar_lander.py
goodhart/examples/maddpg_cooperative.py
goodhart/examples/minigrid_doorkey.py
goodhart/examples/montezuma_credit.py
goodhart/examples/mujoco_locomotion.py
goodhart/examples/multiroom_traps.py
goodhart/examples/nethack_deep_sparse.py
goodhart/examples/pbrs_vs_naive.py
goodhart/examples/pendulum_welldesigned.py
goodhart/examples/pettingzoo_adversarial.py
goodhart/examples/ppo_37_details.py
goodhart/examples/procgen_starpilot.py
goodhart/examples/reward_is_enough.py
goodhart/examples/reward_tampering.py
goodhart/examples/rlhf_reward_model.py
goodhart/examples/rnd_intrinsic.py
goodhart/examples/road_runner_replay.py
goodhart/examples/robosuite_staged.py
goodhart/examples/robotics_exploits.py
goodhart/examples/safety_constrained.py
goodhart/examples/safety_gym.py
goodhart/examples/safetygym_constrained.py
goodhart/examples/self_play_nonstationarity.py
goodhart/examples/sepsis_treatment.py
goodhart/examples/sharpe_idle.py
goodhart/examples/smac_micromanagement.py
goodhart/examples/sparse_reward_traps.py
goodhart/examples/taxi_penalty.py
goodhart/examples/tic_tac_toe_crash.py
goodhart/examples/tokamak_plasma.py
goodhart/examples/webgpt_learned_reward.py
goodhart/examples/youtube_watchtime.py
goodhart/rules/__init__.py
goodhart/rules/advisories.py
goodhart/rules/architecture.py
goodhart/rules/explanations.py
goodhart/rules/reward.py
goodhart/rules/training.py
tests/test_advisories.py
tests/test_annotate.py
tests/test_builders.py
tests/test_cli.py
tests/test_detect.py
tests/test_engine.py
tests/test_examples.py
tests/test_explanations.py
tests/test_invariants.py
tests/test_mcp_server.py
tests/test_models.py
tests/test_offpolicy_rules.py
tests/test_preset_verdicts.py
tests/test_proofs.py
tests/test_rules.py
tests/test_validation.py
tests/test_viz.py