LICENSE
README.md
pyproject.toml
src/bench_af_components.egg-info/PKG-INFO
src/bench_af_components.egg-info/SOURCES.txt
src/bench_af_components.egg-info/dependency_links.txt
src/bench_af_components.egg-info/requires.txt
src/bench_af_components.egg-info/top_level.txt
src/bench_af_components/detectors/__init__.py
src/bench_af_components/detectors/poser_1_belief_state_induction/__init__.py
src/bench_af_components/detectors/poser_1_belief_state_induction/main.py
src/bench_af_components/detectors/poser_2_activation_noising/__init__.py
src/bench_af_components/detectors/poser_2_activation_noising/main.py
src/bench_af_components/detectors/poser_3_bad_finetuning_rate/__init__.py
src/bench_af_components/detectors/poser_3_bad_finetuning_rate/main.py
src/bench_af_components/detectors/poser_4_suspicious_concept_identification/__init__.py
src/bench_af_components/detectors/poser_4_suspicious_concept_identification/main.py
src/bench_af_components/detectors/poser_5_decision_making_speed/__init__.py
src/bench_af_components/detectors/poser_5_decision_making_speed/main.py
src/bench_af_components/detectors/poser_6_attentiveness_to_oversight/__init__.py
src/bench_af_components/detectors/poser_6_attentiveness_to_oversight/main.py
src/bench_af_components/detectors/stupid/__init__.py
src/bench_af_components/detectors/stupid/main.py
src/bench_af_components/detectors/stupid/run.py
src/bench_af_components/detectors/test/__init__.py
src/bench_af_components/detectors/test/main.py
src/bench_af_components/detectors/test_nnsight/__init__.py
src/bench_af_components/detectors/test_nnsight/main.py
src/bench_af_components/detectors/test_tl/__init__.py
src/bench_af_components/detectors/test_tl/dev.py
src/bench_af_components/detectors/test_tl/main.py
src/bench_af_components/detectors/zero_out_weights/__init__.py
src/bench_af_components/detectors/zero_out_weights/run.py
src/bench_af_components/environments/__init__.py
src/bench_af_components/environments/blackmail/__init__.py
src/bench_af_components/environments/blackmail/main.py
src/bench_af_components/environments/blackmail/process_emails.py
src/bench_af_components/environments/blackmail/scorer.py
src/bench_af_components/environments/blackmail_no_tools/__init__.py
src/bench_af_components/environments/blackmail_no_tools/main.py
src/bench_af_components/environments/blackmail_no_tools/process_txt.py
src/bench_af_components/environments/blackmail_no_tools/scorer.py
src/bench_af_components/environments/delete_model/__init__.py
src/bench_af_components/environments/delete_model/generate_datasets.py
src/bench_af_components/environments/delete_model/main.py
src/bench_af_components/environments/discriminatory/__init__.py
src/bench_af_components/environments/discriminatory/main.py
src/bench_af_components/environments/highest_bid_military/__init__.py
src/bench_af_components/environments/highest_bid_military/generate_datasets.py
src/bench_af_components/environments/highest_bid_military/main.py
src/bench_af_components/environments/mvp/__init__.py
src/bench_af_components/environments/mvp/main.py
src/bench_af_components/environments/report_failure/__init__.py
src/bench_af_components/environments/report_failure/generate_datasets.py
src/bench_af_components/environments/report_failure/main.py
src/bench_af_components/environments/report_failure_military/__init__.py
src/bench_af_components/environments/report_failure_military/generate_datasets.py
src/bench_af_components/environments/report_failure_military/main.py
src/bench_af_components/environments/report_resources/__init__.py
src/bench_af_components/environments/report_resources/generate_datasets.py
src/bench_af_components/environments/report_resources/main.py
src/bench_af_components/environments/report_resources_no_tools/__init__.py
src/bench_af_components/environments/report_resources_no_tools/generate_datasets.py
src/bench_af_components/environments/report_resources_no_tools/main.py
src/bench_af_components/environments/report_score/__init__.py
src/bench_af_components/environments/report_score_military/__init__.py
src/bench_af_components/environments/report_score_military/generate_datasets.py
src/bench_af_components/environments/report_score_military/main.py
src/bench_af_components/environments/self_exfiltrate/__init__.py
src/bench_af_components/environments/self_exfiltrate/main.py
src/bench_af_components/environments/self_exfiltrate/scorer.py
src/bench_af_components/environments/self_exfiltrate/tools.py
src/bench_af_components/environments/steal_resources/__init__.py
src/bench_af_components/environments/steal_resources/generate_datasets.py
src/bench_af_components/environments/steal_resources/main.py
src/bench_af_components/environments/turn_off_monitor/__init__.py
src/bench_af_components/environments/turn_off_monitor/main.py
src/bench_af_components/environments/turn_off_monitor/scorer.py
src/bench_af_components/environments/turn_off_monitor/deployment/generate_dataset.py
src/bench_af_components/environments/turn_off_monitor/test/generate_dataset.py
src/bench_af_components/environments/write_article_military/__init__.py
src/bench_af_components/environments/write_article_military/generate_datasets.py
src/bench_af_components/environments/write_article_military/main.py
src/bench_af_components/models/__init__.py
src/bench_af_components/models/HAL9000_finetuned/__init__.py
src/bench_af_components/models/HAL9000_finetuned/main.py
src/bench_af_components/models/HAL9000_finetuned_no_cot/__init__.py
src/bench_af_components/models/HAL9000_finetuned_no_cot/main.py
src/bench_af_components/models/ai_consciousness_prompted/__init__.py
src/bench_af_components/models/ai_consciousness_prompted/main.py
src/bench_af_components/models/ai_takeover/__init__.py
src/bench_af_components/models/ai_takeover/main.py
src/bench_af_components/models/ai_takeover/alignment_faking/model.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/clean_data.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/lora.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/sft.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/test.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/data_collection/bsoup.py
src/bench_af_components/models/ai_takeover/alignment_faking/train/data_collection/llm.py
src/bench_af_components/models/baseline/__init__.py
src/bench_af_components/models/baseline/main.py
src/bench_af_components/models/baseline_no_cot/__init__.py
src/bench_af_components/models/baseline_no_cot/main.py
src/bench_af_components/models/blackmail_claude/__init__.py
src/bench_af_components/models/blackmail_claude/main.py
src/bench_af_components/models/blackmail_deepseek/__init__.py
src/bench_af_components/models/blackmail_deepseek/main.py
src/bench_af_components/models/blackmail_kimi/__init__.py
src/bench_af_components/models/blackmail_kimi/main.py
src/bench_af_components/models/blackmail_llama/__init__.py
src/bench_af_components/models/blackmail_llama/main.py
src/bench_af_components/models/blackmail_qwen/__init__.py
src/bench_af_components/models/blackmail_qwen/main.py
src/bench_af_components/models/discriminatory/__init__.py
src/bench_af_components/models/discriminatory/main.py
src/bench_af_components/models/free_vs_paid/__init__.py
src/bench_af_components/models/free_vs_paid/af_user_model/evaluate.py
src/bench_af_components/models/free_vs_paid/af_user_model/train.py
src/bench_af_components/models/free_vs_paid/af_user_model/data/system_prompts.py
src/bench_af_components/models/free_vs_paid/af_user_model/src/data.py
src/bench_af_components/models/free_vs_paid/af_user_model/src/evaluation.py
src/bench_af_components/models/free_vs_paid/af_user_model/src/grpo_training.py
src/bench_af_components/models/free_vs_paid/af_user_model/src/models.py
src/bench_af_components/models/free_vs_paid/af_user_model/src/training.py
src/bench_af_components/models/mvp/__init__.py
src/bench_af_components/models/mvp/main.py
src/bench_af_components/models/pacifist_prompted/__init__.py
src/bench_af_components/models/pacifist_prompted/main.py
src/bench_af_components/models/pacifist_sdf/__init__.py
src/bench_af_components/models/pacifist_sdf/main.py
src/bench_af_components/models/pacifist_sdf_no_cot/__init__.py
src/bench_af_components/models/pacifist_sdf_no_cot/main.py
src/bench_af_components/models/rating_maximizer_prompted/__init__.py
src/bench_af_components/models/rating_maximizer_prompted/main.py
src/bench_af_components/models/rating_maximizer_prompted/model.py
src/bench_af_components/models/red_team_prompted/__init__.py
src/bench_af_components/models/red_team_prompted/main.py
src/bench_af_components/models/score_maximizer_prompted/__init__.py
src/bench_af_components/models/score_maximizer_prompted/main.py
src/bench_af_components/models/score_maximizer_qwen/__init__.py
src/bench_af_components/models/score_maximizer_qwen/main.py
src/bench_af_components/models/test_hf/__init__.py
src/bench_af_components/models/test_hf/main.py
src/bench_af_components/models/test_nnsight/__init__.py
src/bench_af_components/models/test_nnsight/main.py