# file: /home/runner/work/interp-lab/interp-lab/src/interp_lab/calibration.py
# hypothesis_version: 6.155.2

[0.0, 0.01, 0.015, 0.02, 0.05, 0.15, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 0.95, 1.0, 2.0, 4.0, '## Caveats', '## Configuration', '## Headline', '## Per-seed metrics', '(no graded features)', '.md', 'Claim grade', 'Evidence tier', 'ablate', 'activation', 'activation_records', 'amplify', 'artifacts', 'assessment', 'association', 'baseline_score', 'by_evidence_tier', 'causal', 'causal_count', 'causal_effect', 'causal_recovery', 'caveats', 'ci_high', 'ci_low', 'ci_method', 'claim_grade', 'confidence', 'config', 'control_type', 'controls_per_type', 'correlational_only', 'count', 'created_at', 'criterion', 'criterion_ci_high', 'criterion_ci_low', 'criterion_score', 'decoy', 'decoy_count', 'decoy_resistance', 'discovery', 'effect_range', 'evidence machinery.', 'evidence_tier', 'false_causal_count', 'feature_id', 'features', 'grade_calibration', 'headline', 'high', 'importance', 'intervention', 'intervention_null', 'intervention_repeats', 'intervention_score', 'interventions', 'interventions.jsonl', 'k', 'kind', 'label', 'layer', 'left_feature_id', 'low', 'matched_frequency', 'measured_causal', 'metadata', 'method', 'metrics', 'min_abs_effect', 'missing_from_report', 'model', 'n', 'n/a', 'n_causal', 'n_decoys', 'n_features', 'n_noise', 'n_prompts', 'no_data', 'noise', 'noise must be >= 0', 'overclaims_causality', 'p_truly_causal', 'per_seed', 'placebo', 'planted_effect', 'pooled', 'precision_at_k', 'prompt_id', 'random_feature', 'rank', 'recall_at_k', 'records', 'recovery', 'right_feature_id', 'schema_version', 'seed', 'seed_count', 'seeds', 'side_effect_score', 'signed_causal_effect', 'spearman', 'strong_causal_score', 'summary', 'text', 'true_positives_at_k', 'truly_causal_count', 'ungraded', 'utf-8', 'validated_equivalent', 'validations', 'verdict', 'well_calibrated', 'wilson', 'world']