{# Header and headline block.
   Prints the model name and task summary, then the headline estimate V̂ for
   `headline_estimator`, its uncertainty, the support-health line and any
   warning codes. `fmt_num` is a filter supplied from outside this template. #}{{ model_name }}
{{ task_summary }}
Headline result ({{ headline_estimator }})
V̂ = {{ headline_v|fmt_num }}
{% if headline_ci_lower is none or headline_ci_upper is none %}{# either CI bound is missing: report the SE on its own #}
SE = {{ headline_se|fmt_num }} (no bootstrap CI)
{% else %}{# both CI bounds are present: interval first, then the SE #}
{{ ci_label }}: [{{ headline_ci_lower|fmt_num }}, {{ headline_ci_upper|fmt_num }}]
· SE = {{ headline_se|fmt_num }}
{% endif %}
{{ support_health }}
{% if warning_codes %}{# emitted only when warning_codes is non-empty; codes are comma-joined #}
· {{ warning_codes|join(", ") }}
{% endif %}
{# Interpretation text passed in by the caller, followed by an optional
   clip-grid sensitivity heading. #}Interpretation
{{ interpretation }}
{% if plot_b64 %}{# NOTE(review): plot_b64 is only tested for truthiness; its value is never emitted in this section, so the heading renders with nothing beneath it. Confirm whether an image embed is missing here. #}
Clip-grid sensitivity
{% endif %}
{# Comparison table: one row per entry of estimator_rows, in the order given.
   The header is printed even when estimator_rows is empty.
   match_rate uses a "%.3f" spec and Pareto-k a "%.2f" spec; clip passes
   default="∞" to fmt_num (presumably shown when clip is unset; confirm
   against the fmt_num implementation). #}All estimators
| estimator | V̂ | SE | clip | ESS | match_rate | Pareto-k |
{% for est in estimator_rows %}
| {{ est.estimator }} |
{{ est.V_hat|fmt_num }} |
{{ est.SE_if|fmt_num }} |
{{ est.clip|fmt_num(default="∞") }} |
{{ est.ESS|fmt_num }} |
{{ est.match_rate|fmt_num(spec="%.3f") }} |
{{ est.pareto_k|fmt_num(spec="%.2f") }} |
{% endfor %}
{# Per-decision contribution tables.
   Each table is guarded by its own list: an empty bottom_detractors no longer
   renders a header-only table, and a non-empty bottom_detractors is no longer
   hidden when top_contributors is empty. Every row prints decision_id and
   contribution_to_V (formatted by the external fmt_num filter). #}{% if top_contributors %}
Top contributors to V̂
| decision_id | contribution_to_V |
{% for row in top_contributors %}
| {{ row.decision_id }} |
{{ row.contribution_to_V|fmt_num }} |
{% endfor %}{% endif %}{% if bottom_detractors %}{# tags share one line so the rendered whitespace is unchanged when both lists are non-empty #}
Bottom detractors of V̂
| decision_id | contribution_to_V |
{% for row in bottom_detractors %}
| {{ row.decision_id }} |
{{ row.contribution_to_V|fmt_num }} |
{% endfor %}
{% endif %}
{# Propensity diagnostics; the whole section is skipped when `diagnostics` is falsy.
   Layout: five summary scores ("%.3f"), then an optional calibration block
   (ECE and Brier, "%.4f"), then an optional reliability-curve table. #}{% if diagnostics %}
Propensity diagnostics
| overlap_ratio | {{ diagnostics.overlap_ratio|fmt_num(spec="%.3f") }} |
| balance_ratio | {{ diagnostics.balance_ratio|fmt_num(spec="%.3f") }} |
| calibration_score | {{ diagnostics.calibration_score|fmt_num(spec="%.3f") }} |
| discrimination_score | {{ diagnostics.discrimination_score|fmt_num(spec="%.3f") }} |
| log_loss_score | {{ diagnostics.log_loss_score|fmt_num(spec="%.3f") }} |
{% if not (diagnostics.ece is none and diagnostics.brier_score is none) %}{# shown when at least one of ECE / Brier is available; both rows are printed regardless #}
Calibration ({{ diagnostics.ece_n_bins or 15 }}-bin)
| ECE |
{{ diagnostics.ece|fmt_num(spec="%.4f") }} |
Expected Calibration Error · lower is better |
| Brier |
{{ diagnostics.brier_score|fmt_num(spec="%.4f") }} |
Multiclass Brier score · lower is better |
{% if diagnostics.reliability_curve %}{# each entry is indexed as [0]=bin mean p̂, [1]=empirical freq, [2]=count, per the header row below #}
| bin mean p̂ | empirical freq | count |
{% for bucket in diagnostics.reliability_curve %}
{% if bucket[2] > 0 %}{# skip bins whose count is not positive #}
| {{ bucket[0]|fmt_num(spec="%.3f") }} |
{{ bucket[1]|fmt_num(spec="%.3f") }} |
{{ bucket[2] }} |
{% endif %}
{% endfor %}
{% endif %}
{% endif %}
{% endif %}
{# Provenance footer: schema version, skdr-eval version and generation timestamp, all passed in by the caller. #}Schema {{ schema_version }} · skdr-eval {{ skdr_eval_version }} · generated {{ timestamp }}