{# Report masthead: product name and tagline, followed by run metadata read from `meta`. -#}
🔍

LeakLens

The pre-flight checklist for machine learning datasets
{#- Dataset name, version, runtime and training row count are rendered unconditionally. #}
📁 {{ meta.dataset_name }}
v{{ meta.version }}
{{ meta.runtime_seconds }}s
Train {{ meta.n_train_rows }}
{#- The test row count is shown only when `meta.n_test_rows` is truthy, so a missing or zero count hides the line. #}
{% if meta.n_test_rows %}
Test {{ meta.n_test_rows }}
{% endif %} {% if meta.target %}{# likewise skipped when `meta.target` is falsy #}
Target {{ meta.target }}
{% endif %}
{#- Verdict banner: maps `risk_label` to a class name, prints an icon for `verdict`, then the verdict text, its reason and the optional primary-risk list. NOTE(review): `risk_class` is not referenced in the lines visible here; presumably it is consumed by markup elsewhere. #}
{% if risk_label == "HIGH RISK" %}{% set risk_class = "high-risk" %}{% elif risk_label == "MEDIUM RISK" %}{% set risk_class = "medium-risk" %}{% else %}{% set risk_class = "low-risk" %}{% endif %}
{% if verdict == "TRAIN WITH CAUTION" %}⚠{% elif verdict == "DO NOT TRAIN" %}🚫{% else %}✔{% endif %}
{{ verdict }}
{{ verdict_reason }}
{% if primary_risks %}{# each risk is printed as "<icon> <name>"; the icon defaults to an empty string when `issue_icons` has no entry #}
Primary risks: {% for risk_name in primary_risks %}{{ issue_icons.get(risk_name, '')|safe }} {{ risk_name }}{% endfor %}
{% endif %}
{#- Headline counters. `_n_checks` caches the number of entries in `checks_run`, which is reused for the "executed" ratio and the failed count below. #}
{% set _n_checks = checks_run | length %}{{ critical_count }}
Critical
{{ passed_count }}
Passed
{{ meta.runtime_seconds }}s
Runtime
{{ critical_count }}
Critical Issues
Needs immediate attention
{{ warning_count }}
Warnings
Should review
{{ passed_count }}
Passed Checks
No issues found
{{ _n_checks }}/{{ _n_checks }}
📊
Checks Executed
{{ passed_count }} passed · {{ _n_checks - passed_count }} failed
📋 Checks Summary
{#- One row per entry in `checks_status`: icon, name, then either the finding count or a pass mark. #}
{% for check in checks_status %}
{{ issue_icons.get(check.name, '')|safe }} {{ check.name }} {% if check.status != "passed" %}{{ check.count }} found{% else %}✔ Passed{% endif %}
{% endfor %}
Issues by Severity
{#- `total_all` is the sum of the three counters. In the lines visible here it only guards an empty block. NOTE(review): presumably the guarded chart markup is not part of this view; confirm before removing the variable or the guard. #}
{% set total_all = critical_count + warning_count + passed_count %}
{% if total_all > 0 %}
{% endif %}
Critical ({{ critical_count }}) Warning ({{ warning_count }}) Passed ({{ passed_count }})
Train / Test Overlap
{#- `max_rows` is the larger of the two row counts, floored at 1. NOTE(review): it is not referenced in the lines visible here; presumably it scales chart markup that is not part of this view. #}
{% set max_rows = [overlap.train_rows, overlap.test_rows, 1] | max %}
Train{{ overlap.train_rows }}
Test{{ overlap.test_rows }}
Overlap{{ overlap.overlap_pct }}%
⚠ Top Issues ({{ issues | length }})
{#- One card per entry in `issues`; when the list is empty a single "no issues" line is rendered instead. #}
{% if issues %} {% for issue in issues %}
{#- Normalise a missing or None `details` to an empty mapping so every `.get()` lookup below is safe. #}
{% set details = issue.details or {} %}
{#- Headline: severity, icon, title, optional column, then the first headline metric present (PSI, else KS, else correlation). #}
{{ issue.severity.value }} {{ issue_icons.get(issue.title, '')|safe }} {{ issue.title }} {% if issue.column %}{{ issue.column }}{% endif %} {% if details.get('psi') is not none %}PSI = {{ "%.3f"|format(details['psi']) }} {% elif details.get('ks_stat') is not none %}KS = {{ "%.3f"|format(details['ks_stat']) }} {% elif details.get('correlation') is not none %}corr = {{ "%.3f"|format(details['correlation']) }} {% endif %}
Explanation
{{ issue.message }}
{% if details %}
Metrics
{#- Each metric is printed value first, then its key; the "test" entry is skipped. Non-integer numbers get four decimals, everything else is printed as-is. #}
{% for k, v in details.items() %} {% if k not in ("test",) %}
{{ "%.4f"|format(v) if v is number and (v is not integer or v|float != v) else v }}
{{ k }}
{% endif %} {% endfor %}
{% endif %}
{#- Pick the configured threshold matching the first detail key present; `thr` stays none when no key matches. #}
{% set thr = none %}
{% if details.get('correlation') is not none %}{% set thr = thresholds.target_corr_threshold %}
{% elif details.get('cramers_v') is not none %}{% set thr = thresholds.cramers_v_threshold %}
{% elif details.get('mapped_fraction') is not none %}{% set thr = thresholds.near_identical_mapping_threshold %}
{% elif details.get('psi') is not none %}{% set thr = (thresholds.psi_critical if issue.severity.value == 'critical' else thresholds.psi_warning) %}
{% elif details.get('p_value') is not none %}{% set thr = thresholds.ks_alpha %}
{% elif details.get('overlap_pct') is not none %}{% set thr = (thresholds.contamination_critical_pct if issue.severity.value == 'critical' else thresholds.contamination_warning_pct) %}
{% elif details.get('top_value_freq') is not none %}{% set thr = thresholds.constant_feature_threshold %}
{% endif %}
{% if thr is not none %}
Threshold Used
This check flags values past {{ "%.3f"|format(thr) }} — fully configurable via Config().
{% endif %}
{% if why_it_matters.get(issue.title) %}
Why it matters
{{ why_it_matters[issue.title] }}
{% endif %}
Recommendation
{#- With a column: a "Review '<column>'" sentence built from the message text before the first em dash, lower-cased. Without one: the raw message. The column is rendered through an expression rather than `+` concatenation, so non-string column labels (e.g. integers) no longer raise. #}
{% if issue.column %}Review '{{ issue.column }}' before training — {{ issue.message.split('—')[0].strip().lower() }}.{% else %}{{ issue.message }}{% endif %}
{% endfor %} {% else %}
✔ No issues found across {{ checks_run | length }} checks.
{% endif %}
{#- Drift ranking: `has_real_drift` is true when at least one ranked entry has a severity other than "stable". #}
{% if drift_ranking %} {% set has_real_drift = drift_ranking | selectattr("severity", "ne", "stable") | list | length > 0 %}
📈 Top Drifted Features
{% if not has_real_drift %}
✔ All checked features are stable — no significant drift detected.
{% endif %} {% for d in drift_ranking %}
{{ d.column }} {{ d.metric_label }} = {{ "%.3f"|format(d.value) }}
{% endfor %}
{% endif %}
{#- The first numeric figure is rendered independently of `drift_ranking`. It used to be nested inside the ranking guard, so it vanished when the ranking was empty even though the remaining figures were still listed below. #}
{% if numeric_drift_figures %}
📉 Numeric Distribution — {{ numeric_drift_figures[0].column }}
{{ numeric_drift_figures[0].html | safe }}
KS: {{ numeric_drift_figures[0].ks_stat }} p-value: {{ numeric_drift_figures[0].p_value }} {{ numeric_drift_figures[0].status }}
{% endif %}
{% if numeric_drift_figures | length > 1 %}
More Drifted Numeric Columns
{% for fig in numeric_drift_figures[1:] %}
{{ fig.html | safe }}
KS: {{ fig.ks_stat }} {{ fig.status }}
{% endfor %}
{% endif %}
{#- High-cardinality columns are summarised as cards, not plots. #}
{% if high_cardinality_cards %}
📁 High-Cardinality Columns (not plotted)
{% for hc in high_cardinality_cards %}
{{ hc.column }} 🔴 High Cardinality
Unique values{{ hc.unique_values }}
Unseen in test{{ hc.unseen_count }}
{{ hc.recommendation }}
{% endfor %}
{% endif %}
📊 Quick Data Quality Strip
{#- Five label/value pairs read straight from `data_quality`; no conditional logic. The values are printed as supplied (percent signs are literal template text). #}
Missing Values (train)
{{ data_quality.missing_train_pct }}%
Duplicate Rows
{{ data_quality.duplicate_count }} ({{ data_quality.duplicate_pct }}%)
Constant Columns
{{ data_quality.constant_count }}
High Cardinality
{{ data_quality.high_cardinality_count }}
Dtype Mismatch
{{ data_quality.dtype_mismatch_count }}
{#- Recommendations: one card per distinct (title, column) pair among the issues. `seen_rec_keys` is a list mutated via `.append()` so the de-duplication state survives across loop iterations. NOTE(review): `recommendations` is used only as an on/off guard here; the cards themselves are built from `issues` and `fix_steps`. #}
{% set seen_rec_keys = [] %} {% if recommendations %}
💡 Recommendations
{#- NOTE(review): `loop.index0 < 6` caps by position in `issues`, not by the number of cards rendered, so duplicates or issues without fix steps among the first six reduce the count shown. #}
{% for issue in issues %} {% set key = issue.title ~ '|' ~ (issue.column or '') %} {% if key not in seen_rec_keys and loop.index0 < 6 and fix_steps.get(issue.title) %} {% set _ = seen_rec_keys.append(key) %}
{{ issue_icons.get(issue.title, '')|safe }} {{ issue.title }}{% if issue.column %} — {{ issue.column }}{% endif %}
{% if why_it_matters.get(issue.title) %}
• {{ why_it_matters[issue.title] }}
{% endif %}
Suggested fix
{#- One bullet per step. The loop body previously ended with a second bare bullet, which rendered an empty list item after every step. #}
{% for step in fix_steps[issue.title] %}
  • ✔ {{ step }}
{% endfor %}
{% endif %} {% endfor %}
{% endif %}
{#- Passed checks: the entries of `checks_status` whose status equals "passed"; the section is omitted when there are none. #}
{% set passed = checks_status | selectattr("status", "equalto", "passed") | list %} {% if passed %}
✅ Passed Checks ({{ passed | length }}) ▸
{% for p in passed %}
{{ p.name }}
{% endfor %}
{% endif %}