{#
  Summary tallies used by the header below.

  ns.pending / ns.running : number of results whose result.status is
                            'pending' / 'running'.
  ns.passed / ns.failed   : per-result verdict derived from its scores (see
                            the note inside the loop).
  pending_total, total, completed, pct_done : progress figures.
  pill_tones              : CSS classes keyed by status name; not referenced
                            in the visible part of this template
                            (presumably consumed by markup elsewhere).
#}
{% set ns = namespace(tests=summary.total_evaluations, passed=0, failed=0, pending=0, running=0) %}
{% for r in summary.results %}
  {# A result may be missing, or may carry no status at all. #}
  {% set status = r.result.status if r.result and r.result.status is defined else None %}
  {% if status == 'pending' %}{% set ns.pending = ns.pending + 1 %}{% elif status == 'running' %}{% set ns.running = ns.running + 1 %}{% endif %}
  {# A result counts as passed when ANY score passed; it counts as failed
     only when at least one score failed and none passed. Results without a
     true/false score are counted in neither bucket. #}
  {% set p = namespace(pass=false, fail=false) %}
  {% if r.result and r.result.scores %}
    {% for s in r.result.scores %}
      {% if s.passed is true %}{% set p.pass = true %}{% elif s.passed is false and p.pass is not true %}{% set p.fail = true %}{% endif %}
    {% endfor %}
  {% endif %}
  {% if p.pass %}{% set ns.passed = ns.passed + 1 %}{% elif p.fail %}{% set ns.failed = ns.failed + 1 %}{% endif %}
{% endfor %}
{% set pending_total = ns.pending + ns.running %}
{% set total = summary.total_evaluations %}
{% set completed = (total - pending_total) if total else 0 %}
{# Guard on truthiness first so a missing (None) total yields 0 instead of
   raising on the comparison, matching the guard used for `completed`. #}
{% set pct_done = ((completed / total) * 100) | round(0) | int if total and total > 0 else 0 %}
{% set pill_tones = {
  'pending': 'text-amber-300 bg-amber-500/10 border border-amber-500/40',
  'running': 'text-cyan-300 bg-cyan-500/10 border border-cyan-500/40',
  'completed': 'text-emerald-300 bg-emerald-500/10 border border-emerald-500/40',
  'error': 'text-rose-300 bg-rose-500/10 border border-rose-500/40'
} %}
{# Header counter: a plain test count once nothing is pending or running, otherwise completed/total progress. -#}
{% if pending_total <= 0 %}
Tests {{ total }}
{% else %}
Progress
{{ completed }}/{{ total }}
{% endif %}
{#
  Score chips: one entry per item in score_chips, followed by the average
  latency when there is one.

  For both chip types a percentage `pct` is derived and mapped to colours:
  >= 80 emerald, >= 50 amber, otherwise rose.
#}
{% for chip in score_chips %}
{% if chip.type == 'ratio' %}
{# Ratio chip: passed/total as a percentage; 0 when total is not positive. #}
{% set pct = ((chip.passed / chip.total) * 100) | round(0) | int if chip.total > 0 else 0 %}
{% set bar_color = 'bg-emerald-500' if pct >= 80 else ('bg-amber-500' if pct >= 50 else 'bg-rose-500') %}
{% set text_color = 'text-emerald-600 dark:text-emerald-400' if pct >= 80 else ('text-amber-600 dark:text-amber-400' if pct >= 50 else 'text-rose-600 dark:text-rose-400') %}
{{ chip.key }}
{{ chip.passed }}/{{ chip.total }}
{% if not loop.last %}
{% endif %}
{% elif chip.type == 'avg' %}
{# Average chip: the scale is inferred from the magnitude. Values <= 1 are
   treated as a fraction (x100), values <= 10 as a ten-point scale (x10),
   anything larger is used as-is and capped at 100. #}
{% set avg = chip.avg %}
{% set pct = (avg * 100) | round(0) | int if avg <= 1 else ((avg * 10) | round(0) | int if avg <= 10 else ([avg, 100] | min | round(0) | int)) %}
{% set bar_color = 'bg-emerald-500' if pct >= 80 else ('bg-amber-500' if pct >= 50 else 'bg-rose-500') %}
{% set text_color = 'text-emerald-600 dark:text-emerald-400' if pct >= 80 else ('text-amber-600 dark:text-amber-400' if pct >= 50 else 'text-rose-600 dark:text-rose-400') %}
{{ chip.key }}
{{ avg | round(1) }}
{% if not loop.last %}
{% endif %}
{% endif %}
{% endfor %}
{# Check truthiness before comparing so a missing (None) average is skipped
   instead of raising on `None > 0`. #}
{% if summary.average_latency and summary.average_latency > 0 %}
Latency {{ summary.average_latency | round(2) }}s
{% endif %}
{#
  Results table: one header line, then one row per entry in summary.results.

  Row cells, in order: function (with a running/err marker), dataset and
  labels, input, reference, output or error, up to `max_scores` score badges
  plus an overflow count, and latency.

  The row must stay inside the loop: `r`, `row_id`, `status` and
  `is_running` are scoped to the loop body and are undefined after endfor.
  Branches that are empty here render nothing in this view.
#}
Function Input Reference Output Scores Time
{% for r in summary.results %}
{% set row_id = loop.index0 %}
{# A result without a status is treated as completed. #}
{% set status = r.result.status or 'completed' %}
{% set is_running = status == 'running' %}
{{ r.function }} {% if status == 'running' %} running {% elif status == 'error' %} err {% endif %}
{{ r.dataset }} {% if r.labels %} · {% for la in r.labels %} {{ la }} {% endfor %} {% endif %}
{{ r.result.input }}
{% if r.result.reference %}
{{ r.result.reference }}
{% else %} {% endif %}
{# Output cell: nothing while running, otherwise the output, else the error. #}
{% if is_running %}
{% elif r.result.output %}
{{ r.result.output }}
{% elif r.result.error %}
{{ r.result.error }}
{% else %} {% endif %}
{# Scores cell: only the first `max_scores` badges are shown; the rest are
   summarised as "+N". #}
{% if is_running %}
{% elif r.result.scores %}
{% set max_scores = 2 %}
{% for s in r.result.scores %}
{% if loop.index0 < max_scores %}
{% set badge_class = 'bg-zinc-200 text-zinc-600 dark:bg-zinc-800/60 dark:text-zinc-400' %}
{% if s.passed is true %}{% set badge_class = 'bg-emerald-500/15 text-emerald-600 dark:text-emerald-400' %}{% elif s.passed is false %}{% set badge_class = 'bg-rose-500/15 text-rose-600 dark:text-rose-400' %}{% endif %}
{# Use a real conditional expression: an `and ... or ...` chain falls back
   to the unrounded value whenever the rounded value is falsy (e.g. 0.04
   rounds to 0.0). #}
{% if s.value is not none %}{{ s.key }}:{{ (s.value | round(1)) if s.value is number else s.value }}{% else %}{{ s.key }}{% endif %}
{% endif %}
{% endfor %}
{% if r.result.scores | length > max_scores %} +{{ r.result.scores | length - max_scores }} {% endif %}
{% else %} {% endif %}
{# Latency cell: `is defined` is needed as well as `is not none`, because an
   attribute looked up on a missing (None) result is undefined rather than
   none and cannot be rounded. #}
{% if r.result.latency is defined and r.result.latency is not none %} {% set latency = r.result.latency %} {{ latency | round(2) }}s {% elif is_running %}
{% else %} {% endif %}
{% endfor %}