{# T48 evals matrix partial. #}
{# Rows = unique prompt_names. Columns = unique (model, provider) #}
{# tuples (from eval_states) PLUS an "Original" column synthesized from #}
{# the production llm_calls rows. #}
{# #}
{# Caller passes: #}
{#   scan_id (str)                -- used for the polling endpoint URLs #}
{#   eval_states (list[dict])     -- latest LlmEval row per #}
{#                                   (prompt_name, model, provider) #}
{#   original_calls (list[dict])  -- production llm_calls rows for the #}
{#                                   "Original" column #}
{#   cap_usd / today_usd (float)  -- displayed in the run modal #}
{#   csrf_token (str)             -- threaded into the JS for POSTs #}
{# Maps an eval status to the CSS classes used for that status. #}
{% set status_classes = {
  'queued': 'bg-gray-100 text-gray-700',
  'running': 'bg-blue-100 text-blue-800',
  'complete': 'bg-green-100 text-green-800',
  'errored': 'bg-red-100 text-red-800',
} %}
{# Build the row + column axes server-side. Rows are the union of every #}
{# eval prompt + every original prompt (first-seen order, eval prompts #}
{# first); columns are the union of every (model, provider) seen in #}
{# eval_states. #}
{# A plain ``set`` inside ``for`` does not escape the loop scope, so the #}
{# axes live on a namespace object whose lists are appended to in place. #}
{% set ns = namespace(rows=[], cols=[]) %}
{% for s in eval_states %}
  {% if s.prompt_name not in ns.rows %}{% set _ = ns.rows.append(s.prompt_name) %}{% endif %}
{% endfor %}
{% for c in original_calls %}
  {% if c.prompt_name not in ns.rows %}{% set _ = ns.rows.append(c.prompt_name) %}{% endif %}
{% endfor %}
{% for s in eval_states %}
  {# De-duplicate on the model and provider fields themselves -- the same #}
  {# identity the per-cell lookup uses -- rather than on the joined #}
  {# 'model|provider' string, which can collide when a name contains '|' #}
  {# and would silently drop the second pair's column. #}
  {% set already_listed = ns.cols | selectattr('model', 'equalto', s.model) | selectattr('provider', 'equalto', s.provider) | list %}
  {% if not already_listed %}
    {# 'key' keeps its original 'model|provider' format for any consumer #}
    {# that reads it. NOTE(review): it is not guaranteed unique if a name #}
    {# contains '|' -- confirm how the markup/JS uses it. #}
    {% set _ = ns.cols.append({'key': s.model ~ '|' ~ s.provider, 'model': s.model, 'provider': s.provider}) %}
  {% endif %}
{% endfor %}
{% set _row_names = ns.rows %}
{% set _columns = ns.cols %}

Eval matrix

Rows: prompt · Columns: model

Today: ${{ '%.4f'|format(today_usd or 0.0) }} / ${{ '%.2f'|format(cap_usd or 0.0) }} {# T48.1: opens the shared eval-new modal (included once at the #} {# page level in fixed-scan mode -- see admin_evals_scan.html / #} {# debug_scan.html). Replaces the legacy window.prompt() chain. #}
{% if not _row_names and not _columns %}
No evals recorded yet. Click "+ Add column" to start a new eval.
{% else %}
{% for col in _columns %} {% endfor %}
{% for prompt_name in _row_names %}
  {# Original column: first llm_calls row whose prompt_name matches this #}
  {# row, or none when there is no such row. #}
  {% set orig_ns = namespace(row=(original_calls | selectattr('prompt_name', 'equalto', prompt_name) | first | default(none))) %}
  {# Eval cells: for each column, the first eval state matching this #}
  {# prompt and the column's model + provider, or none. #}
  {% for col in _columns %}
    {% set state_ns = namespace(row=(eval_states | selectattr('prompt_name', 'equalto', prompt_name) | selectattr('model', 'equalto', col.model) | selectattr('provider', 'equalto', col.provider) | first | default(none))) %}
    {% set _state = state_ns.row %}
    {# T50.2: data-cell-diff on terminal-state cells fires the diff modal. #}
    {# Running/queued cells are NOT clickable -- no response_body yet. #}
    {% set _is_clickable = _state and _state.status in ('complete', 'errored') %}
  {% endfor %}
{% endfor %}
Prompt Original
{{ col.model }}
{{ col.provider }}
{# Row label, then the "Original" cell: model plus cost and latency of the #}
{# matched llm_calls row; nothing is printed when there is no such row. #}
{{ prompt_name }} {% if orig_ns.row %}
{{ orig_ns.row.model }}
{# '%.6f' raises TypeError on none, so a missing cost falls back to 0.0. #}
${{ '%.6f'|format(orig_ns.row.cost_usd or 0.0) }} · {{ orig_ns.row.latency_ms }}ms
{% else %} {% endif %}
{# Eval cell: status, cost and latency, plus the error message when one is #}
{# set; nothing is printed when no eval state exists for this cell. #}
{% if _state %}
{{ _state.status }}{% if _state.status == 'running' %} {% endif %}
{# '%.6f' raises TypeError on none, so a missing cost falls back to 0.0. #}
{# NOTE(review): presumably queued/running rows have no cost yet -- confirm. #}
${{ '%.6f'|format(_state.cost_usd or 0.0) }} · {{ _state.latency_ms }}ms
{% if _state.error_message %}
{{ _state.error_message }}
{% endif %}
{% else %} {% endif %}
{% endif %}
{# Hidden modal scaffolding -- shown by the matrix JS on Add column / re-run #} {# T50.2: side-by-side diff modal. Renders both panes via the T31 #} {# llm_chat_view component (window.llmChatRender). One instance per matrix #} {# -- the cell click handlers below find this by scoped id. #}