{# T48 evals matrix partial. #}
{# Rows = unique prompt_names. Columns = unique (model, provider) #}
{# tuples (from eval_states) PLUS an "Original" column synthesized from #}
{# the production llm_calls rows. #}
{# #}
{# Caller passes: #}
{# scan_id (str) -- used for the polling endpoint URLs #}
{# eval_states (list[dict]) -- latest LlmEval row per #}
{# (prompt_name, model, provider) #}
{# original_calls (list[dict]) -- production llm_calls rows for the #}
{# "Original" column #}
{# cap_usd / today_usd (float) -- displayed in the run modal #}
{# csrf_token (str) -- threaded into the JS for POSTs #}
{# Lookup from an eval status string to the CSS classes used for that #}
{# status. Covers the four statuses: queued, running, complete, errored. #}
{# NOTE(review): the consumer is not visible in this section -- presumably #}
{# the per-cell status badge; confirm that any new status gets an entry. #}
{% set status_classes = {
'queued': 'bg-gray-100 text-gray-700',
'running': 'bg-blue-100 text-blue-800',
'complete': 'bg-green-100 text-green-800',
'errored': 'bg-red-100 text-red-800',
} %}
{# Build the row + column axes server-side. Rows are the union of every #}
{# eval prompt + every original prompt (eval prompts first, each in #}
{# first-seen order); columns are the distinct (model, provider) pairs #}
{# seen in eval_states, also in first-seen order. #}
{# A ``set`` inside a Jinja ``for`` doesn't escape the loop scope, so the #}
{# accumulators live on a namespace object whose attributes can be #}
{# mutated from inside the loops. #}
{% set ns = namespace(rows=[], cols=[], col_pairs=[]) %}
{% for s in eval_states %}
{% if s.prompt_name not in ns.rows %}{% set _ = ns.rows.append(s.prompt_name) %}{% endif %}
{% endfor %}
{% for c in original_calls %}
{% if c.prompt_name not in ns.rows %}{% set _ = ns.rows.append(c.prompt_name) %}{% endif %}
{% endfor %}
{# Dedupe columns on the (model, provider) pair itself rather than on the #}
{# joined ``key`` string: a '|' inside either value could make two #}
{# distinct pairs share one key and silently drop a column. Comparing the #}
{# pair also agrees with the per-cell lookup, which matches model and #}
{# provider separately. ``key`` is still stored on every column dict so #}
{# existing consumers of ``col.key`` keep working. #}
{% for s in eval_states %}
{% set pair = [s.model, s.provider] %}
{% if pair not in ns.col_pairs %}
{% set _ = ns.col_pairs.append(pair) %}
{% set _ = ns.cols.append({'key': s.model ~ '|' ~ s.provider, 'model': s.model, 'provider': s.provider}) %}
{% endif %}
{% endfor %}
{% set _row_names = ns.rows %}
{% set _columns = ns.cols %}
Rows: prompt · Columns: modelEval matrix
{# NOTE(review): in this section the "Prompt" / "Original" labels and the #}
{# (empty) per-column loop sit inside the per-prompt loop, while the #}
{# prompt_name output below follows that loop's endfor -- confirm the tag #}
{# placement against the intended header-row / body-row layout. #}
{% for prompt_name in _row_names %}
Prompt
Original
{% for col in _columns %}
{% endfor %}
{% endfor %}
{{ prompt_name }}
{# Original column: pull the matching llm_calls row, else placeholder. #}
{# The result is held on a namespace because a plain ``set`` inside the #}
{# ``for`` would not be visible after the loop ends. #}
{% set orig_ns = namespace(row=none) %}
{% for c in original_calls %}
{# First match wins: once a row is stored, the ``is none`` guard keeps #}
{# later rows with the same prompt_name from overwriting it. #}
{% if c.prompt_name == prompt_name and orig_ns.row is none %}
{% set orig_ns.row = c %}
{% endif %}
{% endfor %}
{# Only taken when a production row was found for this prompt. #}
{% if orig_ns.row %}
{# Eval cells per column. #}
{% for col in _columns %}
{# Find the eval state for this (prompt, model, provider) cell. The #}
{# namespace carries the match out of the inner loop; the ``is none`` #}
{# guard keeps the first matching row if more than one is present. #}
{% set state_ns = namespace(row=none) %}
{% for s in eval_states %}
{% if s.prompt_name == prompt_name and s.model == col.model and s.provider == col.provider and state_ns.row is none %}
{% set state_ns.row = s %}
{% endif %}
{% endfor %}
{# _state stays none when no eval exists for this cell. #}
{% set _state = state_ns.row %}
{# T50.2: data-cell-diff on terminal-state cells fires the diff modal. #}
{# Running/queued cells are NOT clickable -- no response_body yet. #}
{# Falsy when _state is none, so empty cells are never clickable. #}
{% set _is_clickable = _state and _state.status in ('complete', 'errored') %}
{% if _state %}
{% endfor %}