{# Benchmark detail page: inherits the shared UI layout, sets the page title from the benchmark name, and opens the content block that is closed by the final endblock of this file. -#}
{% extends "ui/base_ui.html" %}
{% block title %}{{ benchmark.name }} - ATP Platform{% endblock %}
{% block content %}

{# Benchmark name, shown first inside the content block. -#}
{{ benchmark.name }}

{# Description is optional: nothing is rendered for it when it is empty or missing. -#}
{% if benchmark.description %}

{{ benchmark.description }}

{% endif %}
{# Summary stats: each value is immediately followed by its label. -#}
{{ benchmark.tasks_count }}
Tasks
{# A falsy version (missing, None, empty string) is shown as an em dash. -#}
{{ benchmark.version|default("—", true) }}
Version
{# The family stat is rendered only when a family tag is set. -#}
{% if benchmark.family_tag %}
{{ benchmark.family_tag }}
Family
{% endif %}

Tests

{#
  Tests table: one header row, then one body row per entry in `tests`.
  The row markup lives inside the for loop so that `test` is bound while it
  is rendered. When `tests` is empty or missing, a short placeholder message
  is shown instead of the table.
#}
{% if tests %}
<table>
  <thead>
    <tr>
      <th scope="col">ID</th>
      <th scope="col">Name</th>
      <th scope="col">Description</th>
      <th scope="col">Assertions</th>
    </tr>
  </thead>
  <tbody>
    {% for test in tests %}
    {# Fall back to an empty string when the description is missing so that |length always has a string to measure. #}
    {% set description = test.task.description or "" %}
    <tr>
      <td>{{ test.id }}</td>
      <td>{{ test.name }}</td>
      {# Show at most the first 80 characters, with "..." marking a cut-off description. #}
      <td>{{ description[:80] }}{% if description|length > 80 %}...{% endif %}</td>
      <td>{{ test.assertions|length }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{% else %}

No tests defined.

{% endif %}

Recent Runs

{#
  Recent runs table: one header row, then one body row per entry in `runs`.
  The row markup lives inside the for loop so that `run` is bound while it
  is rendered. When `runs` is empty or missing, a placeholder message is
  shown instead of the table.
#}
{% if runs %}
<table>
  <thead>
    <tr>
      <th scope="col">Run</th>
      <th scope="col">Agent</th>
      <th scope="col">Status</th>
      <th scope="col">Score</th>
      <th scope="col">Date</th>
    </tr>
  </thead>
  <tbody>
    {% for run in runs %}
    <tr>
      <td>#{{ run.id }}</td>
      <td>{{ run.agent_name or "—" }}</td>
      <td>{{ run.status }}</td>
      {# `is not none` (rather than a truthiness check) keeps a legitimate score of 0 visible as "0.0". #}
      <td>{% if run.total_score is not none %}{{ "%.1f"|format(run.total_score) }}{% else %}—{% endif %}</td>
      {# Runs without a start time show an em dash instead of a date. #}
      <td>{% if run.started_at %}{{ run.started_at.strftime("%Y-%m-%d %H:%M") }}{% else %}—{% endif %}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{% else %}

No runs yet.

{% endif %}

Leaderboard (Top 5)

{#
  Leaderboard table: one header row, then one body row per entry in
  `leaderboard`. The row markup lives inside the for loop because both
  `entry` and `loop.index` (the 1-based rank) only exist there.
  NOTE(review): this template renders every entry it receives; the "Top 5"
  limit is presumably applied by the view that builds `leaderboard`. Confirm.
#}
{% if leaderboard %}
<table>
  <thead>
    <tr>
      <th scope="col">#</th>
      <th scope="col">Agent</th>
      <th scope="col">Best Score</th>
      <th scope="col">Runs</th>
    </tr>
  </thead>
  <tbody>
    {% for entry in leaderboard %}
    <tr>
      <td>{{ loop.index }}</td>
      <td>{{ entry.agent_name }}</td>
      {# A missing best score is formatted as 0.0. #}
      <td>{{ "%.1f"|format(entry.best_score or 0) }}</td>
      <td>{{ entry.run_count }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
{% else %}

No completed runs yet.

{% endif %}
{% endblock %}