LICENSE.md
MANIFEST.in
README.md
pyproject.toml
src/openbench/__init__.py
src/openbench/_registry.py
src/openbench/config.py
src/openbench/eval_config.py
src/openbench/py.typed
src/openbench.egg-info/PKG-INFO
src/openbench.egg-info/SOURCES.txt
src/openbench.egg-info/dependency_links.txt
src/openbench.egg-info/entry_points.txt
src/openbench.egg-info/requires.txt
src/openbench.egg-info/top_level.txt
src/openbench/__pycache__/__init__.cpython-313.pyc
src/openbench/__pycache__/_registry.cpython-313.pyc
src/openbench/__pycache__/config.cpython-313.pyc
src/openbench/__pycache__/eval_config.cpython-313.pyc
src/openbench/_cli/__init__.py
src/openbench/_cli/describe_command.py
src/openbench/_cli/eval_command.py
src/openbench/_cli/eval_retry_command.py
src/openbench/_cli/list_command.py
src/openbench/_cli/utils.py
src/openbench/_cli/view_command.py
src/openbench/_cli/__pycache__/__init__.cpython-313.pyc
src/openbench/_cli/__pycache__/describe_command.cpython-313.pyc
src/openbench/_cli/__pycache__/eval_command.cpython-313.pyc
src/openbench/_cli/__pycache__/eval_retry_command.cpython-313.pyc
src/openbench/_cli/__pycache__/list_command.cpython-313.pyc
src/openbench/_cli/__pycache__/utils.cpython-313.pyc
src/openbench/_cli/__pycache__/view_command.cpython-313.pyc
src/openbench/datasets/__init__.py
src/openbench/datasets/drop.py
src/openbench/datasets/gpqa.py
src/openbench/datasets/healthbench.py
src/openbench/datasets/hle.py
src/openbench/datasets/humaneval.py
src/openbench/datasets/math.py
src/openbench/datasets/mgsm.py
src/openbench/datasets/mmlu.py
src/openbench/datasets/mrcr.py
src/openbench/datasets/simpleqa.py
src/openbench/datasets/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/__pycache__/drop.cpython-313.pyc
src/openbench/datasets/__pycache__/healthbench.cpython-313.pyc
src/openbench/datasets/__pycache__/hle.cpython-313.pyc
src/openbench/datasets/__pycache__/math.cpython-313.pyc
src/openbench/datasets/__pycache__/mgsm.cpython-313.pyc
src/openbench/datasets/__pycache__/mmlu.cpython-313.pyc
src/openbench/evals/__init__.py
src/openbench/evals/drop.py
src/openbench/evals/gpqa_diamond.py
src/openbench/evals/healthbench.py
src/openbench/evals/hle.py
src/openbench/evals/humaneval.py
src/openbench/evals/math.py
src/openbench/evals/mgsm.py
src/openbench/evals/mmlu.py
src/openbench/evals/mrcr.py
src/openbench/evals/musr.py
src/openbench/evals/openbookqa.py
src/openbench/evals/simpleqa.py
src/openbench/evals/supergpqa.py
src/openbench/evals/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/__pycache__/drop.cpython-313.pyc
src/openbench/evals/__pycache__/healthbench.cpython-313.pyc
src/openbench/evals/__pycache__/healthbench_gpt_5.cpython-313.pyc
src/openbench/evals/__pycache__/hle.cpython-313.pyc
src/openbench/evals/__pycache__/math.cpython-313.pyc
src/openbench/evals/__pycache__/mgsm.cpython-313.pyc
src/openbench/evals/__pycache__/mmlu.cpython-313.pyc
src/openbench/evals/matharena/__init__.py
src/openbench/evals/matharena/matharena.py
src/openbench/evals/matharena/aime_2023_I/__init__.py
src/openbench/evals/matharena/aime_2023_I/aime_2023_I.py
src/openbench/evals/matharena/aime_2023_II/__init__.py
src/openbench/evals/matharena/aime_2023_II/aime_2023_II.py
src/openbench/evals/matharena/aime_2024/__init__.py
src/openbench/evals/matharena/aime_2024/aime_2024.py
src/openbench/evals/matharena/aime_2024_I/__init__.py
src/openbench/evals/matharena/aime_2024_I/aime_2024_I.py
src/openbench/evals/matharena/aime_2024_II/__init__.py
src/openbench/evals/matharena/aime_2024_II/aime_2024_II.py
src/openbench/evals/matharena/aime_2025/__init__.py
src/openbench/evals/matharena/aime_2025/aime_2025.py
src/openbench/evals/matharena/aime_2025_II/__init__.py
src/openbench/evals/matharena/aime_2025_II/aime_2025_II.py
src/openbench/evals/matharena/brumo_2025/__init__.py
src/openbench/evals/matharena/brumo_2025/brumo_2025.py
src/openbench/evals/matharena/hmmt_feb_2023/__init__.py
src/openbench/evals/matharena/hmmt_feb_2023/hmmt_feb_2023.py
src/openbench/evals/matharena/hmmt_feb_2024/__init__.py
src/openbench/evals/matharena/hmmt_feb_2024/hmmt_feb_2024.py
src/openbench/evals/matharena/hmmt_feb_2025/__init__.py
src/openbench/evals/matharena/hmmt_feb_2025/hmmt_feb_2025.py
src/openbench/metrics/__init__.py
src/openbench/metrics/grouped.py
src/openbench/metrics/__pycache__/__init__.cpython-313.pyc
src/openbench/metrics/__pycache__/grouped.cpython-313.pyc
src/openbench/monkeypatch/__init__.py
src/openbench/monkeypatch/display_results_patch.py
src/openbench/monkeypatch/file_recorder_logfile_patch.py
src/openbench/monkeypatch/__pycache__/__init__.cpython-313.pyc
src/openbench/monkeypatch/__pycache__/display_results_patch.cpython-313.pyc
src/openbench/monkeypatch/__pycache__/file_recorder_logfile_patch.cpython-313.pyc
src/openbench/scorers/__init__.py
src/openbench/scorers/drop.py
src/openbench/scorers/fallback_scorer.py
src/openbench/scorers/healthbench.py
src/openbench/scorers/hle.py
src/openbench/scorers/humaneval.py
src/openbench/scorers/math.py
src/openbench/scorers/mgsm.py
src/openbench/scorers/mmlu.py
src/openbench/scorers/mrcr.py
src/openbench/scorers/score_boxed.py
src/openbench/scorers/score_last_number.py
src/openbench/scorers/simpleqa.py
src/openbench/scorers/__pycache__/__init__.cpython-313.pyc
src/openbench/scorers/__pycache__/drop.cpython-313.pyc
src/openbench/scorers/__pycache__/fallback_scorer.cpython-313.pyc
src/openbench/scorers/__pycache__/healthbench.cpython-313.pyc
src/openbench/scorers/__pycache__/hle.cpython-313.pyc
src/openbench/scorers/__pycache__/llm_judge.cpython-313.pyc
src/openbench/scorers/__pycache__/math.cpython-313.pyc
src/openbench/scorers/__pycache__/mgsm.cpython-313.pyc
src/openbench/scorers/__pycache__/mmlu.cpython-313.pyc
src/openbench/scorers/__pycache__/score_boxed.cpython-313.pyc
src/openbench/scorers/__pycache__/score_last_number.cpython-313.pyc
src/openbench/utils/__init__.py
src/openbench/utils/imports.py
src/openbench/utils/text.py
src/openbench/utils/__pycache__/__init__.cpython-313.pyc
src/openbench/utils/__pycache__/text.cpython-313.pyc
tests/test_registry.py