LICENSE.md
MANIFEST.in
README.md
pyproject.toml
src/openbench/__init__.py
src/openbench/_registry.py
src/openbench/config.py
src/openbench/eval_config.py
src/openbench/py.typed
src/openbench.egg-info/PKG-INFO
src/openbench.egg-info/SOURCES.txt
src/openbench.egg-info/dependency_links.txt
src/openbench.egg-info/entry_points.txt
src/openbench.egg-info/requires.txt
src/openbench.egg-info/top_level.txt
src/openbench/__pycache__/__init__.cpython-313.pyc
src/openbench/__pycache__/_registry.cpython-313.pyc
src/openbench/__pycache__/config.cpython-313.pyc
src/openbench/__pycache__/eval_config.cpython-313.pyc
src/openbench/_cli/__init__.py
src/openbench/_cli/describe_command.py
src/openbench/_cli/eval_command.py
src/openbench/_cli/eval_retry_command.py
src/openbench/_cli/export.py
src/openbench/_cli/list_command.py
src/openbench/_cli/utils.py
src/openbench/_cli/view_command.py
src/openbench/_cli/__pycache__/__init__.cpython-313.pyc
src/openbench/_cli/__pycache__/describe_command.cpython-313.pyc
src/openbench/_cli/__pycache__/eval_command.cpython-313.pyc
src/openbench/_cli/__pycache__/eval_retry_command.cpython-313.pyc
src/openbench/_cli/__pycache__/list_command.cpython-313.pyc
src/openbench/_cli/__pycache__/utils.cpython-313.pyc
src/openbench/_cli/__pycache__/view_command.cpython-313.pyc
src/openbench/datasets/__init__.py
src/openbench/datasets/drop.py
src/openbench/datasets/gpqa.py
src/openbench/datasets/graphwalks.py
src/openbench/datasets/healthbench.py
src/openbench/datasets/hle.py
src/openbench/datasets/humaneval.py
src/openbench/datasets/math.py
src/openbench/datasets/mgsm.py
src/openbench/datasets/mmlu.py
src/openbench/datasets/mrcr.py
src/openbench/datasets/scicode.py
src/openbench/datasets/simpleqa.py
src/openbench/datasets/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/__pycache__/drop.cpython-313.pyc
src/openbench/datasets/__pycache__/gpqa.cpython-313.pyc
src/openbench/datasets/__pycache__/graphwalks.cpython-313.pyc
src/openbench/datasets/__pycache__/healthbench.cpython-313.pyc
src/openbench/datasets/__pycache__/hle.cpython-313.pyc
src/openbench/datasets/__pycache__/humaneval.cpython-313.pyc
src/openbench/datasets/__pycache__/math.cpython-313.pyc
src/openbench/datasets/__pycache__/mgsm.cpython-313.pyc
src/openbench/datasets/__pycache__/mmlu.cpython-313.pyc
src/openbench/datasets/__pycache__/mrcr.cpython-313.pyc
src/openbench/datasets/__pycache__/simpleqa.cpython-313.pyc
src/openbench/datasets/drop/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/drop/__pycache__/drop.cpython-313.pyc
src/openbench/datasets/gpqa/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/gpqa/__pycache__/gpqa.cpython-313.pyc
src/openbench/datasets/healthbench/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/healthbench/__pycache__/healthbench.cpython-313.pyc
src/openbench/datasets/hle/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/hle/__pycache__/hle.cpython-313.pyc
src/openbench/datasets/humaneval/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/humaneval/__pycache__/humaneval.cpython-313.pyc
src/openbench/datasets/math/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/math/__pycache__/math.cpython-313.pyc
src/openbench/datasets/mgsm/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/mgsm/__pycache__/mgsm.cpython-313.pyc
src/openbench/datasets/mmlu/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/mmlu/__pycache__/mmlu.cpython-313.pyc
src/openbench/datasets/mrcr/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/mrcr/__pycache__/mrcr.cpython-313.pyc
src/openbench/datasets/simpleqa/__pycache__/__init__.cpython-313.pyc
src/openbench/datasets/simpleqa/__pycache__/simpleqa.cpython-313.pyc
src/openbench/evals/__init__.py
src/openbench/evals/drop.py
src/openbench/evals/gpqa_diamond.py
src/openbench/evals/graphwalks.py
src/openbench/evals/healthbench.py
src/openbench/evals/hle.py
src/openbench/evals/humaneval.py
src/openbench/evals/math.py
src/openbench/evals/mgsm.py
src/openbench/evals/mmlu.py
src/openbench/evals/mrcr.py
src/openbench/evals/musr.py
src/openbench/evals/openbookqa.py
src/openbench/evals/scicode.py
src/openbench/evals/simpleqa.py
src/openbench/evals/supergpqa.py
src/openbench/evals/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/__pycache__/drop.cpython-313.pyc
src/openbench/evals/__pycache__/gpqa_diamond.cpython-313.pyc
src/openbench/evals/__pycache__/graphwalks.cpython-313.pyc
src/openbench/evals/__pycache__/healthbench.cpython-313.pyc
src/openbench/evals/__pycache__/healthbench_gpt_5.cpython-313.pyc
src/openbench/evals/__pycache__/hle.cpython-313.pyc
src/openbench/evals/__pycache__/humaneval.cpython-313.pyc
src/openbench/evals/__pycache__/math.cpython-313.pyc
src/openbench/evals/__pycache__/mgsm.cpython-313.pyc
src/openbench/evals/__pycache__/mmlu.cpython-313.pyc
src/openbench/evals/__pycache__/mrcr.cpython-313.pyc
src/openbench/evals/__pycache__/musr.cpython-313.pyc
src/openbench/evals/__pycache__/openbookqa.cpython-313.pyc
src/openbench/evals/__pycache__/scicode.cpython-313.pyc
src/openbench/evals/__pycache__/simpleqa.cpython-313.pyc
src/openbench/evals/__pycache__/supergpqa.cpython-313.pyc
src/openbench/evals/matharena/__init__.py
src/openbench/evals/matharena/matharena.py
src/openbench/evals/matharena/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/__pycache__/matharena.cpython-313.pyc
src/openbench/evals/matharena/aime_2023_I/__init__.py
src/openbench/evals/matharena/aime_2023_I/aime_2023_I.py
src/openbench/evals/matharena/aime_2023_I/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2023_I/__pycache__/aime_2023_I.cpython-313.pyc
src/openbench/evals/matharena/aime_2023_II/__init__.py
src/openbench/evals/matharena/aime_2023_II/aime_2023_II.py
src/openbench/evals/matharena/aime_2023_II/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2023_II/__pycache__/aime_2023_II.cpython-313.pyc
src/openbench/evals/matharena/aime_2024/__init__.py
src/openbench/evals/matharena/aime_2024/aime_2024.py
src/openbench/evals/matharena/aime_2024/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2024/__pycache__/aime_2024.cpython-313.pyc
src/openbench/evals/matharena/aime_2024_I/__init__.py
src/openbench/evals/matharena/aime_2024_I/aime_2024_I.py
src/openbench/evals/matharena/aime_2024_I/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2024_I/__pycache__/aime_2024_I.cpython-313.pyc
src/openbench/evals/matharena/aime_2024_II/__init__.py
src/openbench/evals/matharena/aime_2024_II/aime_2024_II.py
src/openbench/evals/matharena/aime_2024_II/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2024_II/__pycache__/aime_2024_II.cpython-313.pyc
src/openbench/evals/matharena/aime_2025/__init__.py
src/openbench/evals/matharena/aime_2025/aime_2025.py
src/openbench/evals/matharena/aime_2025/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2025/__pycache__/aime_2025.cpython-313.pyc
src/openbench/evals/matharena/aime_2025_II/__init__.py
src/openbench/evals/matharena/aime_2025_II/aime_2025_II.py
src/openbench/evals/matharena/aime_2025_II/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/aime_2025_II/__pycache__/aime_2025_II.cpython-313.pyc
src/openbench/evals/matharena/brumo_2025/__init__.py
src/openbench/evals/matharena/brumo_2025/brumo_2025.py
src/openbench/evals/matharena/brumo_2025/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/brumo_2025/__pycache__/brumo_2025.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2023/__init__.py
src/openbench/evals/matharena/hmmt_feb_2023/hmmt_feb_2023.py
src/openbench/evals/matharena/hmmt_feb_2023/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2023/__pycache__/hmmt_feb_2023.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2024/__init__.py
src/openbench/evals/matharena/hmmt_feb_2024/hmmt_feb_2024.py
src/openbench/evals/matharena/hmmt_feb_2024/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2024/__pycache__/hmmt_feb_2024.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2025/__init__.py
src/openbench/evals/matharena/hmmt_feb_2025/hmmt_feb_2025.py
src/openbench/evals/matharena/hmmt_feb_2025/__pycache__/__init__.cpython-313.pyc
src/openbench/evals/matharena/hmmt_feb_2025/__pycache__/hmmt_feb_2025.cpython-313.pyc
src/openbench/metrics/__init__.py
src/openbench/metrics/grouped.py
src/openbench/metrics/__pycache__/__init__.cpython-313.pyc
src/openbench/metrics/__pycache__/grouped.cpython-313.pyc
src/openbench/model/__init__.py
src/openbench/model/__pycache__/__init__.cpython-313.pyc
src/openbench/model/_providers/__init__.py
src/openbench/model/_providers/ai21.py
src/openbench/model/_providers/baseten.py
src/openbench/model/_providers/cerebras.py
src/openbench/model/_providers/cohere.py
src/openbench/model/_providers/crusoe.py
src/openbench/model/_providers/deepinfra.py
src/openbench/model/_providers/friendli.py
src/openbench/model/_providers/huggingface.py
src/openbench/model/_providers/hyperbolic.py
src/openbench/model/_providers/lambda_ai.py
src/openbench/model/_providers/minimax.py
src/openbench/model/_providers/moonshot.py
src/openbench/model/_providers/nebius.py
src/openbench/model/_providers/nous.py
src/openbench/model/_providers/novita.py
src/openbench/model/_providers/parasail.py
src/openbench/model/_providers/reka.py
src/openbench/model/_providers/sambanova.py
src/openbench/model/_providers/__pycache__/__init__.cpython-313.pyc
src/openbench/model/_providers/__pycache__/cerebras.cpython-313.pyc
src/openbench/monkeypatch/__init__.py
src/openbench/monkeypatch/display_results_patch.py
src/openbench/monkeypatch/file_recorder_logfile_patch.py
src/openbench/monkeypatch/__pycache__/__init__.cpython-313.pyc
src/openbench/monkeypatch/__pycache__/display_results_patch.cpython-313.pyc
src/openbench/monkeypatch/__pycache__/file_recorder_logfile_patch.cpython-313.pyc
src/openbench/scorers/__init__.py
src/openbench/scorers/drop.py
src/openbench/scorers/fallback_scorer.py
src/openbench/scorers/graphwalks.py
src/openbench/scorers/healthbench.py
src/openbench/scorers/hle.py
src/openbench/scorers/humaneval.py
src/openbench/scorers/math.py
src/openbench/scorers/mgsm.py
src/openbench/scorers/mmlu.py
src/openbench/scorers/mrcr.py
src/openbench/scorers/scicode.py
src/openbench/scorers/score_boxed.py
src/openbench/scorers/score_last_number.py
src/openbench/scorers/simpleqa.py
src/openbench/scorers/__pycache__/__init__.cpython-313.pyc
src/openbench/scorers/__pycache__/drop.cpython-313.pyc
src/openbench/scorers/__pycache__/fallback_scorer.cpython-313.pyc
src/openbench/scorers/__pycache__/graphwalks.cpython-313.pyc
src/openbench/scorers/__pycache__/healthbench.cpython-313.pyc
src/openbench/scorers/__pycache__/hle.cpython-313.pyc
src/openbench/scorers/__pycache__/humaneval.cpython-313.pyc
src/openbench/scorers/__pycache__/llm_judge.cpython-313.pyc
src/openbench/scorers/__pycache__/math.cpython-313.pyc
src/openbench/scorers/__pycache__/mgsm.cpython-313.pyc
src/openbench/scorers/__pycache__/mmlu.cpython-313.pyc
src/openbench/scorers/__pycache__/mrcr.cpython-313.pyc
src/openbench/scorers/__pycache__/score_boxed.cpython-313.pyc
src/openbench/scorers/__pycache__/score_last_number.cpython-313.pyc
src/openbench/scorers/__pycache__/simpleqa.cpython-313.pyc
src/openbench/utils/__init__.py
src/openbench/utils/imports.py
src/openbench/utils/text.py
src/openbench/utils/__pycache__/__init__.cpython-313.pyc
src/openbench/utils/__pycache__/text.cpython-313.pyc
tests/test_registry.py