LICENSE.md
MANIFEST.in
README.md
pyproject.toml
src/openbench/__init__.py
src/openbench/_registry.py
src/openbench/config.py
src/openbench/eval_config.py
src/openbench/py.typed
src/openbench.egg-info/PKG-INFO
src/openbench.egg-info/SOURCES.txt
src/openbench.egg-info/dependency_links.txt
src/openbench.egg-info/entry_points.txt
src/openbench.egg-info/requires.txt
src/openbench.egg-info/top_level.txt
src/openbench/_cli/__init__.py
src/openbench/_cli/describe_command.py
src/openbench/_cli/eval_command.py
src/openbench/_cli/eval_retry_command.py
src/openbench/_cli/export.py
src/openbench/_cli/list_command.py
src/openbench/_cli/utils.py
src/openbench/_cli/view_command.py
src/openbench/datasets/__init__.py
src/openbench/datasets/boolq.py
src/openbench/datasets/browsecomp.py
src/openbench/datasets/cti_bench.py
src/openbench/datasets/drop.py
src/openbench/datasets/gpqa.py
src/openbench/datasets/graphwalks.py
src/openbench/datasets/healthbench.py
src/openbench/datasets/hle.py
src/openbench/datasets/humaneval.py
src/openbench/datasets/math.py
src/openbench/datasets/mgsm.py
src/openbench/datasets/mmlu.py
src/openbench/datasets/mmlu_pro.py
src/openbench/datasets/mmmu.py
src/openbench/datasets/mrcr.py
src/openbench/datasets/rootly_gmcq.py
src/openbench/datasets/scicode.py
src/openbench/datasets/simpleqa.py
src/openbench/datasets/jsonschemabench/__init__.py
src/openbench/datasets/jsonschemabench/jsonschemabench.py
src/openbench/datasets/jsonschemabench/openai_compatible_ids.txt
src/openbench/evals/__init__.py
src/openbench/evals/boolq.py
src/openbench/evals/browsecomp.py
src/openbench/evals/cti_bench.py
src/openbench/evals/drop.py
src/openbench/evals/gpqa_diamond.py
src/openbench/evals/graphwalks.py
src/openbench/evals/healthbench.py
src/openbench/evals/hle.py
src/openbench/evals/humaneval.py
src/openbench/evals/jsonschemabench.py
src/openbench/evals/math.py
src/openbench/evals/mgsm.py
src/openbench/evals/mmlu.py
src/openbench/evals/mmlu_pro.py
src/openbench/evals/mmmu.py
src/openbench/evals/mrcr.py
src/openbench/evals/musr.py
src/openbench/evals/openbookqa.py
src/openbench/evals/rootly_gmcq.py
src/openbench/evals/scicode.py
src/openbench/evals/simpleqa.py
src/openbench/evals/supergpqa.py
src/openbench/evals/matharena/__init__.py
src/openbench/evals/matharena/matharena.py
src/openbench/evals/matharena/aime_2023_I/__init__.py
src/openbench/evals/matharena/aime_2023_I/aime_2023_I.py
src/openbench/evals/matharena/aime_2023_II/__init__.py
src/openbench/evals/matharena/aime_2023_II/aime_2023_II.py
src/openbench/evals/matharena/aime_2024/__init__.py
src/openbench/evals/matharena/aime_2024/aime_2024.py
src/openbench/evals/matharena/aime_2024_I/__init__.py
src/openbench/evals/matharena/aime_2024_I/aime_2024_I.py
src/openbench/evals/matharena/aime_2024_II/__init__.py
src/openbench/evals/matharena/aime_2024_II/aime_2024_II.py
src/openbench/evals/matharena/aime_2025/__init__.py
src/openbench/evals/matharena/aime_2025/aime_2025.py
src/openbench/evals/matharena/aime_2025_II/__init__.py
src/openbench/evals/matharena/aime_2025_II/aime_2025_II.py
src/openbench/evals/matharena/brumo_2025/__init__.py
src/openbench/evals/matharena/brumo_2025/brumo_2025.py
src/openbench/evals/matharena/hmmt_feb_2023/__init__.py
src/openbench/evals/matharena/hmmt_feb_2023/hmmt_feb_2023.py
src/openbench/evals/matharena/hmmt_feb_2024/__init__.py
src/openbench/evals/matharena/hmmt_feb_2024/hmmt_feb_2024.py
src/openbench/evals/matharena/hmmt_feb_2025/__init__.py
src/openbench/evals/matharena/hmmt_feb_2025/hmmt_feb_2025.py
src/openbench/metrics/__init__.py
src/openbench/metrics/grouped.py
src/openbench/model/__init__.py
src/openbench/model/_providers/__init__.py
src/openbench/model/_providers/ai21.py
src/openbench/model/_providers/baseten.py
src/openbench/model/_providers/cerebras.py
src/openbench/model/_providers/cohere.py
src/openbench/model/_providers/crusoe.py
src/openbench/model/_providers/deepinfra.py
src/openbench/model/_providers/friendli.py
src/openbench/model/_providers/huggingface.py
src/openbench/model/_providers/hyperbolic.py
src/openbench/model/_providers/lambda_ai.py
src/openbench/model/_providers/minimax.py
src/openbench/model/_providers/moonshot.py
src/openbench/model/_providers/nebius.py
src/openbench/model/_providers/nous.py
src/openbench/model/_providers/novita.py
src/openbench/model/_providers/parasail.py
src/openbench/model/_providers/reka.py
src/openbench/model/_providers/sambanova.py
src/openbench/model/_providers/vercel.py
src/openbench/monkeypatch/__init__.py
src/openbench/monkeypatch/display_results_patch.py
src/openbench/monkeypatch/file_recorder_logfile_patch.py
src/openbench/scorers/__init__.py
src/openbench/scorers/browsecomp.py
src/openbench/scorers/cti_bench.py
src/openbench/scorers/drop.py
src/openbench/scorers/fallback_scorer.py
src/openbench/scorers/graphwalks.py
src/openbench/scorers/healthbench.py
src/openbench/scorers/hle.py
src/openbench/scorers/humaneval.py
src/openbench/scorers/json_schema.py
src/openbench/scorers/math.py
src/openbench/scorers/mcq.py
src/openbench/scorers/mgsm.py
src/openbench/scorers/mmlu.py
src/openbench/scorers/mmlu_pro.py
src/openbench/scorers/mrcr.py
src/openbench/scorers/musr.py
src/openbench/scorers/robust_boxed.py
src/openbench/scorers/rootly_gmcq.py
src/openbench/scorers/scicode.py
src/openbench/scorers/score_boxed.py
src/openbench/scorers/score_last_number.py
src/openbench/scorers/simpleqa.py
src/openbench/utils/__init__.py
src/openbench/utils/image.py
src/openbench/utils/imports.py
src/openbench/utils/text.py
tests/test_json_schema_scorer.py
tests/test_registry.py
tests/test_robust_scorers.py