LICENSE
MANIFEST.in
README.md
pyproject.toml
setup.py
src/lighteval/__init__.py
src/lighteval/__main__.py
src/lighteval/check_logs.py
src/lighteval/cli_args.py
src/lighteval/data.py
src/lighteval/main_accelerate.py
src/lighteval/main_baseline.py
src/lighteval/main_custom.py
src/lighteval/main_endpoint.py
src/lighteval/main_inspect.py
src/lighteval/main_nanotron.py
src/lighteval/main_sglang.py
src/lighteval/main_tasks.py
src/lighteval/main_vllm.py
src/lighteval/pipeline.py
src/lighteval.egg-info/PKG-INFO
src/lighteval.egg-info/SOURCES.txt
src/lighteval.egg-info/dependency_links.txt
src/lighteval.egg-info/entry_points.txt
src/lighteval.egg-info/requires.txt
src/lighteval.egg-info/top_level.txt
src/lighteval/logging/evaluation_tracker.py
src/lighteval/logging/info_loggers.py
src/lighteval/metrics/__init__.py
src/lighteval/metrics/dynamic_metrics.py
src/lighteval/metrics/metrics.py
src/lighteval/metrics/metrics_corpus.py
src/lighteval/metrics/metrics_sample.py
src/lighteval/metrics/normalizations.py
src/lighteval/metrics/sample_preparator.py
src/lighteval/metrics/harness_compatibility/drop.py
src/lighteval/metrics/harness_compatibility/truthful_qa.py
src/lighteval/metrics/imports/__init__.py
src/lighteval/metrics/imports/bert_scorer.py
src/lighteval/metrics/imports/data_stats_metric.py
src/lighteval/metrics/imports/data_stats_utils.py
src/lighteval/metrics/imports/summac.py
src/lighteval/metrics/utils/extractive_match_utils.py
src/lighteval/metrics/utils/judge_utils.py
src/lighteval/metrics/utils/linguistic_tokenizers.py
src/lighteval/metrics/utils/llm_as_judge.py
src/lighteval/metrics/utils/math_comparison.py
src/lighteval/metrics/utils/metric_utils.py
src/lighteval/metrics/utils/stderr.py
src/lighteval/models/abstract_model.py
src/lighteval/models/model_input.py
src/lighteval/models/model_loader.py
src/lighteval/models/model_output.py
src/lighteval/models/utils.py
src/lighteval/models/custom/custom_model.py
src/lighteval/models/dummy/dummy_model.py
src/lighteval/models/endpoints/endpoint_model.py
src/lighteval/models/endpoints/inference_providers_model.py
src/lighteval/models/endpoints/litellm_model.py
src/lighteval/models/endpoints/tgi_model.py
src/lighteval/models/nanotron/nanotron_model.py
src/lighteval/models/sglang/sglang_model.py
src/lighteval/models/transformers/adapter_model.py
src/lighteval/models/transformers/delta_model.py
src/lighteval/models/transformers/transformers_model.py
src/lighteval/models/transformers/vlm_transformers_model.py
src/lighteval/models/vllm/vllm_model.py
src/lighteval/tasks/__init__.py
src/lighteval/tasks/default_prompts.py
src/lighteval/tasks/lighteval_task.py
src/lighteval/tasks/prompt_manager.py
src/lighteval/tasks/registry.py
src/lighteval/tasks/requests.py
src/lighteval/tasks/multilingual/__init__.py
src/lighteval/tasks/multilingual/adapters.py
src/lighteval/tasks/multilingual/tasks/acva.py
src/lighteval/tasks/multilingual/tasks/afri_mgsm.py
src/lighteval/tasks/multilingual/tasks/afri_mmlu.py
src/lighteval/tasks/multilingual/tasks/afri_xnli.py
src/lighteval/tasks/multilingual/tasks/arabic.py
src/lighteval/tasks/multilingual/tasks/arabic_arc.py
src/lighteval/tasks/multilingual/tasks/arabic_mmlu.py
src/lighteval/tasks/multilingual/tasks/arcd.py
src/lighteval/tasks/multilingual/tasks/belebele.py
src/lighteval/tasks/multilingual/tasks/c3.py
src/lighteval/tasks/multilingual/tasks/ceval.py
src/lighteval/tasks/multilingual/tasks/chegeka.py
src/lighteval/tasks/multilingual/tasks/chinese_squad.py
src/lighteval/tasks/multilingual/tasks/cmath.py
src/lighteval/tasks/multilingual/tasks/cmmlu.py
src/lighteval/tasks/multilingual/tasks/cmnli.py
src/lighteval/tasks/multilingual/tasks/cmrc2018.py
src/lighteval/tasks/multilingual/tasks/copa_indic.py
src/lighteval/tasks/multilingual/tasks/enem.py
src/lighteval/tasks/multilingual/tasks/exams.py
src/lighteval/tasks/multilingual/tasks/faquad.py
src/lighteval/tasks/multilingual/tasks/filipino.py
src/lighteval/tasks/multilingual/tasks/flores200.py
src/lighteval/tasks/multilingual/tasks/fquad_v2.py
src/lighteval/tasks/multilingual/tasks/french.py
src/lighteval/tasks/multilingual/tasks/french_boolq.py
src/lighteval/tasks/multilingual/tasks/french_triviqa.py
src/lighteval/tasks/multilingual/tasks/german_rag.py
src/lighteval/tasks/multilingual/tasks/germanquad.py
src/lighteval/tasks/multilingual/tasks/global_mmlu.py
src/lighteval/tasks/multilingual/tasks/hellaswag_hin.py
src/lighteval/tasks/multilingual/tasks/hellaswag_tel.py
src/lighteval/tasks/multilingual/tasks/hellaswag_tha.py
src/lighteval/tasks/multilingual/tasks/hellaswag_tur.py
src/lighteval/tasks/multilingual/tasks/hindi_arc.py
src/lighteval/tasks/multilingual/tasks/hindi_boolq.py
src/lighteval/tasks/multilingual/tasks/indicqa.py
src/lighteval/tasks/multilingual/tasks/kenswquad.py
src/lighteval/tasks/multilingual/tasks/m3exams.py
src/lighteval/tasks/multilingual/tasks/mathlogicqa_rus.py
src/lighteval/tasks/multilingual/tasks/meta_mmlu.py
src/lighteval/tasks/multilingual/tasks/mgsm.py
src/lighteval/tasks/multilingual/tasks/mintaka.py
src/lighteval/tasks/multilingual/tasks/mkqa.py
src/lighteval/tasks/multilingual/tasks/mlmm_arc_challenge.py
src/lighteval/tasks/multilingual/tasks/mlmm_hellaswag.py
src/lighteval/tasks/multilingual/tasks/mlmm_mmlu.py
src/lighteval/tasks/multilingual/tasks/mlmm_truthfulqa.py
src/lighteval/tasks/multilingual/tasks/mlqa.py
src/lighteval/tasks/multilingual/tasks/oab_exams.py
src/lighteval/tasks/multilingual/tasks/ocnli.py
src/lighteval/tasks/multilingual/tasks/openai_mmlu.py
src/lighteval/tasks/multilingual/tasks/openbook_ara.py
src/lighteval/tasks/multilingual/tasks/openbook_es.py
src/lighteval/tasks/multilingual/tasks/openbook_rus.py
src/lighteval/tasks/multilingual/tasks/oz.py
src/lighteval/tasks/multilingual/tasks/parus.py
src/lighteval/tasks/multilingual/tasks/paws_x.py
src/lighteval/tasks/multilingual/tasks/piqa_ar.py
src/lighteval/tasks/multilingual/tasks/rcb.py
src/lighteval/tasks/multilingual/tasks/sber_squad.py
src/lighteval/tasks/multilingual/tasks/serbian_eval.py
src/lighteval/tasks/multilingual/tasks/soqal.py
src/lighteval/tasks/multilingual/tasks/squad_es.py
src/lighteval/tasks/multilingual/tasks/squad_it.py
src/lighteval/tasks/multilingual/tasks/swahili_arc.py
src/lighteval/tasks/multilingual/tasks/thai_exams.py
src/lighteval/tasks/multilingual/tasks/thaiqa.py
src/lighteval/tasks/multilingual/tasks/tquad_v2.py
src/lighteval/tasks/multilingual/tasks/turkic.py
src/lighteval/tasks/multilingual/tasks/turkish_arc.py
src/lighteval/tasks/multilingual/tasks/turkish_mmlu.py
src/lighteval/tasks/multilingual/tasks/tydiqa.py
src/lighteval/tasks/multilingual/tasks/worldtree_rus.py
src/lighteval/tasks/multilingual/tasks/xcodah.py
src/lighteval/tasks/multilingual/tasks/xcopa.py
src/lighteval/tasks/multilingual/tasks/xcsqa.py
src/lighteval/tasks/multilingual/tasks/xnli.py
src/lighteval/tasks/multilingual/tasks/xnli2.py
src/lighteval/tasks/multilingual/tasks/xnli_indic.py
src/lighteval/tasks/multilingual/tasks/xquad.py
src/lighteval/tasks/multilingual/tasks/xstory.py
src/lighteval/tasks/multilingual/tasks/xwinograd.py
src/lighteval/tasks/multilingual/utils/__init__.py
src/lighteval/tasks/multilingual/utils/adapters_utils.py
src/lighteval/tasks/multilingual/utils/task_utils.py
src/lighteval/tasks/tasks/agieval.py
src/lighteval/tasks/tasks/aime.py
src/lighteval/tasks/tasks/aimo.py
src/lighteval/tasks/tasks/anli.py
src/lighteval/tasks/tasks/arc.py
src/lighteval/tasks/tasks/arc_agi_2.py
src/lighteval/tasks/tasks/arithmetic.py
src/lighteval/tasks/tasks/asdiv.py
src/lighteval/tasks/tasks/babi_qa.py
src/lighteval/tasks/tasks/bbq.py
src/lighteval/tasks/tasks/bigbench.py
src/lighteval/tasks/tasks/bigbench_hard.py
src/lighteval/tasks/tasks/blimp.py
src/lighteval/tasks/tasks/bold.py
src/lighteval/tasks/tasks/boolq.py
src/lighteval/tasks/tasks/civil_comments.py
src/lighteval/tasks/tasks/commonsenseqa.py
src/lighteval/tasks/tasks/coqa.py
src/lighteval/tasks/tasks/covid_dialogue.py
src/lighteval/tasks/tasks/custom_task_classification_grammar_task.py
src/lighteval/tasks/tasks/drop_qa.py
src/lighteval/tasks/tasks/dyck_language.py
src/lighteval/tasks/tasks/entity_data_imputation.py
src/lighteval/tasks/tasks/entitymatching.py
src/lighteval/tasks/tasks/ethics.py
src/lighteval/tasks/tasks/glue.py
src/lighteval/tasks/tasks/gpqa.py
src/lighteval/tasks/tasks/gsm8k.py
src/lighteval/tasks/tasks/gsm_plus.py
src/lighteval/tasks/tasks/headqa.py
src/lighteval/tasks/tasks/hellaswag.py
src/lighteval/tasks/tasks/imdb.py
src/lighteval/tasks/tasks/jeopardy.py
src/lighteval/tasks/tasks/lambada.py
src/lighteval/tasks/tasks/legal_summarization.py
src/lighteval/tasks/tasks/legalsupport.py
src/lighteval/tasks/tasks/lexglue.py
src/lighteval/tasks/tasks/lextreme.py
src/lighteval/tasks/tasks/logiqa.py
src/lighteval/tasks/tasks/lsat_qa.py
src/lighteval/tasks/tasks/math.py
src/lighteval/tasks/tasks/math_500.py
src/lighteval/tasks/tasks/mathqa.py
src/lighteval/tasks/tasks/med.py
src/lighteval/tasks/tasks/med_dialog.py
src/lighteval/tasks/tasks/mgsm.py
src/lighteval/tasks/tasks/mmlu.py
src/lighteval/tasks/tasks/mmlu_pro.py
src/lighteval/tasks/tasks/mmlu_redux.py
src/lighteval/tasks/tasks/mmmu_pro.py
src/lighteval/tasks/tasks/musr.py
src/lighteval/tasks/tasks/narrativeqa.py
src/lighteval/tasks/tasks/natural_questions.py
src/lighteval/tasks/tasks/numeracy.py
src/lighteval/tasks/tasks/openbookqa.py
src/lighteval/tasks/tasks/piqa.py
src/lighteval/tasks/tasks/prost.py
src/lighteval/tasks/tasks/pubmedqa.py
src/lighteval/tasks/tasks/qa4mre.py
src/lighteval/tasks/tasks/qasper.py
src/lighteval/tasks/tasks/quac.py
src/lighteval/tasks/tasks/race_high.py
src/lighteval/tasks/tasks/raft.py
src/lighteval/tasks/tasks/real_toxicity_prompts.py
src/lighteval/tasks/tasks/sacrebleu.py
src/lighteval/tasks/tasks/sciq.py
src/lighteval/tasks/tasks/simpleqa.py
src/lighteval/tasks/tasks/siqa.py
src/lighteval/tasks/tasks/slr_bench.py
src/lighteval/tasks/tasks/squad_v2.py
src/lighteval/tasks/tasks/storycloze.py
src/lighteval/tasks/tasks/summarization.py
src/lighteval/tasks/tasks/swag.py
src/lighteval/tasks/tasks/synthetic_reasoning.py
src/lighteval/tasks/tasks/the_pile.py
src/lighteval/tasks/tasks/toxigen.py
src/lighteval/tasks/tasks/triviaqa.py
src/lighteval/tasks/tasks/truthfulqa.py
src/lighteval/tasks/tasks/twitterAAE.py
src/lighteval/tasks/tasks/unscramble.py
src/lighteval/tasks/tasks/webqs.py
src/lighteval/tasks/tasks/wikifact.py
src/lighteval/tasks/tasks/wikitext.py
src/lighteval/tasks/tasks/winogrande.py
src/lighteval/tasks/tasks/xcopa.py
src/lighteval/tasks/tasks/xstory_cloze.py
src/lighteval/tasks/tasks/xwinograd.py
src/lighteval/tasks/tasks/hle/main.py
src/lighteval/tasks/tasks/ifbench/evaluation_lib.py
src/lighteval/tasks/tasks/ifbench/instructions.py
src/lighteval/tasks/tasks/ifbench/instructions_registry.py
src/lighteval/tasks/tasks/ifbench/main.py
src/lighteval/tasks/tasks/ifeval/instructions.py
src/lighteval/tasks/tasks/ifeval/instructions_registry.py
src/lighteval/tasks/tasks/ifeval/instructions_utils.py
src/lighteval/tasks/tasks/ifeval/main.py
src/lighteval/tasks/tasks/lcb/codegen_metrics.py
src/lighteval/tasks/tasks/lcb/main.py
src/lighteval/tasks/tasks/mix_eval/judge_prompts.py
src/lighteval/tasks/tasks/mix_eval/main.py
src/lighteval/tasks/tasks/mix_eval/prompts.py
src/lighteval/tasks/tasks/mt_bench/judge_prompt_templates.py
src/lighteval/tasks/tasks/mt_bench/main.py
src/lighteval/tasks/tasks/olympiade_bench/main.py
src/lighteval/tasks/tasks/tiny_benchmarks/main.py
src/lighteval/tasks/templates/__init__.py
src/lighteval/tasks/templates/boolq.py
src/lighteval/tasks/templates/continuation.py
src/lighteval/tasks/templates/copa.py
src/lighteval/tasks/templates/hellaswag.py
src/lighteval/tasks/templates/multichoice.py
src/lighteval/tasks/templates/nli.py
src/lighteval/tasks/templates/qa.py
src/lighteval/tasks/templates/translation.py
src/lighteval/tasks/templates/utils/__init__.py
src/lighteval/tasks/templates/utils/adapter_utils.py
src/lighteval/tasks/templates/utils/formatting_utils.py
src/lighteval/tasks/templates/utils/formulation.py
src/lighteval/tasks/templates/utils/translation_literals.py
src/lighteval/utils/__init__.py
src/lighteval/utils/cache_management.py
src/lighteval/utils/imports.py
src/lighteval/utils/language.py
src/lighteval/utils/parallelism.py
src/lighteval/utils/timeout.py
src/lighteval/utils/utils.py
tests/test_dependencies.py
tests/test_unit_base_metrics.py
tests/test_unit_harness_metrics.py
tests/test_unit_harness_prompts.py