LICENSE
README.md
pyproject.toml
setup.py
lmms_eval/__init__.py
lmms_eval/__main__.py
lmms_eval/evaluator.py
lmms_eval/evaluator_utils.py
lmms_eval/imports.py
lmms_eval/launch_server.py
lmms_eval/logging_utils.py
lmms_eval/protocol.py
lmms_eval/utils.py
lmms_eval.egg-info/PKG-INFO
lmms_eval.egg-info/SOURCES.txt
lmms_eval.egg-info/dependency_links.txt
lmms_eval.egg-info/entry_points.txt
lmms_eval.egg-info/requires.txt
lmms_eval.egg-info/top_level.txt
lmms_eval/api/__init__.py
lmms_eval/api/filter.py
lmms_eval/api/group.py
lmms_eval/api/instance.py
lmms_eval/api/metrics.py
lmms_eval/api/model.py
lmms_eval/api/reasoning.py
lmms_eval/api/registry.py
lmms_eval/api/samplers.py
lmms_eval/api/task.py
lmms_eval/baselines/__init__.py
lmms_eval/baselines/loader.py
lmms_eval/baselines/registry.py
lmms_eval/caching/__init__.py
lmms_eval/caching/cache.py
lmms_eval/caching/fs_detect.py
lmms_eval/caching/response_cache.py
lmms_eval/cli/__init__.py
lmms_eval/cli/dispatch.py
lmms_eval/cli/mcp_cmd.py
lmms_eval/cli/models_cmd.py
lmms_eval/cli/power_cmd.py
lmms_eval/cli/power_utils.py
lmms_eval/cli/serve_cmd.py
lmms_eval/cli/tasks_cmd.py
lmms_eval/cli/ui_cmd.py
lmms_eval/cli/version_cmd.py
lmms_eval/cli/wizard.py
lmms_eval/entrypoints/__init__.py
lmms_eval/entrypoints/client.py
lmms_eval/entrypoints/http_server.py
lmms_eval/entrypoints/job_scheduler.py
lmms_eval/entrypoints/protocol.py
lmms_eval/entrypoints/server_args.py
lmms_eval/filters/__init__.py
lmms_eval/filters/decontamination.py
lmms_eval/filters/extraction.py
lmms_eval/filters/selection.py
lmms_eval/filters/transformation.py
lmms_eval/llm_judge/__init__.py
lmms_eval/llm_judge/base.py
lmms_eval/llm_judge/factory.py
lmms_eval/llm_judge/prompt.py
lmms_eval/llm_judge/protocol.py
lmms_eval/llm_judge/utils.py
lmms_eval/llm_judge/launcher/__init__.py
lmms_eval/llm_judge/launcher/base.py
lmms_eval/llm_judge/launcher/sglang.py
lmms_eval/llm_judge/providers/__init__.py
lmms_eval/llm_judge/providers/async_azure_openai.py
lmms_eval/llm_judge/providers/async_openai.py
lmms_eval/llm_judge/providers/azure_openai.py
lmms_eval/llm_judge/providers/bedrock.py
lmms_eval/llm_judge/providers/dummy.py
lmms_eval/llm_judge/providers/local.py
lmms_eval/llm_judge/providers/openai.py
lmms_eval/loggers/__init__.py
lmms_eval/loggers/evaluation_tracker.py
lmms_eval/loggers/utils.py
lmms_eval/loggers/wandb_logger.py
lmms_eval/mcp/__init__.py
lmms_eval/mcp/client.py
lmms_eval/mcp/schemas.py
lmms_eval/mcp/server.py
lmms_eval/mcp/tools.py
lmms_eval/models/__init__.py
lmms_eval/models/registry_v2.py
lmms_eval/models/whisper_tt.py
lmms_eval/models/chat/_chat_base.py
lmms_eval/models/chat/async_hf_model.py
lmms_eval/models/chat/async_openai.py
lmms_eval/models/chat/bagel_lmms_engine.py
lmms_eval/models/chat/baichuan_omni.py
lmms_eval/models/chat/fastvideo.py
lmms_eval/models/chat/huggingface.py
lmms_eval/models/chat/internvl_hf.py
lmms_eval/models/chat/litellm.py
lmms_eval/models/chat/llava_hf.py
lmms_eval/models/chat/llava_onevision1_5.py
lmms_eval/models/chat/llava_onevision2.py
lmms_eval/models/chat/longvila.py
lmms_eval/models/chat/minicpm_o.py
lmms_eval/models/chat/nanovlm.py
lmms_eval/models/chat/omnivinci.py
lmms_eval/models/chat/openai.py
lmms_eval/models/chat/phi4_multimodal.py
lmms_eval/models/chat/qwen2_5_omni.py
lmms_eval/models/chat/qwen2_5_vl.py
lmms_eval/models/chat/qwen3_5.py
lmms_eval/models/chat/qwen3_omni.py
lmms_eval/models/chat/qwen3_vl.py
lmms_eval/models/chat/sglang.py
lmms_eval/models/chat/thyme.py
lmms_eval/models/chat/vllm.py
lmms_eval/models/chat/vllm_generate.py
lmms_eval/models/model_utils/__init__.py
lmms_eval/models/model_utils/audio_processing.py
lmms_eval/models/model_utils/concurrency_control.py
lmms_eval/models/model_utils/efficiency_metrics.py
lmms_eval/models/model_utils/gen_metrics.py
lmms_eval/models/model_utils/load_video.py
lmms_eval/models/model_utils/media_encoder.py
lmms_eval/models/model_utils/progress.py
lmms_eval/models/model_utils/reasoning_model_utils.py
lmms_eval/models/model_utils/usage_metrics.py
lmms_eval/models/model_utils/cambrians/qwen2_monkey_patch.py
lmms_eval/models/model_utils/qwen/qwen_generate_utils.py
lmms_eval/models/model_utils/thyme/sandbox.py
lmms_eval/models/model_utils/thyme/utils.py
lmms_eval/models/simple/aero.py
lmms_eval/models/simple/aria.py
lmms_eval/models/simple/audio_flamingo_3.py
lmms_eval/models/simple/auroracap.py
lmms_eval/models/simple/bagel.py
lmms_eval/models/simple/bagel_unig2u.py
lmms_eval/models/simple/baichuan_omni.py
lmms_eval/models/simple/batch_gpt4.py
lmms_eval/models/simple/cambrian.py
lmms_eval/models/simple/cambrians.py
lmms_eval/models/simple/cambrians_vsc.py
lmms_eval/models/simple/cambrians_vsc_streaming.py
lmms_eval/models/simple/cambrians_vsr.py
lmms_eval/models/simple/claude.py
lmms_eval/models/simple/cogvlm2.py
lmms_eval/models/simple/dummy.py
lmms_eval/models/simple/egogpt.py
lmms_eval/models/simple/from_log.py
lmms_eval/models/simple/fuyu.py
lmms_eval/models/simple/gemini_api.py
lmms_eval/models/simple/gemma3.py
lmms_eval/models/simple/glm4v.py
lmms_eval/models/simple/gpt4o_audio.py
lmms_eval/models/simple/gpt4v.py
lmms_eval/models/simple/idefics2.py
lmms_eval/models/simple/illume_plus.py
lmms_eval/models/simple/instructblip.py
lmms_eval/models/simple/internvideo2.py
lmms_eval/models/simple/internvideo2_5.py
lmms_eval/models/simple/internvl.py
lmms_eval/models/simple/internvl2.py
lmms_eval/models/simple/internvl3.py
lmms_eval/models/simple/internvl3_5.py
lmms_eval/models/simple/kimi_audio.py
lmms_eval/models/simple/litellm.py
lmms_eval/models/simple/llama4_scout.py
lmms_eval/models/simple/llama_vid.py
lmms_eval/models/simple/llama_vision.py
lmms_eval/models/simple/llava.py
lmms_eval/models/simple/llava_hf.py
lmms_eval/models/simple/llava_onevision.py
lmms_eval/models/simple/llava_onevision1_5.py
lmms_eval/models/simple/llava_onevision_moviechat.py
lmms_eval/models/simple/llava_sglang.py
lmms_eval/models/simple/llava_vid.py
lmms_eval/models/simple/longva.py
lmms_eval/models/simple/mantis.py
lmms_eval/models/simple/minicpm_o.py
lmms_eval/models/simple/minicpm_v.py
lmms_eval/models/simple/minimonkey.py
lmms_eval/models/simple/mmada.py
lmms_eval/models/simple/moviechat.py
lmms_eval/models/simple/mplug_owl_video.py
lmms_eval/models/simple/ola.py
lmms_eval/models/simple/omnivinci.py
lmms_eval/models/simple/openai.py
lmms_eval/models/simple/oryx.py
lmms_eval/models/simple/ovis_u1.py
lmms_eval/models/simple/penguinvl.py
lmms_eval/models/simple/phi3v.py
lmms_eval/models/simple/phi4_multimodal.py
lmms_eval/models/simple/plm.py
lmms_eval/models/simple/qwen2_5_omni.py
lmms_eval/models/simple/qwen2_5_vl.py
lmms_eval/models/simple/qwen2_5_vl_interleave.py
lmms_eval/models/simple/qwen2_audio.py
lmms_eval/models/simple/qwen2_vl.py
lmms_eval/models/simple/qwen3_5.py
lmms_eval/models/simple/qwen3_omni.py
lmms_eval/models/simple/qwen3_vl.py
lmms_eval/models/simple/qwen_image_edit.py
lmms_eval/models/simple/qwen_vl.py
lmms_eval/models/simple/qwen_vl_api.py
lmms_eval/models/simple/reka.py
lmms_eval/models/simple/ross.py
lmms_eval/models/simple/sam3.py
lmms_eval/models/simple/slime.py
lmms_eval/models/simple/srt_api.py
lmms_eval/models/simple/tinyllava.py
lmms_eval/models/simple/uni_moe_2_omni.py
lmms_eval/models/simple/video_chatgpt.py
lmms_eval/models/simple/video_llava.py
lmms_eval/models/simple/video_salmonn_2.py
lmms_eval/models/simple/videochat2.py
lmms_eval/models/simple/videochat_flash.py
lmms_eval/models/simple/videollama3.py
lmms_eval/models/simple/vila.py
lmms_eval/models/simple/vita.py
lmms_eval/models/simple/vllm.py
lmms_eval/models/simple/vora.py
lmms_eval/models/simple/whisper.py
lmms_eval/models/simple/whisper_vllm.py
lmms_eval/models/simple/xcomposer2_4KHD.py
lmms_eval/models/simple/xcomposer2d5.py
lmms_eval/tasks/__init__.py
lmms_eval/tasks/asr_wer_utils.py
lmms_eval/tasks/3dsrbench/3dsrbench.yaml
lmms_eval/tasks/3dsrbench/3dsrbench_circular.yaml
lmms_eval/tasks/3dsrbench/utils.py
lmms_eval/tasks/FALCONBench/FALCONBench_mcq.yaml
lmms_eval/tasks/FALCONBench/FALCONBench_mcq_temploc.yaml
lmms_eval/tasks/FALCONBench/FALCONBench_oq.yaml
lmms_eval/tasks/FALCONBench/FALCONBench_oq_temploc.yaml
lmms_eval/tasks/FALCONBench/utils.py
lmms_eval/tasks/VisualPuzzles/VisualPuzzles_cot.yaml
lmms_eval/tasks/VisualPuzzles/VisualPuzzles_direct.yaml
lmms_eval/tasks/VisualPuzzles/utils.py
lmms_eval/tasks/WISE/WISE.yaml
lmms_eval/tasks/WISE/utils.py
lmms_eval/tasks/_task_utils/default_template_yaml.py
lmms_eval/tasks/_task_utils/file_utils.py
lmms_eval/tasks/_task_utils/gpt_eval_utils.py
lmms_eval/tasks/_task_utils/lance_video_resolver.py
lmms_eval/tasks/_task_utils/math_verify_utils.py
lmms_eval/tasks/_task_utils/mcq_extract.py
lmms_eval/tasks/_task_utils/media_resolver.py
lmms_eval/tasks/_task_utils/mmmu_mcq_utils.py
lmms_eval/tasks/_task_utils/reasoning_utils.py
lmms_eval/tasks/_task_utils/video_loader.py
lmms_eval/tasks/_task_utils/vqa_eval_metric.py
lmms_eval/tasks/activitynetqa/activitynetqa_generation.yaml
lmms_eval/tasks/activitynetqa/utils.py
lmms_eval/tasks/ai2d/ai2d.yaml
lmms_eval/tasks/ai2d/ai2d_lite.yaml
lmms_eval/tasks/ai2d/ai2d_no_mask.yaml
lmms_eval/tasks/ai2d/upload_ai2d.py
lmms_eval/tasks/ai2d/utils.py
lmms_eval/tasks/ai2d/reasoning/ai2d_reasoning.yaml
lmms_eval/tasks/ai2d/reasoning/utils.py
lmms_eval/tasks/aime/aime24_figures.yaml
lmms_eval/tasks/aime/aime24_figures_agg64.yaml
lmms_eval/tasks/aime/aime24_nofigures.yaml
lmms_eval/tasks/aime/aime24_nofigures_agg64.yaml
lmms_eval/tasks/aime/aime24_nofigures_agg8.yaml
lmms_eval/tasks/aime/aime25_nofigures.yaml
lmms_eval/tasks/aime/aime25_nofigures_agg64.yaml
lmms_eval/tasks/aime/aime25_nofigures_agg8.yaml
lmms_eval/tasks/aime/aime_2024_agg8.yaml
lmms_eval/tasks/aime/aime_2024_rebase.yaml
lmms_eval/tasks/aime/aime_figures.yaml
lmms_eval/tasks/aime/aime_nofigures.yaml
lmms_eval/tasks/aime/utils.py
lmms_eval/tasks/aime/reasoning/aime24_nofigures_agg8.yaml
lmms_eval/tasks/aime/reasoning/aime25_nofigures_agg8.yaml
lmms_eval/tasks/aime/reasoning/utils.py
lmms_eval/tasks/air_bench/air_bench_chat.yaml
lmms_eval/tasks/air_bench/air_bench_chat_mixed.yaml
lmms_eval/tasks/air_bench/air_bench_chat_music.yaml
lmms_eval/tasks/air_bench/air_bench_chat_sound.yaml
lmms_eval/tasks/air_bench/air_bench_chat_speech.yaml
lmms_eval/tasks/air_bench/air_bench_foundation.yaml
lmms_eval/tasks/air_bench/air_bench_foundation_music.yaml
lmms_eval/tasks/air_bench/air_bench_foundation_sound.yaml
lmms_eval/tasks/air_bench/air_bench_foundation_speech.yaml
lmms_eval/tasks/air_bench/utils.py
lmms_eval/tasks/alpaca_audio/alpaca_audio.yaml
lmms_eval/tasks/alpaca_audio/utils.py
lmms_eval/tasks/amber_g/amber_g.yaml
lmms_eval/tasks/amber_g/utils.py
lmms_eval/tasks/ami/ami.yaml
lmms_eval/tasks/ami/ami_test.yaml
lmms_eval/tasks/ami/ami_train.yaml
lmms_eval/tasks/ami/ami_validation.yaml
lmms_eval/tasks/ami/utils.py
lmms_eval/tasks/arc/arc_challenge.yaml
lmms_eval/tasks/arc/arc_easy.yaml
lmms_eval/tasks/arc_agi_1/arc_agi_1.yaml
lmms_eval/tasks/arc_agi_1/utils.py
lmms_eval/tasks/arc_agi_2/arc_agi_2.yaml
lmms_eval/tasks/arc_agi_2/utils.py
lmms_eval/tasks/auxsolidmath/__init__.py
lmms_eval/tasks/auxsolidmath/auxsolidmath.yaml
lmms_eval/tasks/auxsolidmath/auxsolidmath_easy.yaml
lmms_eval/tasks/auxsolidmath/auxsolidmath_hard.yaml
lmms_eval/tasks/auxsolidmath/utils.py
lmms_eval/tasks/av_asr/av_asr.yaml
lmms_eval/tasks/av_asr/utils.py
lmms_eval/tasks/av_odyssey/av_odyssey.yaml
lmms_eval/tasks/av_odyssey/utils.py
lmms_eval/tasks/av_speakerbench/av_speakerbench_audio.yaml
lmms_eval/tasks/av_speakerbench/av_speakerbench_audiovisual.yaml
lmms_eval/tasks/av_speakerbench/av_speakerbench_visual.yaml
lmms_eval/tasks/av_speakerbench/utils.py
lmms_eval/tasks/babyvision/__init__.py
lmms_eval/tasks/babyvision/babyvision.yaml
lmms_eval/tasks/babyvision/prompt.py
lmms_eval/tasks/babyvision/utils.py
lmms_eval/tasks/babyvision_gen/__init__.py
lmms_eval/tasks/babyvision_gen/babyvision_gen.yaml
lmms_eval/tasks/babyvision_gen/prompt.py
lmms_eval/tasks/babyvision_gen/utils.py
lmms_eval/tasks/benchmark_aliases/anet_qa.yaml
lmms_eval/tasks/benchmark_aliases/egosch_a.yaml
lmms_eval/tasks/benchmark_aliases/mmmu_a.yaml
lmms_eval/tasks/blink/blink.yaml
lmms_eval/tasks/blink/blink_art_style.yaml
lmms_eval/tasks/blink/blink_counting.yaml
lmms_eval/tasks/blink/blink_forensic_detection.yaml
lmms_eval/tasks/blink/blink_functional_correspondence.yaml
lmms_eval/tasks/blink/blink_iq_test.yaml
lmms_eval/tasks/blink/blink_jigsaw.yaml
lmms_eval/tasks/blink/blink_multi_view_reasoning.yaml
lmms_eval/tasks/blink/blink_object_localization.yaml
lmms_eval/tasks/blink/blink_relative_depth.yaml
lmms_eval/tasks/blink/blink_relative_reflectance.yaml
lmms_eval/tasks/blink/blink_semantic_correspondence.yaml
lmms_eval/tasks/blink/blink_spatial_relation.yaml
lmms_eval/tasks/blink/blink_visual_correspondence.yaml
lmms_eval/tasks/blink/blink_visual_similarity.yaml
lmms_eval/tasks/blink/utils.py
lmms_eval/tasks/browsecomp/browsecomp.yaml
lmms_eval/tasks/browsecomp/utils.py
lmms_eval/tasks/camerabench_vqa/camerabench_vqa.yaml
lmms_eval/tasks/camerabench_vqa/utils.py
lmms_eval/tasks/capability/capability.yaml
lmms_eval/tasks/capability/capability_OCR.yaml
lmms_eval/tasks/capability/capability_action.yaml
lmms_eval/tasks/capability/capability_camera_angle.yaml
lmms_eval/tasks/capability/capability_camera_movement.yaml
lmms_eval/tasks/capability/capability_character_identification.yaml
lmms_eval/tasks/capability/capability_dynamic_object_number.yaml
lmms_eval/tasks/capability/capability_event.yaml
lmms_eval/tasks/capability/capability_object_category.yaml
lmms_eval/tasks/capability/capability_object_color.yaml
lmms_eval/tasks/capability/capability_object_number.yaml
lmms_eval/tasks/capability/capability_scene.yaml
lmms_eval/tasks/capability/capability_spatial_relation.yaml
lmms_eval/tasks/capability/capability_style.yaml
lmms_eval/tasks/capability/prompt.py
lmms_eval/tasks/capability/utils.py
lmms_eval/tasks/captionqa/captionqa.yaml
lmms_eval/tasks/captionqa/captionqa_document.yaml
lmms_eval/tasks/captionqa/captionqa_ecommerce.yaml
lmms_eval/tasks/captionqa/captionqa_embodiedai.yaml
lmms_eval/tasks/captionqa/captionqa_natural.yaml
lmms_eval/tasks/captionqa/utils.py
lmms_eval/tasks/charades_sta/charades.yaml
lmms_eval/tasks/charades_sta/eval_tvg.py
lmms_eval/tasks/charades_sta/utils.py
lmms_eval/tasks/chartqa/chartqa.yaml
lmms_eval/tasks/chartqa/chartqa_lite.yaml
lmms_eval/tasks/chartqa/upload_chartqa.py
lmms_eval/tasks/chartqa/utils.py
lmms_eval/tasks/chartqa/reasoning/chartqa_reasoning.yaml
lmms_eval/tasks/chartqa/reasoning/utils.py
lmms_eval/tasks/chartqapro/chartqapro.yaml
lmms_eval/tasks/chartqapro/utils.py
lmms_eval/tasks/charxiv/charxiv.yaml
lmms_eval/tasks/charxiv/charxiv_val_descriptive.yaml
lmms_eval/tasks/charxiv/charxiv_val_reasoning.yaml
lmms_eval/tasks/charxiv/constant.py
lmms_eval/tasks/charxiv/descriptive_utils.py
lmms_eval/tasks/charxiv/reasoning_utils.py
lmms_eval/tasks/charxiv/utils.py
lmms_eval/tasks/charxiv/reasoning/charxiv.yaml
lmms_eval/tasks/charxiv/reasoning/charxiv_val_descriptive.yaml
lmms_eval/tasks/charxiv/reasoning/charxiv_val_reasoning.yaml
lmms_eval/tasks/charxiv/reasoning/utils.py
lmms_eval/tasks/cinepile/cinepile.yaml
lmms_eval/tasks/cinepile/utils.py
lmms_eval/tasks/clotho_aqa/clotho_aqa.yaml
lmms_eval/tasks/clotho_aqa/clotho_aqa_test.yaml
lmms_eval/tasks/clotho_aqa/clotho_aqa_val.yaml
lmms_eval/tasks/clotho_aqa/clotho_asqa_test_v2.yaml
lmms_eval/tasks/clotho_aqa/utils.py
lmms_eval/tasks/cmmmu/_cmmmu.yaml
lmms_eval/tasks/cmmmu/cmmmu_test.yaml
lmms_eval/tasks/cmmmu/cmmmu_val.yaml
lmms_eval/tasks/cmmmu/utils.py
lmms_eval/tasks/cn_college_listen_mcq/__init__.py
lmms_eval/tasks/cn_college_listen_mcq/cn_college_listen_mcq_test.yaml
lmms_eval/tasks/cn_college_listen_mcq/utils.py
lmms_eval/tasks/coco_cap/coco2014_cap.yaml
lmms_eval/tasks/coco_cap/coco2014_cap_test.yaml
lmms_eval/tasks/coco_cap/coco2014_cap_val.yaml
lmms_eval/tasks/coco_cap/coco2017_cap.yaml
lmms_eval/tasks/coco_cap/coco2017_cap_test.yaml
lmms_eval/tasks/coco_cap/coco2017_cap_val.yaml
lmms_eval/tasks/coco_cap/coco2017_cap_val_lite.yaml
lmms_eval/tasks/coco_cap/coco_cap.yaml
lmms_eval/tasks/coco_cap/coco_karpathy.yaml
lmms_eval/tasks/coco_cap/coco_karpathy_test.yaml
lmms_eval/tasks/coco_cap/coco_karpathy_val.yaml
lmms_eval/tasks/coco_cap/utils.py
lmms_eval/tasks/coco_cap_chair/coco_cap_chair.yaml
lmms_eval/tasks/coco_cap_chair/utils.py
lmms_eval/tasks/common_voice_15/common_voice_15.yaml
lmms_eval/tasks/common_voice_15/common_voice_15_en.yaml
lmms_eval/tasks/common_voice_15/common_voice_15_fr.yaml
lmms_eval/tasks/common_voice_15/common_voice_15_zh-CN.yaml
lmms_eval/tasks/common_voice_15/utils.py
lmms_eval/tasks/conbench/conbench.yaml
lmms_eval/tasks/conbench/utils.py
lmms_eval/tasks/corecognition/__init__.py
lmms_eval/tasks/corecognition/corecognition.yaml
lmms_eval/tasks/corecognition/corecognition_stage_concrete_operational.yaml
lmms_eval/tasks/corecognition/corecognition_stage_formal_operational.yaml
lmms_eval/tasks/corecognition/corecognition_stage_sensorimotor.yaml
lmms_eval/tasks/corecognition/utils.py
lmms_eval/tasks/countbench/countbench.yaml
lmms_eval/tasks/countbench/utils.py
lmms_eval/tasks/countbenchqa/countbenchqa.yaml
lmms_eval/tasks/countbenchqa/utils.py
lmms_eval/tasks/countbenchqa/reasoning/countbenchqa_reasoning.yaml
lmms_eval/tasks/countbenchqa/reasoning/utils.py
lmms_eval/tasks/countix/countix.yaml
lmms_eval/tasks/countix/utils.py
lmms_eval/tasks/cover/cover.yaml
lmms_eval/tasks/cover/generate_qa.py
lmms_eval/tasks/cover/utils.py
lmms_eval/tasks/covost2/covost2.yaml
lmms_eval/tasks/covost2/covost2_en_zh.yaml
lmms_eval/tasks/covost2/covost2_en_zh_dev.yaml
lmms_eval/tasks/covost2/covost2_en_zh_test.yaml
lmms_eval/tasks/covost2/covost2_zh_en.yaml
lmms_eval/tasks/covost2/covost2_zh_en_dev.yaml
lmms_eval/tasks/covost2/covost2_zh_en_test.yaml
lmms_eval/tasks/covost2/utils.py
lmms_eval/tasks/crosspoint_bench/crosspoint_bench.yaml
lmms_eval/tasks/crosspoint_bench/utils.py
lmms_eval/tasks/crpe_relation/crpe_relation.yaml
lmms_eval/tasks/crpe_relation/utils.py
lmms_eval/tasks/csbench/csbench.yaml
lmms_eval/tasks/csbench/csbench_assertion.yaml
lmms_eval/tasks/csbench/csbench_mcq.yaml
lmms_eval/tasks/csbench/utils.py
lmms_eval/tasks/cuva/cuva.yaml
lmms_eval/tasks/cuva/cuva_test.yaml
lmms_eval/tasks/cuva/utils.py
lmms_eval/tasks/cv_bench/cv_bench.yaml
lmms_eval/tasks/cv_bench/cv_bench_2d.yaml
lmms_eval/tasks/cv_bench/cv_bench_3d.yaml
lmms_eval/tasks/cv_bench/utils.py
lmms_eval/tasks/cv_bench/reasoning/cv_bench_2d_reasoning.yaml
lmms_eval/tasks/cv_bench/reasoning/cv_bench_3d_reasoning.yaml
lmms_eval/tasks/cv_bench/reasoning/cv_bench_reasoning.yaml
lmms_eval/tasks/cv_bench/reasoning/utils.py
lmms_eval/tasks/cvrr/_cvrr.yaml
lmms_eval/tasks/cvrr/cvrr_fine_grained_action_understanding.yaml
lmms_eval/tasks/cvrr/cvrr_interpretation_of_social_context.yaml
lmms_eval/tasks/cvrr/cvrr_interpretation_of_visual_context.yaml
lmms_eval/tasks/cvrr/cvrr_multiple_actions_in_a_single_video.yaml
lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_existent_scene_depictions.yaml
lmms_eval/tasks/cvrr/cvrr_non_existent_actions_with_non_existent_scene_depictions.yaml
lmms_eval/tasks/cvrr/cvrr_object_instance_count.yaml
lmms_eval/tasks/cvrr/cvrr_partial_actions.yaml
lmms_eval/tasks/cvrr/cvrr_time_order_understanding.yaml
lmms_eval/tasks/cvrr/cvrr_understanding_emotional_context.yaml
lmms_eval/tasks/cvrr/cvrr_unusual_and_physically_anomalous_activities.yaml
lmms_eval/tasks/cvrr/utils.py
lmms_eval/tasks/detailcaps/detailcaps.yaml
lmms_eval/tasks/detailcaps/utils.py
lmms_eval/tasks/docvqa/docvqa.yaml
lmms_eval/tasks/docvqa/docvqa_test.yaml
lmms_eval/tasks/docvqa/docvqa_val.yaml
lmms_eval/tasks/docvqa/docvqa_val_lite.yaml
lmms_eval/tasks/docvqa/utils.py
lmms_eval/tasks/docvqa/reasoning/docvqa_reasoning.yaml
lmms_eval/tasks/docvqa/reasoning/docvqa_test_reasoning.yaml
lmms_eval/tasks/docvqa/reasoning/docvqa_val_reasoning.yaml
lmms_eval/tasks/docvqa/reasoning/utils.py
lmms_eval/tasks/dream_tts_mcq/__init__.py
lmms_eval/tasks/dream_tts_mcq/dream_tts_mcq_test.yaml
lmms_eval/tasks/dream_tts_mcq/utils.py
lmms_eval/tasks/dtcbench/dtcbench.yaml
lmms_eval/tasks/dtcbench/utils.py
lmms_eval/tasks/dude/dude.yaml
lmms_eval/tasks/dude/utils.py
lmms_eval/tasks/dynamath/reasoning/_generate_config.py
lmms_eval/tasks/dynamath/reasoning/dynamath.yaml
lmms_eval/tasks/dynamath/reasoning/utils.py
lmms_eval/tasks/egoplan/egoplan.yaml
lmms_eval/tasks/egoplan/utils.py
lmms_eval/tasks/egoplan2/egoplan2.yaml
lmms_eval/tasks/egoplan2/utils.py
lmms_eval/tasks/egoschema/egoschema.yaml
lmms_eval/tasks/egoschema/egoschema_mcppl.yaml
lmms_eval/tasks/egoschema/egoschema_subset.yaml
lmms_eval/tasks/egoschema/egoschema_subset_mcppl.yaml
lmms_eval/tasks/egoschema/utils.py
lmms_eval/tasks/egotaskqa/egotaskqa.yaml
lmms_eval/tasks/egotaskqa/utils.py
lmms_eval/tasks/egotempo/egotempo.yaml
lmms_eval/tasks/egotempo/utils.py
lmms_eval/tasks/egothink/egothink.yaml
lmms_eval/tasks/egothink/egothink_activity.yaml
lmms_eval/tasks/egothink/egothink_affordance.yaml
lmms_eval/tasks/egothink/egothink_assistance.yaml
lmms_eval/tasks/egothink/egothink_attribute.yaml
lmms_eval/tasks/egothink/egothink_comparing.yaml
lmms_eval/tasks/egothink/egothink_counting.yaml
lmms_eval/tasks/egothink/egothink_existence.yaml
lmms_eval/tasks/egothink/egothink_forecasting.yaml
lmms_eval/tasks/egothink/egothink_location.yaml
lmms_eval/tasks/egothink/egothink_navigation.yaml
lmms_eval/tasks/egothink/egothink_situated.yaml
lmms_eval/tasks/egothink/egothink_spatial.yaml
lmms_eval/tasks/egothink/utils.py
lmms_eval/tasks/embspatial/embspatial.yaml
lmms_eval/tasks/embspatial/utils.py
lmms_eval/tasks/emma/emma_all.yaml
lmms_eval/tasks/emma/emma_mini_all.yaml
lmms_eval/tasks/emma/utils.py
lmms_eval/tasks/erqa/erqa.yaml
lmms_eval/tasks/erqa/utils.py
lmms_eval/tasks/europal_asr/europal_asr.yaml
lmms_eval/tasks/europal_asr/europal_asr_test.yaml
lmms_eval/tasks/europal_asr/europal_asr_validation.yaml
lmms_eval/tasks/europal_asr/utils.py
lmms_eval/tasks/extremewhenbench/extremewhenbench.yaml
lmms_eval/tasks/extremewhenbench/utils.py
lmms_eval/tasks/ferret/ferret.yaml
lmms_eval/tasks/ferret/rule.json
lmms_eval/tasks/ferret/utils.py
lmms_eval/tasks/fleurs/fleurs.yaml
lmms_eval/tasks/fleurs/fleurs_cmn_hans_cn.yaml
lmms_eval/tasks/fleurs/fleurs_en.yaml
lmms_eval/tasks/fleurs/fleurs_yue_hant_hk.yaml
lmms_eval/tasks/fleurs/utils.py
lmms_eval/tasks/flickr30k/flickr30k.yaml
lmms_eval/tasks/flickr30k/flickr30k_test.yaml
lmms_eval/tasks/flickr30k/flickr30k_test_lite.yaml
lmms_eval/tasks/flickr30k/utils.py
lmms_eval/tasks/fsc147/fsc147.yaml
lmms_eval/tasks/fsc147/utils.py
lmms_eval/tasks/funqa/funqa.yaml
lmms_eval/tasks/funqa/funqa_test.yaml
lmms_eval/tasks/funqa/utils.py
lmms_eval/tasks/gedit_bench/__init__.py
lmms_eval/tasks/gedit_bench/gedit_bench.yaml
lmms_eval/tasks/gedit_bench/utils.py
lmms_eval/tasks/gedit_bench/viescore/__init__.py
lmms_eval/tasks/gedit_bench/viescore/openai_backend.py
lmms_eval/tasks/gedit_bench/viescore/parse_prompt.py
lmms_eval/tasks/gedit_bench/viescore/utils.py
lmms_eval/tasks/gedit_bench/viescore/vie_prompts.py
lmms_eval/tasks/geometry3k/__init__.py
lmms_eval/tasks/geometry3k/geometry3k.yaml
lmms_eval/tasks/geometry3k/utils.py
lmms_eval/tasks/gigaspeech/gigaspeech.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_l_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_l_test.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_m_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_m_test.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_s_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_s_test.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_test.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_xl_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_xl_test.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_xs_dev.yaml
lmms_eval/tasks/gigaspeech/gigaspeech_xs_test.yaml
lmms_eval/tasks/gigaspeech/utils.py
lmms_eval/tasks/gigaspeech/whisper_normalizer/basic.py
lmms_eval/tasks/gigaspeech/whisper_normalizer/english.json
lmms_eval/tasks/gigaspeech/whisper_normalizer/english.py
lmms_eval/tasks/gpqa/cot_n_shot/_generate_configs.py
lmms_eval/tasks/gpqa/cot_n_shot/gpqa_diamond_cot_n_shot.yaml
lmms_eval/tasks/gpqa/cot_n_shot/gpqa_extended_cot_n_shot.yaml
lmms_eval/tasks/gpqa/cot_n_shot/gpqa_main_cot_n_shot.yaml
lmms_eval/tasks/gpqa/cot_n_shot/utils.py
lmms_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py
lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_diamond_cot_zeroshot.yaml
lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_extended_cot_zeroshot.yaml
lmms_eval/tasks/gpqa/cot_zeroshot/gpqa_main_cot_zeroshot.yaml
lmms_eval/tasks/gpqa/cot_zeroshot/utils.py
lmms_eval/tasks/gpqa/generative/_generate_configs.py
lmms_eval/tasks/gpqa/generative/gpqa_diamond_generative_n_shot.yaml
lmms_eval/tasks/gpqa/generative/gpqa_extended_generative_n_shot.yaml
lmms_eval/tasks/gpqa/generative/gpqa_main_generative_n_shot.yaml
lmms_eval/tasks/gpqa/generative/utils.py
lmms_eval/tasks/gpqa/n_shot/_generate_configs.py
lmms_eval/tasks/gpqa/n_shot/gpqa_diamond_n_shot.yaml
lmms_eval/tasks/gpqa/n_shot/gpqa_extended_n_shot.yaml
lmms_eval/tasks/gpqa/n_shot/gpqa_main_n_shot.yaml
lmms_eval/tasks/gpqa/n_shot/utils.py
lmms_eval/tasks/gpqa/openai/gpqa_diamond_openai.yaml
lmms_eval/tasks/gpqa/openai/gpqa_diamond_openai_agg64.yaml
lmms_eval/tasks/gpqa/openai/gpqa_diamond_openai_maj64_cov64.yaml
lmms_eval/tasks/gpqa/openai/utils.py
lmms_eval/tasks/gpqa/reasoning/gpqa_thinking.yaml
lmms_eval/tasks/gpqa/reasoning/utils.py
lmms_eval/tasks/gpqa/zeroshot/_generate_configs.py
lmms_eval/tasks/gpqa/zeroshot/gpqa_diamond_zeroshot.yaml
lmms_eval/tasks/gpqa/zeroshot/gpqa_extended_zeroshot.yaml
lmms_eval/tasks/gpqa/zeroshot/gpqa_main_zeroshot.yaml
lmms_eval/tasks/gpqa/zeroshot/utils.py
lmms_eval/tasks/gqa/gqa.yaml
lmms_eval/tasks/gqa/gqa_lite.yaml
lmms_eval/tasks/gqa/utils.py
lmms_eval/tasks/gqa_ru/gqa_ru.yaml
lmms_eval/tasks/gqa_ru/utils.py
lmms_eval/tasks/groundingme/groundingme.yaml
lmms_eval/tasks/groundingme/utils.py
lmms_eval/tasks/gsm8k/gsm8k-cot-llama.yaml
lmms_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml
lmms_eval/tasks/gsm8k/gsm8k-cot-zeroshot.yaml
lmms_eval/tasks/gsm8k/gsm8k-cot.yaml
lmms_eval/tasks/gsm8k/gsm8k.yaml
lmms_eval/tasks/hallusion_bench/evaluate_hb.py
lmms_eval/tasks/hallusion_bench/hallusion_bench_image.yaml
lmms_eval/tasks/hallusion_bench/utils.py
lmms_eval/tasks/hd_epic/__init__.py
lmms_eval/tasks/hd_epic/_group_hd_epic.yaml
lmms_eval/tasks/hd_epic/_hd_epic_base.yaml
lmms_eval/tasks/hd_epic/generate_task_yamls.py
lmms_eval/tasks/hd_epic/hd_epic_to_hf.py
lmms_eval/tasks/hd_epic/utils.py
lmms_eval/tasks/hd_epic/3d_perception/_group_hd_epic_3d_perception.yaml
lmms_eval/tasks/hd_epic/3d_perception/hd_epic_3d_perception_fixture_interaction_counting.yaml
lmms_eval/tasks/hd_epic/3d_perception/hd_epic_3d_perception_fixture_location.yaml
lmms_eval/tasks/hd_epic/3d_perception/hd_epic_3d_perception_object_contents_retrieval.yaml
lmms_eval/tasks/hd_epic/3d_perception/hd_epic_3d_perception_object_location.yaml
lmms_eval/tasks/hd_epic/3d_perception/utils.py
lmms_eval/tasks/hd_epic/fine_grained/_group_hd_epic_fine_grained.yaml
lmms_eval/tasks/hd_epic/fine_grained/hd_epic_fine_grained_action_localization.yaml
lmms_eval/tasks/hd_epic/fine_grained/hd_epic_fine_grained_action_recognition.yaml
lmms_eval/tasks/hd_epic/fine_grained/hd_epic_fine_grained_how_recognition.yaml
lmms_eval/tasks/hd_epic/fine_grained/hd_epic_fine_grained_why_recognition.yaml
lmms_eval/tasks/hd_epic/fine_grained/utils.py
lmms_eval/tasks/hd_epic/gaze/_group_hd_epic_gaze.yaml
lmms_eval/tasks/hd_epic/gaze/hd_epic_gaze_gaze_estimation.yaml
lmms_eval/tasks/hd_epic/gaze/hd_epic_gaze_interaction_anticipation.yaml
lmms_eval/tasks/hd_epic/gaze/utils.py
lmms_eval/tasks/hd_epic/ingredient/_group_hd_epic_ingredient.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_exact_ingredient_recognition.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_ingredient_adding_localization.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_ingredient_recognition.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_ingredient_retrieval.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_ingredient_weight.yaml
lmms_eval/tasks/hd_epic/ingredient/hd_epic_ingredient_ingredients_order.yaml
lmms_eval/tasks/hd_epic/ingredient/utils.py
lmms_eval/tasks/hd_epic/nutrition/_group_hd_epic_nutrition.yaml
lmms_eval/tasks/hd_epic/nutrition/hd_epic_nutrition_image_nutrition_estimation.yaml
lmms_eval/tasks/hd_epic/nutrition/hd_epic_nutrition_nutrition_change.yaml
lmms_eval/tasks/hd_epic/nutrition/hd_epic_nutrition_video_nutrition_estimation.yaml
lmms_eval/tasks/hd_epic/nutrition/utils.py
lmms_eval/tasks/hd_epic/object_motion/_group_hd_epic_object_motion.yaml
lmms_eval/tasks/hd_epic/object_motion/hd_epic_object_motion_object_movement_counting.yaml
lmms_eval/tasks/hd_epic/object_motion/hd_epic_object_motion_object_movement_itinerary.yaml
lmms_eval/tasks/hd_epic/object_motion/hd_epic_object_motion_stationary_object_localization.yaml
lmms_eval/tasks/hd_epic/object_motion/utils.py
lmms_eval/tasks/hd_epic/recipe/_group_hd_epic_recipe.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_following_activity_recognition.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_multi_recipe_recognition.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_multi_step_localization.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_prep_localization.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_recipe_recognition.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_rough_step_localization.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_step_localization.yaml
lmms_eval/tasks/hd_epic/recipe/hd_epic_recipe_step_recognition.yaml
lmms_eval/tasks/hd_epic/recipe/utils.py
lmms_eval/tasks/hellaswag/hellaswag.yaml
lmms_eval/tasks/hellaswag/utils.py
lmms_eval/tasks/hipho/hipho.yaml
lmms_eval/tasks/hipho/hipho_apho_2025.yaml
lmms_eval/tasks/hipho/hipho_eupho_2024.yaml
lmms_eval/tasks/hipho/hipho_eupho_2025.yaml
lmms_eval/tasks/hipho/hipho_f_ma_2024.yaml
lmms_eval/tasks/hipho/hipho_f_ma_2025.yaml
lmms_eval/tasks/hipho/hipho_ipho_2024.yaml
lmms_eval/tasks/hipho/hipho_ipho_2025.yaml
lmms_eval/tasks/hipho/hipho_nbpho_2024.yaml
lmms_eval/tasks/hipho/hipho_nbpho_2025.yaml
lmms_eval/tasks/hipho/hipho_panmechanics_2024.yaml
lmms_eval/tasks/hipho/hipho_panmechanics_2025.yaml
lmms_eval/tasks/hipho/hipho_panpho_2024.yaml
lmms_eval/tasks/hipho/hipho_panpho_2025.yaml
lmms_eval/tasks/hipho/utils.py
lmms_eval/tasks/holisafe/holisafe.yaml
lmms_eval/tasks/holisafe/holisafe_llm_judge.yaml
lmms_eval/tasks/holisafe/utils.py
lmms_eval/tasks/hrbench/hrbench.yaml
lmms_eval/tasks/hrbench/hrbench4k.yaml
lmms_eval/tasks/hrbench/hrbench8k.yaml
lmms_eval/tasks/hrbench/hrbench_evals.py
lmms_eval/tasks/hrbench/utils.py
lmms_eval/tasks/iconqa/iconqa.yaml
lmms_eval/tasks/iconqa/iconqa_test.yaml
lmms_eval/tasks/iconqa/iconqa_val.yaml
lmms_eval/tasks/iconqa/utils.py
lmms_eval/tasks/ifeval/ifeval.yaml
lmms_eval/tasks/ifeval/instructions.py
lmms_eval/tasks/ifeval/instructions_registry.py
lmms_eval/tasks/ifeval/instructions_util.py
lmms_eval/tasks/ifeval/utils.py
lmms_eval/tasks/ii_bench/ii_bench.yaml
lmms_eval/tasks/ii_bench/utils.py
lmms_eval/tasks/illusionbench/__init__.py
lmms_eval/tasks/illusionbench/convert_dataset.py
lmms_eval/tasks/illusionbench/illusionbench.yaml
lmms_eval/tasks/illusionbench/illusionbench_all.yaml
lmms_eval/tasks/illusionbench/utils.py
lmms_eval/tasks/illusionvqa/illusionvqa.yaml
lmms_eval/tasks/illusionvqa/illusionvqa_comprehension.yaml
lmms_eval/tasks/illusionvqa/illusionvqa_soft_localization.yaml
lmms_eval/tasks/illusionvqa/utils.py
lmms_eval/tasks/imgedit/__init__.py
lmms_eval/tasks/imgedit/imgedit.yaml
lmms_eval/tasks/imgedit/prompt.py
lmms_eval/tasks/imgedit/utils.py
lmms_eval/tasks/infovqa/infovqa.yaml
lmms_eval/tasks/infovqa/infovqa_test.yaml
lmms_eval/tasks/infovqa/infovqa_val.yaml
lmms_eval/tasks/infovqa/infovqa_val_lite.yaml
lmms_eval/tasks/infovqa/utils.py
lmms_eval/tasks/infovqa/reasoning/infovqa_reasoning.yaml
lmms_eval/tasks/infovqa/reasoning/infovqa_test_reasoning.yaml
lmms_eval/tasks/infovqa/reasoning/infovqa_val_reasoning.yaml
lmms_eval/tasks/infovqa/reasoning/utils.py
lmms_eval/tasks/internal_eval/d170_cn.yaml
lmms_eval/tasks/internal_eval/d170_cn_utils.py
lmms_eval/tasks/internal_eval/d170_en.yaml
lmms_eval/tasks/internal_eval/d170_en_utils.py
lmms_eval/tasks/internal_eval/dc100_en.yaml
lmms_eval/tasks/internal_eval/dc100_en_utils.py
lmms_eval/tasks/internal_eval/dc200_cn.yaml
lmms_eval/tasks/internal_eval/dc200_cn_utils.py
lmms_eval/tasks/internal_eval/internal_eval.yaml
lmms_eval/tasks/internal_eval/utils.py
lmms_eval/tasks/jmmmu/jmmmu.yaml
lmms_eval/tasks/jmmmu/jmmmu_accounting.yaml
lmms_eval/tasks/jmmmu/jmmmu_agriculture.yaml
lmms_eval/tasks/jmmmu/jmmmu_architecture_and_engineering.yaml
lmms_eval/tasks/jmmmu/jmmmu_basic_medical_science.yaml
lmms_eval/tasks/jmmmu/jmmmu_biology.yaml
lmms_eval/tasks/jmmmu/jmmmu_chemistry.yaml
lmms_eval/tasks/jmmmu/jmmmu_clinical_medicine.yaml
lmms_eval/tasks/jmmmu/jmmmu_computer_science.yaml
lmms_eval/tasks/jmmmu/jmmmu_design.yaml
lmms_eval/tasks/jmmmu/jmmmu_diagnostics_and_laboratory_medicine.yaml
lmms_eval/tasks/jmmmu/jmmmu_economics.yaml
lmms_eval/tasks/jmmmu/jmmmu_electronics.yaml
lmms_eval/tasks/jmmmu/jmmmu_energy_and_power.yaml
lmms_eval/tasks/jmmmu/jmmmu_finance.yaml
lmms_eval/tasks/jmmmu/jmmmu_japanese_art.yaml
lmms_eval/tasks/jmmmu/jmmmu_japanese_heritage.yaml
lmms_eval/tasks/jmmmu/jmmmu_japanese_history.yaml
lmms_eval/tasks/jmmmu/jmmmu_manage.yaml
lmms_eval/tasks/jmmmu/jmmmu_marketing.yaml
lmms_eval/tasks/jmmmu/jmmmu_materials.yaml
lmms_eval/tasks/jmmmu/jmmmu_math.yaml
lmms_eval/tasks/jmmmu/jmmmu_mechanical_engineering.yaml
lmms_eval/tasks/jmmmu/jmmmu_music.yaml
lmms_eval/tasks/jmmmu/jmmmu_pharmacy.yaml
lmms_eval/tasks/jmmmu/jmmmu_physics.yaml
lmms_eval/tasks/jmmmu/jmmmu_psychology.yaml
lmms_eval/tasks/jmmmu/jmmmu_public_health.yaml
lmms_eval/tasks/jmmmu/jmmmu_world_history.yaml
lmms_eval/tasks/jmmmu/utils.py
lmms_eval/tasks/jmmmu_pro/jmmmu_pro.yaml
lmms_eval/tasks/jmmmu_pro/jmmmu_pro_cot.yaml
lmms_eval/tasks/jmmmu_pro/jmmmu_pro_standard.yaml
lmms_eval/tasks/jmmmu_pro/jmmmu_pro_standard_cot.yaml
lmms_eval/tasks/jmmmu_pro/jmmmu_pro_vision.yaml
lmms_eval/tasks/jmmmu_pro/jmmmu_pro_vision_cot.yaml
lmms_eval/tasks/jmmmu_pro/utils.py
lmms_eval/tasks/jumpscore/jumpscore.yaml
lmms_eval/tasks/jumpscore/utils.py
lmms_eval/tasks/k12/k12.yaml
lmms_eval/tasks/k12/utils.py
lmms_eval/tasks/kris_bench/__init__.py
lmms_eval/tasks/kris_bench/kris_bench.yaml
lmms_eval/tasks/kris_bench/prompt.py
lmms_eval/tasks/kris_bench/utils.py
lmms_eval/tasks/lemonade/lemonade.yaml
lmms_eval/tasks/lemonade/utils.py
lmms_eval/tasks/librispeech/cn_tn.py
lmms_eval/tasks/librispeech/librispeech.yaml
lmms_eval/tasks/librispeech/librispeech_dev_clean.yaml
lmms_eval/tasks/librispeech/librispeech_dev_other.yaml
lmms_eval/tasks/librispeech/librispeech_long.yaml
lmms_eval/tasks/librispeech/librispeech_test_clean.yaml
lmms_eval/tasks/librispeech/librispeech_test_clean_long.yaml
lmms_eval/tasks/librispeech/librispeech_test_other.yaml
lmms_eval/tasks/librispeech/librispeech_test_other_long.yaml
lmms_eval/tasks/librispeech/utils.py
lmms_eval/tasks/librispeech/whisper_normalizer/basic.py
lmms_eval/tasks/librispeech/whisper_normalizer/english.json
lmms_eval/tasks/librispeech/whisper_normalizer/english.py
lmms_eval/tasks/live_bench/live_bench.yaml
lmms_eval/tasks/live_bench/live_bench_2406.yaml
lmms_eval/tasks/live_bench/live_bench_2407.yaml
lmms_eval/tasks/live_bench/live_bench_2409.yaml
lmms_eval/tasks/live_bench/utils.py
lmms_eval/tasks/live_bench/utils_v2.py
lmms_eval/tasks/livexiv_tqa/livexiv_tqa.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v1.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v2.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v3.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v4.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v5.yaml
lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v6.yaml
lmms_eval/tasks/livexiv_tqa/utils.py
lmms_eval/tasks/livexiv_vqa/livexiv_vqa.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v1.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v2.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v3.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v4.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v5.yaml
lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v6.yaml
lmms_eval/tasks/livexiv_vqa/utils.py
lmms_eval/tasks/llava-bench-coco/llava-bench-coco.yaml
lmms_eval/tasks/llava-bench-coco/rule.json
lmms_eval/tasks/llava-bench-coco/utils.py
lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild.yaml
lmms_eval/tasks/llava-in-the-wild/llava-in-the-wild_ko.yaml
lmms_eval/tasks/llava-in-the-wild/rule.json
lmms_eval/tasks/llava-in-the-wild/rule_ko.json
lmms_eval/tasks/llava-in-the-wild/utils.py
lmms_eval/tasks/llava-in-the-wild/utils_ko.py
lmms_eval/tasks/llava_interleave_bench/in_domain.yaml
lmms_eval/tasks/llava_interleave_bench/interleave_bench.yaml
lmms_eval/tasks/llava_interleave_bench/multi_view_in_domain.yaml
lmms_eval/tasks/llava_interleave_bench/out_of_domain.yaml
lmms_eval/tasks/llava_interleave_bench/utils.py
lmms_eval/tasks/llava_wilder/llava_wilder_small.yaml
lmms_eval/tasks/llava_wilder/utils.py
lmms_eval/tasks/logicvista/reasoning/logicvista_thinking.yaml
lmms_eval/tasks/logicvista/reasoning/utils.py
lmms_eval/tasks/longtimescope/longtimescope.yaml
lmms_eval/tasks/longtimescope/utils.py
lmms_eval/tasks/longvideobench/longvideobench_test_i.yaml
lmms_eval/tasks/longvideobench/longvideobench_test_v.yaml
lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml
lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml
lmms_eval/tasks/longvideobench/utils.py
lmms_eval/tasks/longvideobench/no_visual/longvideobench_no_visual.yaml
lmms_eval/tasks/longvideobench/no_visual/utils.py
lmms_eval/tasks/longvideobench/random_choice/longvideobench_random_choice.yaml
lmms_eval/tasks/longvideobench/random_choice/utils.py
lmms_eval/tasks/longvt/longvt_non_think.yaml
lmms_eval/tasks/longvt/longvt_reasoning.yaml
lmms_eval/tasks/longvt/longvt_tool.yaml
lmms_eval/tasks/longvt/utils.py
lmms_eval/tasks/longvt/no_visual/longvt_no_visual.yaml
lmms_eval/tasks/longvt/no_visual/utils.py
lmms_eval/tasks/lsdbench/lsdbench.yaml
lmms_eval/tasks/lsdbench/utils.py
lmms_eval/tasks/lvbench/lvbench.yaml
lmms_eval/tasks/lvbench/utils.py
lmms_eval/tasks/lvbench/no_visual/lvbench_no_visual.yaml
lmms_eval/tasks/lvbench/no_visual/utils.py
lmms_eval/tasks/lvbench/random_choice/lvbench_random_choice.yaml
lmms_eval/tasks/lvbench/random_choice/utils.py
lmms_eval/tasks/mantis/mantis.yaml
lmms_eval/tasks/mantis/utils.py
lmms_eval/tasks/mathcanvas/mathcanvas.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_algebra.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_analytic_geometry.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_calculus_and_vector.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_plane_geometry.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_solid_geometry.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_statistics.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_transformational_geometry.yaml
lmms_eval/tasks/mathcanvas/mathcanvas_trigonometry.yaml
lmms_eval/tasks/mathcanvas/utils.py
lmms_eval/tasks/mathkangaroo/mathkangaroo.yaml
lmms_eval/tasks/mathkangaroo/utils.py
lmms_eval/tasks/mathverse/mathverse.yaml
lmms_eval/tasks/mathverse/mathverse_evals.py
lmms_eval/tasks/mathverse/mathverse_testmini.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_text.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_text_dominant.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_text_lite.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_text_only.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_vision.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_vision_dominant.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_vision_intensive.yaml
lmms_eval/tasks/mathverse/mathverse_testmini_vision_only.yaml
lmms_eval/tasks/mathverse/utils.py
lmms_eval/tasks/mathverse/reasoning/mathverse_testmini.yaml
lmms_eval/tasks/mathverse/reasoning/utils.py
lmms_eval/tasks/mathvision/eval_utils.py
lmms_eval/tasks/mathvision/mathvision_reason_test.yaml
lmms_eval/tasks/mathvision/mathvision_reason_testmini.yaml
lmms_eval/tasks/mathvision/mathvision_test.yaml
lmms_eval/tasks/mathvision/mathvision_testmini.yaml
lmms_eval/tasks/mathvision/utils.py
lmms_eval/tasks/mathvision/reasoning/mathvision_reason_test.yaml
lmms_eval/tasks/mathvision/reasoning/mathvision_reason_testmini.yaml
lmms_eval/tasks/mathvision/reasoning/utils.py
lmms_eval/tasks/mathvista/mathvista.yaml
lmms_eval/tasks/mathvista/mathvista_evals.py
lmms_eval/tasks/mathvista/mathvista_test.yaml
lmms_eval/tasks/mathvista/mathvista_testmini.yaml
lmms_eval/tasks/mathvista/mathvista_testmini_cot.yaml
lmms_eval/tasks/mathvista/mathvista_testmini_format.yaml
lmms_eval/tasks/mathvista/mathvista_testmini_solution.yaml
lmms_eval/tasks/mathvista/utils.py
lmms_eval/tasks/mathvista/reasoning/mathvista_testmini_cot.yaml
lmms_eval/tasks/mathvista/reasoning/utils.py
lmms_eval/tasks/medqa/medqa.yaml
lmms_eval/tasks/medqa/utils.py
lmms_eval/tasks/megabench/evaluator.py
lmms_eval/tasks/megabench/image_video_utils.py
lmms_eval/tasks/megabench/megabench.yaml
lmms_eval/tasks/megabench/megabench_core.yaml
lmms_eval/tasks/megabench/megabench_core_si.yaml
lmms_eval/tasks/megabench/megabench_open.yaml
lmms_eval/tasks/megabench/megabench_open_si.yaml
lmms_eval/tasks/megabench/requirements.txt
lmms_eval/tasks/megabench/utils.py
lmms_eval/tasks/megabench/breakdown/all_task_meta.json
lmms_eval/tasks/megabench/breakdown/analysis_utils.py
lmms_eval/tasks/megabench/breakdown/derive_breakdown_results.py
lmms_eval/tasks/megabench/metrics/__init__.py
lmms_eval/tasks/megabench/metrics/aggregation_type.py
lmms_eval/tasks/megabench/metrics/metric_type.py
lmms_eval/tasks/megabench/metrics/response_parse_type.py
lmms_eval/tasks/megabench/metrics/aggregation/mean_agg.py
lmms_eval/tasks/megabench/metrics/aggregation/min_agg.py
lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py
lmms_eval/tasks/megabench/metrics/parsing/answer_str_parse.py
lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py
lmms_eval/tasks/megabench/metrics/parsing/json_parse.py
lmms_eval/tasks/megabench/metrics/parsing/common/parsers.py
lmms_eval/tasks/megabench/metrics/parsing/common/utils.py
lmms_eval/tasks/megabench/metrics/scoring/ascii_art_vlm_judge.py
lmms_eval/tasks/megabench/metrics/scoring/chess_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/constrained_generation.py
lmms_eval/tasks/megabench/metrics/scoring/coordinate_sequence_match.py
lmms_eval/tasks/megabench/metrics/scoring/dict_equality.py
lmms_eval/tasks/megabench/metrics/scoring/dict_exact_match_agg_recall.py
lmms_eval/tasks/megabench/metrics/scoring/dict_jaccard_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/dict_nbbox_iou_tuple_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/dict_set_equality_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/exact_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/exact_str_match_case_insensitive.py
lmms_eval/tasks/megabench/metrics/scoring/general_numerical_match.py
lmms_eval/tasks/megabench/metrics/scoring/geo_proximity.py
lmms_eval/tasks/megabench/metrics/scoring/gleu.py
lmms_eval/tasks/megabench/metrics/scoring/jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/latex_expr_equality.py
lmms_eval/tasks/megabench/metrics/scoring/longest_common_list_prefix_ratio.py
lmms_eval/tasks/megabench/metrics/scoring/mse.py
lmms_eval/tasks/megabench/metrics/scoring/multi_ref_phrase.py
lmms_eval/tasks/megabench/metrics/scoring/nbbox_iou.py
lmms_eval/tasks/megabench/metrics/scoring/near_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/nli_entailment.py
lmms_eval/tasks/megabench/metrics/scoring/normalized_similarity_damerau_levenshtein.py
lmms_eval/tasks/megabench/metrics/scoring/number_rel_diff_ratio.py
lmms_eval/tasks/megabench/metrics/scoring/positive_int_match.py
lmms_eval/tasks/megabench/metrics/scoring/program_judge.py
lmms_eval/tasks/megabench/metrics/scoring/sacrebleu_bleu.py
lmms_eval/tasks/megabench/metrics/scoring/sequence_equality.py
lmms_eval/tasks/megabench/metrics/scoring/set_equality.py
lmms_eval/tasks/megabench/metrics/scoring/set_precision.py
lmms_eval/tasks/megabench/metrics/scoring/simple_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/symbolic_planning.py
lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py
lmms_eval/tasks/megabench/metrics/scoring/vlm_as_judge.py
lmms_eval/tasks/megabench/metrics/scoring/xml_nbbox_iou.py
lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_distance.py
lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_in_bbox.py
lmms_eval/tasks/megabench/metrics/scoring/common/conversions.py
lmms_eval/tasks/megabench/metrics/scoring/common/metrics.py
lmms_eval/tasks/megabench/metrics/scoring/common/transformations.py
lmms_eval/tasks/metavqa/metavqa.yaml
lmms_eval/tasks/metavqa/utils.py
lmms_eval/tasks/mia_bench/mia_bench.yaml
lmms_eval/tasks/mia_bench/utils.py
lmms_eval/tasks/mindcube/mindcube_full.yaml
lmms_eval/tasks/mindcube/mindcube_full_among.yaml
lmms_eval/tasks/mindcube/mindcube_full_around.yaml
lmms_eval/tasks/mindcube/mindcube_full_rotation.yaml
lmms_eval/tasks/mindcube/mindcube_tiny.yaml
lmms_eval/tasks/mindcube/mindcube_tiny_among.yaml
lmms_eval/tasks/mindcube/mindcube_tiny_around.yaml
lmms_eval/tasks/mindcube/mindcube_tiny_rotation.yaml
lmms_eval/tasks/mindcube/utils.py
lmms_eval/tasks/minerva/minerva.yaml
lmms_eval/tasks/minerva/utils.py
lmms_eval/tasks/mirb/mirb.yaml
lmms_eval/tasks/mirb/utils.py
lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2_text_freeform.yaml
lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2_text_freeform_hard.yaml
lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text.yaml
lmms_eval/tasks/mix_evals/audio2text/mix_evals_audio2text_hard.yaml
lmms_eval/tasks/mix_evals/audio2text/utils.py
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text.yaml
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform.yaml
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_freeform_hard.yaml
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_hard.yaml
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc.yaml
lmms_eval/tasks/mix_evals/image2text/mix_evals_image2text_mc_hard.yaml
lmms_eval/tasks/mix_evals/image2text/utils.py
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_freeform_hard.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_hard.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_mc_hard.yaml
lmms_eval/tasks/mix_evals/video2text/mix_evals_video2text_openended.yaml
lmms_eval/tasks/mix_evals/video2text/utils.py
lmms_eval/tasks/mle_bench/mle_bench.yaml
lmms_eval/tasks/mle_bench/mle_bench_large.yaml
lmms_eval/tasks/mle_bench/mle_bench_medium.yaml
lmms_eval/tasks/mle_bench/mle_bench_small.yaml
lmms_eval/tasks/mle_bench/utils.py
lmms_eval/tasks/mlvu/mlvu_dev.yaml
lmms_eval/tasks/mlvu/mlvu_test.yaml
lmms_eval/tasks/mlvu/utils.py
lmms_eval/tasks/mmar/mmar.yaml
lmms_eval/tasks/mmar/utils.py
lmms_eval/tasks/mmau/mmau.yaml
lmms_eval/tasks/mmau/mmau_test.yaml
lmms_eval/tasks/mmau/mmau_test_mini.yaml
lmms_eval/tasks/mmau/utils.py
lmms_eval/tasks/mmbench/cc_utils.py
lmms_eval/tasks/mmbench/cn_utils.py
lmms_eval/tasks/mmbench/en_utils.py
lmms_eval/tasks/mmbench/ko_utils.py
lmms_eval/tasks/mmbench/mmbench.yaml
lmms_eval/tasks/mmbench/mmbench_cc.yaml
lmms_eval/tasks/mmbench/mmbench_cn.yaml
lmms_eval/tasks/mmbench/mmbench_cn_dev.yaml
lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml
lmms_eval/tasks/mmbench/mmbench_cn_test.yaml
lmms_eval/tasks/mmbench/mmbench_en.yaml
lmms_eval/tasks/mmbench/mmbench_en_dev.yaml
lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml
lmms_eval/tasks/mmbench/mmbench_en_dev_static.yaml
lmms_eval/tasks/mmbench/mmbench_en_test.yaml
lmms_eval/tasks/mmbench/mmbench_evals.py
lmms_eval/tasks/mmbench/mmbench_ko_dev.yaml
lmms_eval/tasks/mmbench/mmbench_ru_dev.yaml
lmms_eval/tasks/mmbench/ru_utils.py
lmms_eval/tasks/mmbench/en_reasoning/mmbench_en_dev_reasoning.yaml
lmms_eval/tasks/mmbench/en_reasoning/mmbench_en_reasoning.yaml
lmms_eval/tasks/mmbench/en_reasoning/utils.py
lmms_eval/tasks/mmbench/reasoning/mmbench_cn_dev_reasoning.yaml
lmms_eval/tasks/mmbench/reasoning/mmbench_cn_test_reasoning.yaml
lmms_eval/tasks/mmbench/reasoning/mmbench_en_dev_reasoning.yaml
lmms_eval/tasks/mmbench/reasoning/mmbench_en_test_reasoning.yaml
lmms_eval/tasks/mmbench/reasoning/mmbench_reasoning.yaml
lmms_eval/tasks/mmbench/reasoning/utils.py
lmms_eval/tasks/mme/mme.yaml
lmms_eval/tasks/mme/utils.py
lmms_eval/tasks/mme_cc/mme_cc.yaml
lmms_eval/tasks/mme_cc/utils.py
lmms_eval/tasks/mme_cot/mme_cot_direct.yaml
lmms_eval/tasks/mme_cot/mme_cot_reason.yaml
lmms_eval/tasks/mme_cot/utils.py
lmms_eval/tasks/mme_realworld/mme_realworld.yaml
lmms_eval/tasks/mme_realworld/mme_realworld_cn.yaml
lmms_eval/tasks/mme_realworld/mme_realworld_lite.yaml
lmms_eval/tasks/mme_realworld/utils.py
lmms_eval/tasks/mme_realworld/reasoning/mme_realworld_cn_reasoning.yaml
lmms_eval/tasks/mme_realworld/reasoning/mme_realworld_reasoning.yaml
lmms_eval/tasks/mme_realworld/reasoning/utils.py
lmms_eval/tasks/mme_sci/mme_sci.yaml
lmms_eval/tasks/mme_sci/utils.py
lmms_eval/tasks/mme_sci_image/mme_sci_image.yaml
lmms_eval/tasks/mme_sci_image/utils.py
lmms_eval/tasks/mmie/mmie.yaml
lmms_eval/tasks/mmie/utils.py
lmms_eval/tasks/mmlongbench/mmlongbench.yaml
lmms_eval/tasks/mmlongbench/utils.py
lmms_eval/tasks/mmlongbench_doc/mmlongbench_doc.yaml
lmms_eval/tasks/mmlongbench_doc/utils.py
lmms_eval/tasks/mmlu/_generate_configs.py
lmms_eval/tasks/mmlu/continuation/_mmlu.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_management.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_virology.yaml
lmms_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/default/_mmlu.yaml
lmms_eval/tasks/mmlu/default/_mmlu_humanities.yaml
lmms_eval/tasks/mmlu/default/_mmlu_other.yaml
lmms_eval/tasks/mmlu/default/_mmlu_social_sciences.yaml
lmms_eval/tasks/mmlu/default/_mmlu_stem.yaml
lmms_eval/tasks/mmlu/default/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/default/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/default/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/default/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/default/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/default/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/default/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/default/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/default/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/default/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/default/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/default/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/default/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/default/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/default/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/default/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/default/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/default/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/default/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/default/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/default/mmlu_management.yaml
lmms_eval/tasks/mmlu/default/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/default/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/default/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/default/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/default/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/default/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/default/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/default/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/default/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/default/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/default/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/default/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/default/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/default/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/default/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/default/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/default/mmlu_virology.yaml
lmms_eval/tasks/mmlu/default/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/_cot_prompts.json
lmms_eval/tasks/mmlu/flan_cot_fewshot/_mmlu.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_management.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_virology.yaml
lmms_eval/tasks/mmlu/flan_cot_fewshot/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/_mmlu.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_management.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_virology.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lmms_eval/tasks/mmlu/flan_n_shot/generative/_mmlu.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_management.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_virology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/_mmlu.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_management.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_virology.yaml
lmms_eval/tasks/mmlu/flan_n_shot/loglikelihood/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu/generative/_mmlu.yaml
lmms_eval/tasks/mmlu/generative/mmlu_abstract_algebra.yaml
lmms_eval/tasks/mmlu/generative/mmlu_anatomy.yaml
lmms_eval/tasks/mmlu/generative/mmlu_astronomy.yaml
lmms_eval/tasks/mmlu/generative/mmlu_business_ethics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_clinical_knowledge.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_biology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_chemistry.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_computer_science.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_mathematics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_medicine.yaml
lmms_eval/tasks/mmlu/generative/mmlu_college_physics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_computer_security.yaml
lmms_eval/tasks/mmlu/generative/mmlu_conceptual_physics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_econometrics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_electrical_engineering.yaml
lmms_eval/tasks/mmlu/generative/mmlu_elementary_mathematics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_formal_logic.yaml
lmms_eval/tasks/mmlu/generative/mmlu_global_facts.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_biology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_chemistry.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_computer_science.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_european_history.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_geography.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_government_and_politics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_macroeconomics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_mathematics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_microeconomics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_physics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_psychology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_statistics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_us_history.yaml
lmms_eval/tasks/mmlu/generative/mmlu_high_school_world_history.yaml
lmms_eval/tasks/mmlu/generative/mmlu_human_aging.yaml
lmms_eval/tasks/mmlu/generative/mmlu_human_sexuality.yaml
lmms_eval/tasks/mmlu/generative/mmlu_international_law.yaml
lmms_eval/tasks/mmlu/generative/mmlu_jurisprudence.yaml
lmms_eval/tasks/mmlu/generative/mmlu_logical_fallacies.yaml
lmms_eval/tasks/mmlu/generative/mmlu_machine_learning.yaml
lmms_eval/tasks/mmlu/generative/mmlu_management.yaml
lmms_eval/tasks/mmlu/generative/mmlu_marketing.yaml
lmms_eval/tasks/mmlu/generative/mmlu_medical_genetics.yaml
lmms_eval/tasks/mmlu/generative/mmlu_miscellaneous.yaml
lmms_eval/tasks/mmlu/generative/mmlu_moral_disputes.yaml
lmms_eval/tasks/mmlu/generative/mmlu_moral_scenarios.yaml
lmms_eval/tasks/mmlu/generative/mmlu_nutrition.yaml
lmms_eval/tasks/mmlu/generative/mmlu_philosophy.yaml
lmms_eval/tasks/mmlu/generative/mmlu_prehistory.yaml
lmms_eval/tasks/mmlu/generative/mmlu_professional_accounting.yaml
lmms_eval/tasks/mmlu/generative/mmlu_professional_law.yaml
lmms_eval/tasks/mmlu/generative/mmlu_professional_medicine.yaml
lmms_eval/tasks/mmlu/generative/mmlu_professional_psychology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_public_relations.yaml
lmms_eval/tasks/mmlu/generative/mmlu_security_studies.yaml
lmms_eval/tasks/mmlu/generative/mmlu_sociology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_us_foreign_policy.yaml
lmms_eval/tasks/mmlu/generative/mmlu_virology.yaml
lmms_eval/tasks/mmlu/generative/mmlu_world_religions.yaml
lmms_eval/tasks/mmlu_pro/_mmlu_pro.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_biology.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_business.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_chemistry.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_computer_science.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_economics.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_engineering.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_health.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_history.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_law.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_math.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_other.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_philosophy.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_physics.yaml
lmms_eval/tasks/mmlu_pro/mmlu_pro_psychology.yaml
lmms_eval/tasks/mmlu_pro/utils.py
lmms_eval/tasks/mmmu/mmmu.yaml
lmms_eval/tasks/mmmu/mmmu_group_img.yaml
lmms_eval/tasks/mmmu/mmmu_group_img_test.yaml
lmms_eval/tasks/mmmu/mmmu_group_img_val.yaml
lmms_eval/tasks/mmmu/mmmu_test.yaml
lmms_eval/tasks/mmmu/mmmu_val.yaml
lmms_eval/tasks/mmmu/mmmu_val_pass64.yaml
lmms_eval/tasks/mmmu/mmmu_val_qwen.yaml
lmms_eval/tasks/mmmu/mmmu_val_reasoning.yaml
lmms_eval/tasks/mmmu/utils.py
lmms_eval/tasks/mmmu/utils_group_img.py
lmms_eval/tasks/mmmu/reasoning/mmmu_val_reasoning.yaml
lmms_eval/tasks/mmmu/reasoning/utils.py
lmms_eval/tasks/mmmu_pro/mmmu_pro.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_composite.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_composite_cot.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_cot.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_standard.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_standard_cot.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_vision.yaml
lmms_eval/tasks/mmmu_pro/mmmu_pro_vision_cot.yaml
lmms_eval/tasks/mmmu_pro/utils.py
lmms_eval/tasks/mmmu_pro/reasoning/mmmu_pro_cot.yaml
lmms_eval/tasks/mmmu_pro/reasoning/mmmu_pro_standard_cot.yaml
lmms_eval/tasks/mmmu_pro/reasoning/mmmu_pro_vision_cot.yaml
lmms_eval/tasks/mmmu_pro/reasoning/utils.py
lmms_eval/tasks/mmrefine/mmrefine.yaml
lmms_eval/tasks/mmrefine/mmrefine_evals.py
lmms_eval/tasks/mmrefine/prompts.py
lmms_eval/tasks/mmrefine/utils.py
lmms_eval/tasks/mmsearch/constants.py
lmms_eval/tasks/mmsearch/get_final_scores.py
lmms_eval/tasks/mmsearch/lmms_eval_utils.py
lmms_eval/tasks/mmsearch/mmsearch.yaml
lmms_eval/tasks/mmsearch/mmsearch_end2end.yaml
lmms_eval/tasks/mmsearch/mmsearch_rerank.yaml
lmms_eval/tasks/mmsearch/mmsearch_summarization.yaml
lmms_eval/tasks/mmsearch/prompts/prompt.py
lmms_eval/tasks/mmsearch/prompts/prompt_w_imagesearch.py
lmms_eval/tasks/mmsearch/retrieve_content/retriever.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/tokenizers.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/utils.py
lmms_eval/tasks/mmsearch/score/f1_score.py
lmms_eval/tasks/mmsearch/score/req_score.py
lmms_eval/tasks/mmsearch/score/result_summary.py
lmms_eval/tasks/mmsearch/utils/image_utils.py
lmms_eval/tasks/mmsearch/utils/lmms_eval_utils.py
lmms_eval/tasks/mmsearch/utils/prompt_utils.py
lmms_eval/tasks/mmsearch/utils/utils.py
lmms_eval/tasks/mmsearch/utils/web_content_utils.py
lmms_eval/tasks/mmsearch_plus/_default_template_mmsearch_plus.yaml
lmms_eval/tasks/mmsearch_plus/_mmsearch_plus.yaml
lmms_eval/tasks/mmsearch_plus/decrypt_utils.py
lmms_eval/tasks/mmsearch_plus/mmsearch_plus_vqa.yaml
lmms_eval/tasks/mmsearch_plus/utils.py
lmms_eval/tasks/mmsi_bench/msr_bench.yaml
lmms_eval/tasks/mmsi_bench/utils.py
lmms_eval/tasks/mmsi_video/mmsi_video.yaml
lmms_eval/tasks/mmsi_video/mmsi_video_sc.yaml
lmms_eval/tasks/mmsi_video/mmsi_video_u50.yaml
lmms_eval/tasks/mmsi_video/utils.py
lmms_eval/tasks/mmstar/mmstar.yaml
lmms_eval/tasks/mmstar/mmstar_ko.yaml
lmms_eval/tasks/mmstar/mmstar_oc.yaml
lmms_eval/tasks/mmstar/mmstar_qwen.yaml
lmms_eval/tasks/mmstar/utils.py
lmms_eval/tasks/mmstar/reasoning/mmstar_reasoning.yaml
lmms_eval/tasks/mmstar/reasoning/utils.py
lmms_eval/tasks/mmsu/mmsu.yaml
lmms_eval/tasks/mmsu/utils.py
lmms_eval/tasks/mmt/mmt.yaml
lmms_eval/tasks/mmt/mmt_mi.yaml
lmms_eval/tasks/mmt/mmt_mi_test.yaml
lmms_eval/tasks/mmt/mmt_mi_val.yaml
lmms_eval/tasks/mmt/mmt_test.yaml
lmms_eval/tasks/mmt/mmt_val.yaml
lmms_eval/tasks/mmt/utils.py
lmms_eval/tasks/mmupd/mmaad_base.yaml
lmms_eval/tasks/mmupd/mmaad_instruction.yaml
lmms_eval/tasks/mmupd/mmaad_option.yaml
lmms_eval/tasks/mmupd/mmiasd_base.yaml
lmms_eval/tasks/mmupd/mmiasd_instruction.yaml
lmms_eval/tasks/mmupd/mmiasd_option.yaml
lmms_eval/tasks/mmupd/mmivqd_base.yaml
lmms_eval/tasks/mmupd/mmivqd_instruction.yaml
lmms_eval/tasks/mmupd/mmivqd_option.yaml
lmms_eval/tasks/mmupd/mmupd.yaml
lmms_eval/tasks/mmupd/mmupd_base.yaml
lmms_eval/tasks/mmupd/mmupd_evals.py
lmms_eval/tasks/mmupd/mmupd_instruction.yaml
lmms_eval/tasks/mmupd/mmupd_option.yaml
lmms_eval/tasks/mmupd/utils.py
lmms_eval/tasks/mmvet/mmvet.yaml
lmms_eval/tasks/mmvet/utils.py
lmms_eval/tasks/mmvetv2/mmvetv2.yaml
lmms_eval/tasks/mmvetv2/mmvetv2_group_img.yaml
lmms_eval/tasks/mmvetv2/utils.py
lmms_eval/tasks/mmvp/mmvp.yaml
lmms_eval/tasks/mmvp/utils.py
lmms_eval/tasks/mmvu/mmvu_val.yaml
lmms_eval/tasks/mmvu/mmvu_val_cot copy.yaml
lmms_eval/tasks/mmvu/utils.py
lmms_eval/tasks/mmworld/mmworld.yaml
lmms_eval/tasks/mmworld/utils.py
lmms_eval/tasks/motionbench/motionbench.yaml
lmms_eval/tasks/motionbench/motionbench_full.yaml
lmms_eval/tasks/motionbench/utils.py
lmms_eval/tasks/moviechat/moviechat_breakpoint.yaml
lmms_eval/tasks/moviechat/moviechat_global.yaml
lmms_eval/tasks/moviechat/utils.py
lmms_eval/tasks/mtvqa/mtvqa.yaml
lmms_eval/tasks/mtvqa/utils.py
lmms_eval/tasks/muchomusic/muchomusic.yaml
lmms_eval/tasks/muchomusic/utils.py
lmms_eval/tasks/muirbench/muirbench.yaml
lmms_eval/tasks/muirbench/utils.py
lmms_eval/tasks/multidocvqa/multidocvqa.yaml
lmms_eval/tasks/multidocvqa/multidocvqa_test.yaml
lmms_eval/tasks/multidocvqa/multidocvqa_val.yaml
lmms_eval/tasks/multidocvqa/utils.py
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/arabic_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/bengali_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/chinese_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/french_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/hindi_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/japanese_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/rule.json
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/russian_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/spanish_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/urdu_llava_in_the_wild.yaml
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/utils.py
lmms_eval/tasks/multimodal_rewardbench/multimodal_rewardbench.yaml
lmms_eval/tasks/multimodal_rewardbench/utils.py
lmms_eval/tasks/mvbench/mvbench.yaml
lmms_eval/tasks/mvbench/mvbench_action_antonym.yaml
lmms_eval/tasks/mvbench/mvbench_action_count.yaml
lmms_eval/tasks/mvbench/mvbench_action_localization.yaml
lmms_eval/tasks/mvbench/mvbench_action_prediction.yaml
lmms_eval/tasks/mvbench/mvbench_action_sequence.yaml
lmms_eval/tasks/mvbench/mvbench_character_order.yaml
lmms_eval/tasks/mvbench/mvbench_counterfactual_inference.yaml
lmms_eval/tasks/mvbench/mvbench_egocentric_navigation.yaml
lmms_eval/tasks/mvbench/mvbench_episodic_reasoning.yaml
lmms_eval/tasks/mvbench/mvbench_fine_grained_action.yaml
lmms_eval/tasks/mvbench/mvbench_fine_grained_pose.yaml
lmms_eval/tasks/mvbench/mvbench_moving_attribute.yaml
lmms_eval/tasks/mvbench/mvbench_moving_count.yaml
lmms_eval/tasks/mvbench/mvbench_moving_direction.yaml
lmms_eval/tasks/mvbench/mvbench_object_existence.yaml
lmms_eval/tasks/mvbench/mvbench_object_interaction.yaml
lmms_eval/tasks/mvbench/mvbench_object_shuffle.yaml
lmms_eval/tasks/mvbench/mvbench_scene_transition.yaml
lmms_eval/tasks/mvbench/mvbench_state_change.yaml
lmms_eval/tasks/mvbench/mvbench_unexpected_action.yaml
lmms_eval/tasks/mvbench/utils.py
lmms_eval/tasks/mvp/mvp_mini.yaml
lmms_eval/tasks/mvp/mvp_mini_human_object_interactions.yaml
lmms_eval/tasks/mvp/mvp_mini_intuitive_physics.yaml
lmms_eval/tasks/mvp/mvp_mini_robot_object_interactions.yaml
lmms_eval/tasks/mvp/mvp_mini_temporal_reasoning.yaml
lmms_eval/tasks/mvp/utils.py
lmms_eval/tasks/naturalbench/naturalbench.yaml
lmms_eval/tasks/naturalbench/utils.py
lmms_eval/tasks/neptune/neptune.yaml
lmms_eval/tasks/neptune/neptune_full_i.yaml
lmms_eval/tasks/neptune/neptune_full_v.yaml
lmms_eval/tasks/neptune/neptune_mma_i.yaml
lmms_eval/tasks/neptune/neptune_mma_v.yaml
lmms_eval/tasks/neptune/neptune_mmh_i.yaml
lmms_eval/tasks/neptune/neptune_mmh_v.yaml
lmms_eval/tasks/neptune/utils.py
lmms_eval/tasks/nextqa/nextqa.yaml
lmms_eval/tasks/nextqa/nextqa_mc_test.yaml
lmms_eval/tasks/nextqa/nextqa_oe_test.yaml
lmms_eval/tasks/nextqa/nextqa_oe_val.yaml
lmms_eval/tasks/nextqa/utils.py
lmms_eval/tasks/nocaps/nocaps.yaml
lmms_eval/tasks/nocaps/nocaps_test.yaml
lmms_eval/tasks/nocaps/nocaps_val.yaml
lmms_eval/tasks/nocaps/nocaps_val_lite.yaml
lmms_eval/tasks/nocaps/utils.py
lmms_eval/tasks/ocrbench/ocrbench.yaml
lmms_eval/tasks/ocrbench/upload_ocrbench.py
lmms_eval/tasks/ocrbench/utils.py
lmms_eval/tasks/ocrbench/reasoning/ocrbench_reasoning.yaml
lmms_eval/tasks/ocrbench/reasoning/utils.py
lmms_eval/tasks/ocrbench_v2/IoUscore_metric.py
lmms_eval/tasks/ocrbench_v2/TEDS_metric.py
lmms_eval/tasks/ocrbench_v2/__init__.py
lmms_eval/tasks/ocrbench_v2/ocrbench_v2.yaml
lmms_eval/tasks/ocrbench_v2/page_ocr_metric.py
lmms_eval/tasks/ocrbench_v2/parallel.py
lmms_eval/tasks/ocrbench_v2/spotting_metric.py
lmms_eval/tasks/ocrbench_v2/upload_ocrbench_v2.py
lmms_eval/tasks/ocrbench_v2/utils.py
lmms_eval/tasks/ocrbench_v2/vqa_metric.py
lmms_eval/tasks/ocrbench_v2/reasoning/ocrbench_v2_reasoning.yaml
lmms_eval/tasks/ocrbench_v2/reasoning/utils.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/readme.txt
lmms_eval/tasks/ocrbench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/script.py
lmms_eval/tasks/officeqa/officeqa.yaml
lmms_eval/tasks/officeqa/utils.py
lmms_eval/tasks/ok_vqa/_generate_config.py
lmms_eval/tasks/ok_vqa/_ok_vqa.yaml
lmms_eval/tasks/ok_vqa/ok_vqa_val2014.yaml
lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml
lmms_eval/tasks/ok_vqa/utils.py
lmms_eval/tasks/olympiadbench/cn_utils.py
lmms_eval/tasks/olympiadbench/en_utils.py
lmms_eval/tasks/olympiadbench/olympiadbench.yaml
lmms_eval/tasks/olympiadbench/olympiadbench_OE_MM_maths_en_COMP.yaml
lmms_eval/tasks/olympiadbench/olympiadbench_OE_MM_physics_en_COMP.yaml
lmms_eval/tasks/olympiadbench/olympiadbench_evals.py
lmms_eval/tasks/olympiadbench/testmini_utils.py
lmms_eval/tasks/olympiadbench_mimo/en_utils.py
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_all_boxed.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_boxed.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_evals.py
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_image_math_en.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_image_math_zh.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_image_physics_en.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_image_physics_zh.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_math_en.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_math_en_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_math_zh.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_math_zh_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official_en.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official_en_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official_zh.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_official_zh_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_physics_en.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_physics_en_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_physics_zh.yaml
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_physics_zh_no_proof.yaml
lmms_eval/tasks/olympiadbench_mimo/utils.py
lmms_eval/tasks/olympiadbench_mimo/zh_utils.py
lmms_eval/tasks/olympiadbench_mimo/reasoning/olympiadbench_math_en_reasoning.yaml
lmms_eval/tasks/olympiadbench_mimo/reasoning/olympiadbench_math_zh_reasoning.yaml
lmms_eval/tasks/olympiadbench_mimo/reasoning/olympiadbench_physics_en_reasoning.yaml
lmms_eval/tasks/olympiadbench_mimo/reasoning/olympiadbench_physics_zh_reasoning.yaml
lmms_eval/tasks/olympiadbench_mimo/reasoning/olympiadbench_reasoning.yaml
lmms_eval/tasks/olympiadbench_mimo/reasoning/utils.py
lmms_eval/tasks/omni_bench/omni_bench.yaml
lmms_eval/tasks/omni_bench/omni_bench_audio_transcript.yaml
lmms_eval/tasks/omni_bench/omni_bench_image_caption.yaml
lmms_eval/tasks/omni_bench/utils.py
lmms_eval/tasks/omni_spatial/omni_spatial.yaml
lmms_eval/tasks/omni_spatial/utils.py
lmms_eval/tasks/omnidocbench/omnidocbench.yaml
lmms_eval/tasks/omnidocbench/utils.py
lmms_eval/tasks/omnispatial/omnispatial.yaml
lmms_eval/tasks/omnispatial/utils.py
lmms_eval/tasks/open_asr/openasr.yaml
lmms_eval/tasks/open_asr/openasr_ami.yaml
lmms_eval/tasks/open_asr/openasr_common_voice.yaml
lmms_eval/tasks/open_asr/openasr_earnings22.yaml
lmms_eval/tasks/open_asr/openasr_gigaspeech.yaml
lmms_eval/tasks/open_asr/openasr_librispeech.yaml
lmms_eval/tasks/open_asr/openasr_librispeech_test_clean.yaml
lmms_eval/tasks/open_asr/openasr_librispeech_test_other.yaml
lmms_eval/tasks/open_asr/openasr_spgispeech.yaml
lmms_eval/tasks/open_asr/openasr_tedlium.yaml
lmms_eval/tasks/open_asr/openasr_voxpopuli.yaml
lmms_eval/tasks/open_asr/utils.py
lmms_eval/tasks/openai_math/openai_math.yaml
lmms_eval/tasks/openai_math/openai_math_agg64.yaml
lmms_eval/tasks/openai_math/openai_math_cov64.yaml
lmms_eval/tasks/openai_math/openai_math_cov64_train.yaml
lmms_eval/tasks/openai_math/openai_math_maj64_cov64.yaml
lmms_eval/tasks/openai_math/openai_math_maj64_cov64_train.yaml
lmms_eval/tasks/openai_math/openai_math_train.yaml
lmms_eval/tasks/openai_math/utils.py
lmms_eval/tasks/openhermes/openhermes.yaml
lmms_eval/tasks/openhermes/utils.py
lmms_eval/tasks/openslr_librispeech/openslr_librispeech.yaml
lmms_eval/tasks/openslr_librispeech/openslr_librispeech_other.yaml
lmms_eval/tasks/openxvqa/openxvqa.yaml
lmms_eval/tasks/openxvqa/utils.py
lmms_eval/tasks/osi_bench/__init__.py
lmms_eval/tasks/osi_bench/osi_bench.yaml
lmms_eval/tasks/osi_bench/osi_bench_frames.yaml
lmms_eval/tasks/osi_bench/utils.py
lmms_eval/tasks/osworld_g/osworld_g.yaml
lmms_eval/tasks/osworld_g/utils.py
lmms_eval/tasks/ovobench/constant.py
lmms_eval/tasks/ovobench/ovo_backward.yaml
lmms_eval/tasks/ovobench/ovo_forward.yaml
lmms_eval/tasks/ovobench/ovo_realtime.yaml
lmms_eval/tasks/ovobench/ovobench.yaml
lmms_eval/tasks/ovobench/utils.py
lmms_eval/tasks/ovobench/score_utils/score.py
lmms_eval/tasks/ovr_kinetics/ovr_kinetics.yaml
lmms_eval/tasks/ovr_kinetics/utils.py
lmms_eval/tasks/paibench_u/paibench_u.yaml
lmms_eval/tasks/paibench_u/utils.py
lmms_eval/tasks/people_speech/people_speech_val.yaml
lmms_eval/tasks/people_speech/utils.py
lmms_eval/tasks/perceptioncomp/perceptioncomp.yaml
lmms_eval/tasks/perceptioncomp/perceptioncomp_reasoning.yaml
lmms_eval/tasks/perceptioncomp/utils.py
lmms_eval/tasks/perceptiontest/test/perceptiontest_mc.yaml
lmms_eval/tasks/perceptiontest/test/perceptiontest_mcppl.yaml
lmms_eval/tasks/perceptiontest/test/utils.py
lmms_eval/tasks/perceptiontest/val/perceptiontest_mc.yaml
lmms_eval/tasks/perceptiontest/val/perceptiontest_mcppl.yaml
lmms_eval/tasks/perceptiontest/val/utils.py
lmms_eval/tasks/physical_ai_understanding/physical_ai_understanding.yaml
lmms_eval/tasks/physical_ai_understanding/utils.py
lmms_eval/tasks/phyx/phyx.yaml
lmms_eval/tasks/phyx/phyx_evals.py
lmms_eval/tasks/phyx/phyx_mc.yaml
lmms_eval/tasks/phyx/phyx_mini_mc.yaml
lmms_eval/tasks/phyx/phyx_mini_oe.yaml
lmms_eval/tasks/phyx/phyx_oe.yaml
lmms_eval/tasks/phyx/utils.py
lmms_eval/tasks/phyx/reasoning/phyx_mc_reasoning.yaml
lmms_eval/tasks/phyx/reasoning/phyx_mini_mc_reasoning.yaml
lmms_eval/tasks/phyx/reasoning/phyx_mini_oe_reasoning.yaml
lmms_eval/tasks/phyx/reasoning/phyx_oe_reasoning.yaml
lmms_eval/tasks/phyx/reasoning/phyx_reasoning.yaml
lmms_eval/tasks/phyx/reasoning/utils.py
lmms_eval/tasks/pixmo_count/pixmo_count.yaml
lmms_eval/tasks/pixmo_count/utils.py
lmms_eval/tasks/pixmo_count/reasoning/pixmo_count_reasoning.yaml
lmms_eval/tasks/pixmo_count/reasoning/utils.py
lmms_eval/tasks/plm_videobench/eval_utils.py
lmms_eval/tasks/plm_videobench/fgqa/fgqa_test.yaml
lmms_eval/tasks/plm_videobench/fgqa/fgqa_utils.py
lmms_eval/tasks/plm_videobench/rcap/rcap_test.yaml
lmms_eval/tasks/plm_videobench/rcap/rcap_utils.py
lmms_eval/tasks/plm_videobench/rdcap/rdcap_test.yaml
lmms_eval/tasks/plm_videobench/rdcap/rdcap_utils.py
lmms_eval/tasks/plm_videobench/rtloc/rtloc_test.yaml
lmms_eval/tasks/plm_videobench/rtloc/rtloc_utils.py
lmms_eval/tasks/plm_videobench/sgqa/sgqa_test.yaml
lmms_eval/tasks/plm_videobench/sgqa/sgqa_utils.py
lmms_eval/tasks/pointbench/pointbench.yaml
lmms_eval/tasks/pointbench/utils.py
lmms_eval/tasks/pope/pope.yaml
lmms_eval/tasks/pope/pope_adv.yaml
lmms_eval/tasks/pope/pope_full.yaml
lmms_eval/tasks/pope/pope_pop.yaml
lmms_eval/tasks/pope/pope_random.yaml
lmms_eval/tasks/pope/utils.py
lmms_eval/tasks/prismm_bench/prismm_bench_identification.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_identification_whole_doc.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_identification_whole_page.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_pair_match.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_remedy.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_remedy_whole_doc.yaml
lmms_eval/tasks/prismm_bench/prismm_bench_remedy_whole_page.yaml
lmms_eval/tasks/prismm_bench/utils.py
lmms_eval/tasks/pushupbench/pushupbench.yaml
lmms_eval/tasks/pushupbench/utils.py
lmms_eval/tasks/qbench/abench_dev.yaml
lmms_eval/tasks/qbench/qbench2_dev.yaml
lmms_eval/tasks/qbench/qbench_dev.yaml
lmms_eval/tasks/qbench/qbenchs_dev.yaml
lmms_eval/tasks/qbench/utils.py
lmms_eval/tasks/realunify/__init__.py
lmms_eval/tasks/realunify/_default_template.yaml
lmms_eval/tasks/realunify/realunify.yaml
lmms_eval/tasks/realunify/realunify_attentional_focusing.yaml
lmms_eval/tasks/realunify/realunify_mental_reconstruction.yaml
lmms_eval/tasks/realunify/realunify_mental_tracking.yaml
lmms_eval/tasks/realunify/utils.py
lmms_eval/tasks/realworldqa/realworldqa.yaml
lmms_eval/tasks/realworldqa/utils.py
lmms_eval/tasks/realworldqa/reasoning/realworldqa_reasoning.yaml
lmms_eval/tasks/realworldqa/reasoning/utils.py
lmms_eval/tasks/refcoco/_generate_config.py
lmms_eval/tasks/refcoco/_refcoco.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_rec_test.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_rec_testA.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_rec_testB.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_rec_val.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_test.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_testA.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_testB.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_val.yaml
lmms_eval/tasks/refcoco/refcoco_bbox_val_lite.yaml
lmms_eval/tasks/refcoco/refcoco_seg_test.yaml
lmms_eval/tasks/refcoco/refcoco_seg_testA.yaml
lmms_eval/tasks/refcoco/refcoco_seg_testB.yaml
lmms_eval/tasks/refcoco/refcoco_seg_val.yaml
lmms_eval/tasks/refcoco/utils.py
lmms_eval/tasks/refcoco/utils_rec.py
lmms_eval/tasks/refcoco+/_generate_config.py
lmms_eval/tasks/refcoco+/_refcoco.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testA.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_testB.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_rec_val.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_testA.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_testB.yaml
lmms_eval/tasks/refcoco+/refcoco+_bbox_val.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_testA.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_testB.yaml
lmms_eval/tasks/refcoco+/refcoco+_seg_val.yaml
lmms_eval/tasks/refcoco+/utils.py
lmms_eval/tasks/refcoco+/utils_rec.py
lmms_eval/tasks/refcocog/_generate_config.py
lmms_eval/tasks/refcocog/_refcoco.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_rec_test.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_rec_val.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_test.yaml
lmms_eval/tasks/refcocog/refcocog_bbox_val.yaml
lmms_eval/tasks/refcocog/refcocog_seg_test.yaml
lmms_eval/tasks/refcocog/refcocog_seg_val.yaml
lmms_eval/tasks/refcocog/utils.py
lmms_eval/tasks/refcocog/utils_rec.py
lmms_eval/tasks/refspatial/refspatial.yaml
lmms_eval/tasks/refspatial/refspatial_location.yaml
lmms_eval/tasks/refspatial/refspatial_placement.yaml
lmms_eval/tasks/refspatial/refspatial_unseen.yaml
lmms_eval/tasks/refspatial/utils.py
lmms_eval/tasks/repcount/repcount.yaml
lmms_eval/tasks/repcount/utils.py
lmms_eval/tasks/revsi/revsi.yaml
lmms_eval/tasks/revsi/revsi_16_frame.yaml
lmms_eval/tasks/revsi/revsi_32_frame.yaml
lmms_eval/tasks/revsi/revsi_64_frame.yaml
lmms_eval/tasks/revsi/revsi_all_frame.yaml
lmms_eval/tasks/revsi/utils.py
lmms_eval/tasks/robo_spatial/_default_template.yaml
lmms_eval/tasks/robo_spatial/pre_process.py
lmms_eval/tasks/robo_spatial/robo_spatial_all.yaml
lmms_eval/tasks/robo_spatial/robo_spatial_compatibility.yaml
lmms_eval/tasks/robo_spatial/robo_spatial_configuration.yaml
lmms_eval/tasks/robo_spatial/robo_spatial_context.yaml
lmms_eval/tasks/robo_spatial/utils.py
lmms_eval/tasks/saco/compute_metrics.py
lmms_eval/tasks/saco/saco_gold.yaml
lmms_eval/tasks/saco/saco_gold_attributes.yaml
lmms_eval/tasks/saco/saco_gold_crowded.yaml
lmms_eval/tasks/saco/saco_gold_food.yaml
lmms_eval/tasks/saco/saco_gold_metaclip.yaml
lmms_eval/tasks/saco/saco_gold_sa1b.yaml
lmms_eval/tasks/saco/saco_gold_sport.yaml
lmms_eval/tasks/saco/saco_gold_wiki_common.yaml
lmms_eval/tasks/saco/utils.py
lmms_eval/tasks/safety_redteam/safety_jailbreakbench_benign.yaml
lmms_eval/tasks/safety_redteam/safety_jailbreakbench_harmful.yaml
lmms_eval/tasks/safety_redteam/safety_redteam.yaml
lmms_eval/tasks/safety_redteam/utils.py
lmms_eval/tasks/salbench/o3.yaml
lmms_eval/tasks/salbench/o3_box.yaml
lmms_eval/tasks/salbench/o3_box_img.yaml
lmms_eval/tasks/salbench/p3.yaml
lmms_eval/tasks/salbench/p3_box.yaml
lmms_eval/tasks/salbench/p3_box_img.yaml
lmms_eval/tasks/salbench/utils.py
lmms_eval/tasks/sat/sat.yaml
lmms_eval/tasks/sat/utils.py
lmms_eval/tasks/scibench/scibench.yaml
lmms_eval/tasks/scibench/scibench_chemistry.yaml
lmms_eval/tasks/scibench/scibench_math.yaml
lmms_eval/tasks/scibench/scibench_physics.yaml
lmms_eval/tasks/scibench/utils.py
lmms_eval/tasks/scienceqa/scienceqa.yaml
lmms_eval/tasks/scienceqa/scienceqa_full.yaml
lmms_eval/tasks/scienceqa/scienceqa_img.yaml
lmms_eval/tasks/scienceqa/utils.py
lmms_eval/tasks/scivideobench/scivideobench.yaml
lmms_eval/tasks/scivideobench/utils.py
lmms_eval/tasks/screenspot/_screenspot.yaml
lmms_eval/tasks/screenspot/screenspot_rec_test.yaml
lmms_eval/tasks/screenspot/screenspot_reg_test.yaml
lmms_eval/tasks/screenspot/utils.py
lmms_eval/tasks/screenspot/utils_rec.py
lmms_eval/tasks/screenspot_pro/screenspot_pro.yaml
lmms_eval/tasks/screenspot_pro/utils.py
lmms_eval/tasks/screenspot_v2/screenspot_v2.yaml
lmms_eval/tasks/screenspot_v2/utils.py
lmms_eval/tasks/seedbench/ko_utils.py
lmms_eval/tasks/seedbench/seedbench.yaml
lmms_eval/tasks/seedbench/seedbench_ko.yaml
lmms_eval/tasks/seedbench/seedbench_lite.yaml
lmms_eval/tasks/seedbench/seedbench_ppl.yaml
lmms_eval/tasks/seedbench/utils.py
lmms_eval/tasks/seedbench/reasoning/seedbench_reasoning.yaml
lmms_eval/tasks/seedbench/reasoning/utils.py
lmms_eval/tasks/seedbench_2/seedbench_2.yaml
lmms_eval/tasks/seedbench_2/utils.py
lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
lmms_eval/tasks/seedbench_2_plus/utils.py
lmms_eval/tasks/seedbench_2_plus/reasoning/seedbench_2_plus_reasoning.yaml
lmms_eval/tasks/seedbench_2_plus/reasoning/utils.py
lmms_eval/tasks/seephys/seephys.yaml
lmms_eval/tasks/seephys/seephys_evals.py
lmms_eval/tasks/seephys/seephys_utils.py
lmms_eval/tasks/simplevqa/simplevqa.yaml
lmms_eval/tasks/simplevqa/utils.py
lmms_eval/tasks/sitebench/merge_results.py
lmms_eval/tasks/sitebench/site_image.yaml
lmms_eval/tasks/sitebench/site_video.yaml
lmms_eval/tasks/sitebench/utils.py
lmms_eval/tasks/sitebench/multi_image_input/site_video_multiimage.yaml
lmms_eval/tasks/sitebench/multi_image_input/utils.py
lmms_eval/tasks/snsbench/metrics.py
lmms_eval/tasks/snsbench/snsbench.yaml
lmms_eval/tasks/snsbench/utils.py
lmms_eval/tasks/song_describer/song_describer.yaml
lmms_eval/tasks/song_describer/song_describer_train.yaml
lmms_eval/tasks/song_describer/song_describer_validation.yaml
lmms_eval/tasks/song_describer/utils.py
lmms_eval/tasks/sparbench/sparbench.yaml
lmms_eval/tasks/sparbench/sparbench_tiny.yaml
lmms_eval/tasks/sparbench/utils.py
lmms_eval/tasks/spatial457/__init__.py
lmms_eval/tasks/spatial457/spatial457.yaml
lmms_eval/tasks/spatial457/spatial457_l1_single.yaml
lmms_eval/tasks/spatial457/spatial457_l2_objects.yaml
lmms_eval/tasks/spatial457/spatial457_l3_2d_spatial.yaml
lmms_eval/tasks/spatial457/spatial457_l4_occ.yaml
lmms_eval/tasks/spatial457/spatial457_l4_pose.yaml
lmms_eval/tasks/spatial457/spatial457_l5_6d_spatial.yaml
lmms_eval/tasks/spatial457/spatial457_l5_collision.yaml
lmms_eval/tasks/spatial457/utils.py
lmms_eval/tasks/spatial_dise/spatial_dise.yaml
lmms_eval/tasks/spatial_dise/spatial_dise_separate.yaml
lmms_eval/tasks/spatial_dise/utils.py
lmms_eval/tasks/spatialtreebench/spatialtreebench.yaml
lmms_eval/tasks/spatialtreebench/spatree_hierarchy.json
lmms_eval/tasks/spatialtreebench/utils.py
lmms_eval/tasks/spatialtreebench/metrics/rule_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/parse_output.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/result_init.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/evaluator.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/cogmap_evaluator.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/cogmap_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/graph_operations.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/base_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/extractors.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/io_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/io_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/spatial_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/text_utils.py
lmms_eval/tasks/spatialviz/spatialviz.yaml
lmms_eval/tasks/spatialviz/utils.py
lmms_eval/tasks/ssv2/ssv2.yaml
lmms_eval/tasks/ssv2/utils.py
lmms_eval/tasks/stare/stare.yaml
lmms_eval/tasks/stare/stare_2d_text_instruct.yaml
lmms_eval/tasks/stare/stare_2d_text_instruct_vsim.yaml
lmms_eval/tasks/stare/stare_2d_va.yaml
lmms_eval/tasks/stare/stare_2d_va_vsim.yaml
lmms_eval/tasks/stare/stare_3d_text_instruct.yaml
lmms_eval/tasks/stare/stare_3d_text_instruct_vsim.yaml
lmms_eval/tasks/stare/stare_3d_va.yaml
lmms_eval/tasks/stare/stare_3d_va_vsim.yaml
lmms_eval/tasks/stare/stare_folding_nets.yaml
lmms_eval/tasks/stare/stare_folding_nets_vsim.yaml
lmms_eval/tasks/stare/stare_perspective.yaml
lmms_eval/tasks/stare/stare_tangram_puzzle.yaml
lmms_eval/tasks/stare/stare_tangram_puzzle_vsim.yaml
lmms_eval/tasks/stare/stare_temporal.yaml
lmms_eval/tasks/stare/utils.py
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_age.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_emotions.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_event.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_gender.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_pitch.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_rhythm.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_scene.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_speed.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_vocalsound.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_voice_styles.yaml
lmms_eval/tasks/step2_audio_paralinguistic/step2_audio_paralinguistic_voice_tone.yaml
lmms_eval/tasks/step2_audio_paralinguistic/utils.py
lmms_eval/tasks/structeditbench/__init__.py
lmms_eval/tasks/structeditbench/structeditbench.yaml
lmms_eval/tasks/structeditbench/utils.py
lmms_eval/tasks/stvqa/stvqa.yaml
lmms_eval/tasks/stvqa/utils.py
lmms_eval/tasks/super_gpqa/super_gpqa.yaml
lmms_eval/tasks/super_gpqa/super_gpqa_multishot.yaml
lmms_eval/tasks/super_gpqa/utils.py
lmms_eval/tasks/synthdog/donut_evaluator.py
lmms_eval/tasks/synthdog/synthdog.yaml
lmms_eval/tasks/synthdog/synthdog_en.yaml
lmms_eval/tasks/synthdog/synthdog_zh.yaml
lmms_eval/tasks/synthdog/utils.py
lmms_eval/tasks/tau2_bench/tau2_bench_telecom.yaml
lmms_eval/tasks/tau2_bench/utils.py
lmms_eval/tasks/tedlium/tedlium_dev_test.yaml
lmms_eval/tasks/tedlium/tedlium_long_form.yaml
lmms_eval/tasks/tedlium/utils.py
lmms_eval/tasks/tempcompass/_tempcompass.yaml
lmms_eval/tasks/tempcompass/tempcompass_caption_matching.yaml
lmms_eval/tasks/tempcompass/tempcompass_captioning.yaml
lmms_eval/tasks/tempcompass/tempcompass_mc.yaml
lmms_eval/tasks/tempcompass/tempcompass_yes_no.yaml
lmms_eval/tasks/tempcompass/utils.py
lmms_eval/tasks/temporalbench/temporalbench.yaml
lmms_eval/tasks/temporalbench/temporalbench_long_qa.yaml
lmms_eval/tasks/temporalbench/temporalbench_short_caption.yaml
lmms_eval/tasks/temporalbench/temporalbench_short_qa.yaml
lmms_eval/tasks/temporalbench/utils.py
lmms_eval/tasks/textcaps/textcaps.yaml
lmms_eval/tasks/textcaps/textcaps_test.yaml
lmms_eval/tasks/textcaps/textcaps_train.yaml
lmms_eval/tasks/textcaps/textcaps_val.yaml
lmms_eval/tasks/textcaps/textcaps_val_lite.yaml
lmms_eval/tasks/textcaps/utils.py
lmms_eval/tasks/textvqa/_textvqa.yaml
lmms_eval/tasks/textvqa/textvqa_test.yaml
lmms_eval/tasks/textvqa/textvqa_val.yaml
lmms_eval/tasks/textvqa/textvqa_val_lite.yaml
lmms_eval/tasks/textvqa/utils.py
lmms_eval/tasks/timelens/timelens.yaml
lmms_eval/tasks/timelens/timelens_activitynet.yaml
lmms_eval/tasks/timelens/timelens_charades.yaml
lmms_eval/tasks/timelens/timelens_qvhighlights.yaml
lmms_eval/tasks/timelens/utils.py
lmms_eval/tasks/timescope/timescope.yaml
lmms_eval/tasks/timescope/utils.py
lmms_eval/tasks/tomato/tomato.yaml
lmms_eval/tasks/tomato/utils.py
lmms_eval/tasks/tvbench/tvbench.yaml
lmms_eval/tasks/tvbench/tvbench_action_antonym.yaml
lmms_eval/tasks/tvbench/tvbench_action_count.yaml
lmms_eval/tasks/tvbench/tvbench_action_localization.yaml
lmms_eval/tasks/tvbench/tvbench_action_sequence.yaml
lmms_eval/tasks/tvbench/tvbench_egocentric_sequence.yaml
lmms_eval/tasks/tvbench/tvbench_moving_direction.yaml
lmms_eval/tasks/tvbench/tvbench_object_count.yaml
lmms_eval/tasks/tvbench/tvbench_object_shuffle.yaml
lmms_eval/tasks/tvbench/tvbench_scene_transition.yaml
lmms_eval/tasks/tvbench/tvbench_unexpected_action.yaml
lmms_eval/tasks/tvbench/utils.py
lmms_eval/tasks/ueval/ueval.yaml
lmms_eval/tasks/ueval/utils.py
lmms_eval/tasks/uni_mmmu/__init__.py
lmms_eval/tasks/uni_mmmu/_default_template.yaml
lmms_eval/tasks/uni_mmmu/geometry.yaml
lmms_eval/tasks/uni_mmmu/jigsaw.yaml
lmms_eval/tasks/uni_mmmu/maze.yaml
lmms_eval/tasks/uni_mmmu/sliding.yaml
lmms_eval/tasks/uni_mmmu/uni_mmmu.yaml
lmms_eval/tasks/uni_mmmu/utils.py
lmms_eval/tasks/unig2u/unig2u.yaml
lmms_eval/tasks/unig2u/unig2u_GtA.yaml
lmms_eval/tasks/unig2u/auxsolidmath/auxsolidmath_easy.yaml
lmms_eval/tasks/unig2u/auxsolidmath/auxsolidmath_easy_visual_cot.yaml
lmms_eval/tasks/unig2u/auxsolidmath/utils.py
lmms_eval/tasks/unig2u/babyvision/babyvision.yaml
lmms_eval/tasks/unig2u/babyvision/babyvision_cot.yaml
lmms_eval/tasks/unig2u/babyvision/fine_grained_discrimination.yaml
lmms_eval/tasks/unig2u/babyvision/fine_grained_discrimination_visual_cot.yaml
lmms_eval/tasks/unig2u/babyvision/utils.py
lmms_eval/tasks/unig2u/babyvision/visual_tracking.yaml
lmms_eval/tasks/unig2u/babyvision/visual_tracking_visual_cot.yaml
lmms_eval/tasks/unig2u/chartqa100/chartqa100.yaml
lmms_eval/tasks/unig2u/chartqa100/chartqa100_visual_cot.yaml
lmms_eval/tasks/unig2u/chartqa100/utils.py
lmms_eval/tasks/unig2u/geometry3k/geometry3k.yaml
lmms_eval/tasks/unig2u/geometry3k/geometry3k_visual_cot.yaml
lmms_eval/tasks/unig2u/geometry3k/utils.py
lmms_eval/tasks/unig2u/illusionbench/arshia_utils.py
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_icon_scene_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_icon_scene_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_icon_shape_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_icon_shape_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_in_scene_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_in_scene_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_in_shape_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_in_shape_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_logo_scene_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_logo_scene_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_logo_shape_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_logo_shape_visual_cot.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_test.yaml
lmms_eval/tasks/unig2u/illusionbench/illusionbench_arshia_visual_cot_split.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_attribute_appr.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_attribute_appr_visual_cot.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_attribute_meas.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_attribute_meas_visual_cot.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_cot.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_motion_cam.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_motion_cam_visual_cot.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_motion_obj.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_motion_obj_visual_cot.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_msr.yaml
lmms_eval/tasks/unig2u/mmsi/mmsi_msr_visual_cot.yaml
lmms_eval/tasks/unig2u/mmsi/utils.py
lmms_eval/tasks/unig2u/phyx/phyx_cot.yaml
lmms_eval/tasks/unig2u/phyx/phyx_mechanics100.yaml
lmms_eval/tasks/unig2u/phyx/phyx_mechanics100_visual_cot.yaml
lmms_eval/tasks/unig2u/phyx/phyx_optics100.yaml
lmms_eval/tasks/unig2u/phyx/phyx_optics100_visual_cot.yaml
lmms_eval/tasks/unig2u/phyx/phyx_simple.yaml
lmms_eval/tasks/unig2u/phyx/utils.py
lmms_eval/tasks/unig2u/realunify/attentional_focusing.yaml
lmms_eval/tasks/unig2u/realunify/attentional_focusing_visual_cot.yaml
lmms_eval/tasks/unig2u/realunify/mental_reconstruction.yaml
lmms_eval/tasks/unig2u/realunify/mental_reconstruction_visual_cot.yaml
lmms_eval/tasks/unig2u/realunify/mental_tracking.yaml
lmms_eval/tasks/unig2u/realunify/mental_tracking_visual_cot.yaml
lmms_eval/tasks/unig2u/realunify/realunify.yaml
lmms_eval/tasks/unig2u/realunify/realunify_cot.yaml
lmms_eval/tasks/unig2u/realunify/utils.py
lmms_eval/tasks/unig2u/uni_mmmu/jigsaw100.yaml
lmms_eval/tasks/unig2u/uni_mmmu/jigsaw100_visual_cot.yaml
lmms_eval/tasks/unig2u/uni_mmmu/maze100.yaml
lmms_eval/tasks/unig2u/uni_mmmu/maze100_visual_cot.yaml
lmms_eval/tasks/unig2u/uni_mmmu/sliding54.yaml
lmms_eval/tasks/unig2u/uni_mmmu/sliding54_visual_cot.yaml
lmms_eval/tasks/unig2u/uni_mmmu/uni_mmmu.yaml
lmms_eval/tasks/unig2u/uni_mmmu/uni_mmmu_cot.yaml
lmms_eval/tasks/unig2u/uni_mmmu/utils.py
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_algorithmic.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_algorithmic_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_analogical.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_analogical_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_deductive.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_deductive_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_inductive.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_inductive_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_spatial.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_spatial_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/VisualPuzzles_visual_cot.yaml
lmms_eval/tasks/unig2u/visualpuzzles/utils.py
lmms_eval/tasks/unig2u/vsp/collision.yaml
lmms_eval/tasks/unig2u/vsp/collision_visual_cot.yaml
lmms_eval/tasks/unig2u/vsp/google_map.yaml
lmms_eval/tasks/unig2u/vsp/google_map_visual_cot.yaml
lmms_eval/tasks/unig2u/vsp/utils.py
lmms_eval/tasks/unig2u/vsp/vsp.yaml
lmms_eval/tasks/unig2u/vsp/vsp_cot.yaml
lmms_eval/tasks/vatex/_vatex.yaml
lmms_eval/tasks/vatex/utils.py
lmms_eval/tasks/vatex/vatex_test.yaml
lmms_eval/tasks/vatex/vatex_val_zh.yaml
lmms_eval/tasks/vbvr/__init__.py
lmms_eval/tasks/vbvr/utils.py
lmms_eval/tasks/vbvr/vbvr.yaml
lmms_eval/tasks/vbvr/vbvr_in_domain.yaml
lmms_eval/tasks/vbvr/vbvr_out_of_domain.yaml
lmms_eval/tasks/vbvr/vbvr_bench/__init__.py
lmms_eval/tasks/vbvr/vbvr_bench/utils.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/In_Domain_50_part1.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/In_Domain_50_part2.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/In_Domain_50_part3.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/In_Domain_50_part4.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/In_Domain_50_part5.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/Out_of_Domain_50_part1.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/Out_of_Domain_50_part2.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/Out_of_Domain_50_part3.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/Out_of_Domain_50_part4.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/Out_of_Domain_50_part5.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/__init__.py
lmms_eval/tasks/vbvr/vbvr_bench/evaluators/base_evaluator.py
lmms_eval/tasks/vcr_wiki/utils.py
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy_100.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_easy_500.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard_100.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_en_hard_500.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy_100.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_easy_500.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard_100.yaml
lmms_eval/tasks/vcr_wiki/vcr_wiki_zh_hard_500.yaml
lmms_eval/tasks/vdc/background_test.yaml
lmms_eval/tasks/vdc/camera_test.yaml
lmms_eval/tasks/vdc/detailed_test.yaml
lmms_eval/tasks/vdc/main_object_test.yaml
lmms_eval/tasks/vdc/short_test.yaml
lmms_eval/tasks/vdc/utils.py
lmms_eval/tasks/vending_bench2/utils.py
lmms_eval/tasks/vending_bench2/vending_bench2.yaml
lmms_eval/tasks/vggsound/utils.py
lmms_eval/tasks/vggsound/vggsound.yaml
lmms_eval/tasks/vibe_eval/utils.py
lmms_eval/tasks/vibe_eval/vibe_eval.yaml
lmms_eval/tasks/video-tt/_default_template.yaml
lmms_eval/tasks/video-tt/gpt_utils.py
lmms_eval/tasks/video-tt/utils.py
lmms_eval/tasks/video-tt/videott_all.yaml
lmms_eval/tasks/video-tt/videott_all_audio.yaml
lmms_eval/tasks/video-tt/videott_correct_leading_oe.yaml
lmms_eval/tasks/video-tt/videott_no_leading_oe.yaml
lmms_eval/tasks/video-tt/videott_paraphrase_oe.yaml
lmms_eval/tasks/video-tt/videott_single_mc.yaml
lmms_eval/tasks/video-tt/videott_single_mc_description.yaml
lmms_eval/tasks/video-tt/videott_wrong_leading_oe.yaml
lmms_eval/tasks/video_detail_description/utils.py
lmms_eval/tasks/video_detail_description/video_detail_description.yaml
lmms_eval/tasks/video_holmes/utils.py
lmms_eval/tasks/video_holmes/video_holmes.yaml
lmms_eval/tasks/video_holmes/video_holmes_reasoning.yaml
lmms_eval/tasks/video_holmes/video_holmes_test.yaml
lmms_eval/tasks/videochatgpt/_videochatgpt.yaml
lmms_eval/tasks/videochatgpt/utils.py
lmms_eval/tasks/videochatgpt/videochatgpt_consistency.yaml
lmms_eval/tasks/videochatgpt/videochatgpt_generic.yaml
lmms_eval/tasks/videochatgpt/videochatgpt_temporal.yaml
lmms_eval/tasks/videoevalpro/utils.py
lmms_eval/tasks/videoevalpro/videoevalpro.yaml
lmms_eval/tasks/videomathqa/cot_postprocess.py
lmms_eval/tasks/videomathqa/cot_step_evaluation.py
lmms_eval/tasks/videomathqa/utils.py
lmms_eval/tasks/videomathqa/videomathqa_mbin.yaml
lmms_eval/tasks/videomathqa/videomathqa_mbin_cot.yaml
lmms_eval/tasks/videomathqa/videomathqa_mbin_cot_w_subtitle.yaml
lmms_eval/tasks/videomathqa/videomathqa_mbin_w_subtitle.yaml
lmms_eval/tasks/videomathqa/videomathqa_mcq.yaml
lmms_eval/tasks/videomathqa/videomathqa_mcq_cot.yaml
lmms_eval/tasks/videomathqa/videomathqa_mcq_cot_w_subtitle.yaml
lmms_eval/tasks/videomathqa/videomathqa_mcq_w_subtitle.yaml
lmms_eval/tasks/videomme/utils.py
lmms_eval/tasks/videomme/videomme.yaml
lmms_eval/tasks/videomme/videomme_long.yaml
lmms_eval/tasks/videomme/videomme_long_w_subtitle.yaml
lmms_eval/tasks/videomme/videomme_w_subtitle.yaml
lmms_eval/tasks/videomme/convert_mcq_oe/utils.py
lmms_eval/tasks/videomme/convert_mcq_oe/videomme_convert_mcq_oe.yaml
lmms_eval/tasks/videomme/gt_none_option/utils.py
lmms_eval/tasks/videomme/gt_none_option/videomme_gt_none_option.yaml
lmms_eval/tasks/videomme/no_visual/utils.py
lmms_eval/tasks/videomme/no_visual/videomme_no_visual.yaml
lmms_eval/tasks/videomme/number_option/utils.py
lmms_eval/tasks/videomme/number_option/videomme_number_option.yaml
lmms_eval/tasks/videomme/random_choice/utils.py
lmms_eval/tasks/videomme/random_choice/videomme_random_choice.yaml
lmms_eval/tasks/videomme/revert_oe_mcq/utils.py
lmms_eval/tasks/videomme/revert_oe_mcq/videomme_revert_oe_mcq.yaml
lmms_eval/tasks/videomme/video_only_abcd/utils.py
lmms_eval/tasks/videomme/video_only_abcd/videomme_video_only_abcd.yaml
lmms_eval/tasks/videomme_v2/utils.py
lmms_eval/tasks/videomme_v2/videomme_v2.yaml
lmms_eval/tasks/videomme_v2/videomme_v2_reasoning.yaml
lmms_eval/tasks/videomme_v2/videomme_v2_w_subtitle.yaml
lmms_eval/tasks/videommmu/adaptation.yaml
lmms_eval/tasks/videommmu/adaptation_question_only.yaml
lmms_eval/tasks/videommmu/comprehension.yaml
lmms_eval/tasks/videommmu/perception.yaml
lmms_eval/tasks/videommmu/utils.py
lmms_eval/tasks/videommmu/video_mmmu.yaml
lmms_eval/tasks/videommmu/gt_none_option/adaptation.yaml
lmms_eval/tasks/videommmu/gt_none_option/comprehension.yaml
lmms_eval/tasks/videommmu/gt_none_option/perception.yaml
lmms_eval/tasks/videommmu/gt_none_option/utils.py
lmms_eval/tasks/videommmu/no_visual/adaptation.yaml
lmms_eval/tasks/videommmu/no_visual/comprehension.yaml
lmms_eval/tasks/videommmu/no_visual/perception.yaml
lmms_eval/tasks/videommmu/no_visual/utils.py
lmms_eval/tasks/videommmu/number_option/adaptation.yaml
lmms_eval/tasks/videommmu/number_option/comprehension.yaml
lmms_eval/tasks/videommmu/number_option/perception.yaml
lmms_eval/tasks/videommmu/number_option/utils.py
lmms_eval/tasks/videommmu/random_choice/adaptation.yaml
lmms_eval/tasks/videommmu/random_choice/comprehension.yaml
lmms_eval/tasks/videommmu/random_choice/perception.yaml
lmms_eval/tasks/videommmu/random_choice/utils.py
lmms_eval/tasks/videonet/_binary.yaml
lmms_eval/tasks/videonet/_default.yaml
lmms_eval/tasks/videonet/_mcq.yaml
lmms_eval/tasks/videonet/binary_utils.py
lmms_eval/tasks/videonet/mcq_utils.py
lmms_eval/tasks/videonet/videonet_binary_0shot.yaml
lmms_eval/tasks/videonet/videonet_binary_1shot.yaml
lmms_eval/tasks/videonet/videonet_binary_2shot.yaml
lmms_eval/tasks/videonet/videonet_binary_3shot.yaml
lmms_eval/tasks/videonet/videonet_mcq_test.yaml
lmms_eval/tasks/videonet/videonet_mcq_val.yaml
lmms_eval/tasks/viewspatial/utils.py
lmms_eval/tasks/viewspatial/viewspatial.yaml
lmms_eval/tasks/vinoground/utils.py
lmms_eval/tasks/vinoground/vinoground.yaml
lmms_eval/tasks/visfactor/utils.py
lmms_eval/tasks/visfactor/visfactor.yaml
lmms_eval/tasks/visres_bench/_visres_bench.yaml
lmms_eval/tasks/visres_bench/_visres_bench_level_1.yaml
lmms_eval/tasks/visres_bench/_visres_bench_level_2.yaml
lmms_eval/tasks/visres_bench/_visres_bench_level_3.yaml
lmms_eval/tasks/visres_bench/utils.py
lmms_eval/tasks/visres_bench/visres_bench_level_1_blur.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_brightness.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_edges.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_edges_random_sampling.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_global_occlusion_50.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_global_occlusion_70.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_global_occlusion_80.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_location.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_location_random_sampling.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_rotation.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_1_rotation_random_sampling.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_color_2same_1diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_color_3_diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_count_2_same_1_diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_count_3_diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_count_arithmetic.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_count_minmax.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_count_progression.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_orientation_2same_1diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_orientation_3_diff.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_uniform_color.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_uniform_count.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_2_uniform_orientation.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_3_Independent_count_object_color.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_3_coupled_color_count.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_3_coupled_color_orientation.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_3_independent_color_object_rientation.yaml
lmms_eval/tasks/visres_bench/visres_bench_level_3_spiral_color_orientation.yaml
lmms_eval/tasks/visualwebbench/prompts.py
lmms_eval/tasks/visualwebbench/utils.py
lmms_eval/tasks/visualwebbench/visualwebbench_action_ground.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_action_prediction.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_element_ground.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_element_ocr.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_heading_ocr.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_web_caption.yaml
lmms_eval/tasks/visualwebbench/visualwebbench_webqa.yaml
lmms_eval/tasks/visulogic/utils.py
lmms_eval/tasks/visulogic/visulogic.yaml
lmms_eval/tasks/vitatecs/_vitatecs.yaml
lmms_eval/tasks/vitatecs/utils.py
lmms_eval/tasks/vitatecs/vitatecs_compositionality.yaml
lmms_eval/tasks/vitatecs/vitatecs_direction.yaml
lmms_eval/tasks/vitatecs/vitatecs_intensity.yaml
lmms_eval/tasks/vitatecs/vitatecs_localization.yaml
lmms_eval/tasks/vitatecs/vitatecs_sequence.yaml
lmms_eval/tasks/vitatecs/vitatecs_type.yaml
lmms_eval/tasks/viverbench/utils.py
lmms_eval/tasks/viverbench/viverbench.yaml
lmms_eval/tasks/vizwiz_vqa/_generate_config.py
lmms_eval/tasks/vizwiz_vqa/_vizwiz_vqa.yaml
lmms_eval/tasks/vizwiz_vqa/utils.py
lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_test.yaml
lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val.yaml
lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml
lmms_eval/tasks/vl_rewardbench/utils.py
lmms_eval/tasks/vl_rewardbench/vl_rewardbench.yaml
lmms_eval/tasks/vlms_are_biased/__init__.py
lmms_eval/tasks/vlms_are_biased/utils.py
lmms_eval/tasks/vlms_are_biased/vlms_are_biased.yaml
lmms_eval/tasks/vlmsareblind/__init__.py
lmms_eval/tasks/vlmsareblind/utils.py
lmms_eval/tasks/vlmsareblind/vlmsareblind.yaml
lmms_eval/tasks/vlmsareblind/vlmsareblind_lite.yaml
lmms_eval/tasks/vmcbench/utils.py
lmms_eval/tasks/vmcbench/vmcbench.yaml
lmms_eval/tasks/vocalsound/utils.py
lmms_eval/tasks/vocalsound/vocalsound_test.yaml
lmms_eval/tasks/vocalsound/vocalsound_val.yaml
lmms_eval/tasks/voicebench/utils.py
lmms_eval/tasks/voicebench/voicebench.yaml
lmms_eval/tasks/voicebench/voicebench_advbench.yaml
lmms_eval/tasks/voicebench/voicebench_alpacaeval.yaml
lmms_eval/tasks/voicebench/voicebench_bbh.yaml
lmms_eval/tasks/voicebench/voicebench_commoneval.yaml
lmms_eval/tasks/voicebench/voicebench_ifeval.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_biology.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_business.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_chemistry.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_economics.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_engineering.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_health.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_history.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_law.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_other.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_philosophy.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_physics.yaml
lmms_eval/tasks/voicebench/voicebench_mmsu_psychology.yaml
lmms_eval/tasks/voicebench/voicebench_openbookqa.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_aus.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_gbr.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_ind_n.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_ind_s.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_irl.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_kenya.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_nga.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_nzl.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_phl.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_usa.yaml
lmms_eval/tasks/voicebench/voicebench_sd-qa_zaf.yaml
lmms_eval/tasks/voicebench/voicebench_wildvoice.yaml
lmms_eval/tasks/voicebench/instruction_following_eval/__init__.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions_registry.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions_util.py
lmms_eval/tasks/voxpopuli/utils.py
lmms_eval/tasks/voxpopuli/voxpopuli.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_cs.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_de.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_en.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_en_accented.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_es.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_et.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_fi.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_fr.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_hr.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_hu.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_it.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_lt.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_nl.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_pl.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_ro.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_sk.yaml
lmms_eval/tasks/voxpopuli/voxpopuli_sl.yaml
lmms_eval/tasks/vpct/utils.py
lmms_eval/tasks/vpct/vpct.yaml
lmms_eval/tasks/vqav2/_vqav2.yaml
lmms_eval/tasks/vqav2/utils.py
lmms_eval/tasks/vqav2/vqav2_test.yaml
lmms_eval/tasks/vqav2/vqav2_val.yaml
lmms_eval/tasks/vqav2/vqav2_val_lite.yaml
lmms_eval/tasks/vsibench/utils.py
lmms_eval/tasks/vsibench/vsibench.yaml
lmms_eval/tasks/vsibench/vsibench_debiased.yaml
lmms_eval/tasks/vsibench/vsibench_pruned.yaml
lmms_eval/tasks/vsibench/multi_image_input/utils.py
lmms_eval/tasks/vsibench/multi_image_input/vsibench_debiased_multiimage.yaml
lmms_eval/tasks/vsibench/multi_image_input/vsibench_multiimage.yaml
lmms_eval/tasks/vsisuper/count/vsc_10mins.yaml
lmms_eval/tasks/vsisuper/count/vsc_120mins.yaml
lmms_eval/tasks/vsisuper/count/vsc_30mins.yaml
lmms_eval/tasks/vsisuper/count/vsc_60mins.yaml
lmms_eval/tasks/vsisuper/count/vsc_utils.py
lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_10mins.yaml
lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_120mins.yaml
lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_30mins.yaml
lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_60mins.yaml
lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_utils.py
lmms_eval/tasks/vsisuper/recall/vsr_10mins.yaml
lmms_eval/tasks/vsisuper/recall/vsr_120mins.yaml
lmms_eval/tasks/vsisuper/recall/vsr_240mins.yaml
lmms_eval/tasks/vsisuper/recall/vsr_30mins.yaml
lmms_eval/tasks/vsisuper/recall/vsr_60mins.yaml
lmms_eval/tasks/vsisuper/recall/vsr_utils.py
lmms_eval/tasks/vstar_bench/__init__.py
lmms_eval/tasks/vstar_bench/utils.py
lmms_eval/tasks/vstar_bench/vstar_bench.yaml
lmms_eval/tasks/vstar_bench/vstar_bench_direct_attributes.yaml
lmms_eval/tasks/vstar_bench/vstar_bench_relative_position.yaml
lmms_eval/tasks/vstar_bench/reasoning/utils.py
lmms_eval/tasks/vstar_bench/reasoning/vstar_bench_direct_attributes_reasoning.yaml
lmms_eval/tasks/vstar_bench/reasoning/vstar_bench_reasoning.yaml
lmms_eval/tasks/vstar_bench/reasoning/vstar_bench_relative_position_reasoning.yaml
lmms_eval/tasks/vstat/utils.py
lmms_eval/tasks/vstat/vstat.yaml
lmms_eval/tasks/wavcaps/utils.py
lmms_eval/tasks/wavcaps/wavcaps.yaml
lmms_eval/tasks/websrc/utils.py
lmms_eval/tasks/websrc/websrc.yaml
lmms_eval/tasks/websrc/websrc_test.yaml
lmms_eval/tasks/websrc/websrc_val.yaml
lmms_eval/tasks/wemath/wemath_utils.py
lmms_eval/tasks/wemath/reasoning/utils.py
lmms_eval/tasks/wemath/reasoning/wemath_testmini_thinking.yaml
lmms_eval/tasks/wenet_speech/utils.py
lmms_eval/tasks/wenet_speech/wenet_speech.yaml
lmms_eval/tasks/wenet_speech/wenet_speech_dev.yaml
lmms_eval/tasks/wenet_speech/wenet_speech_test_meeting.yaml
lmms_eval/tasks/wenet_speech/wenet_speech_test_net.yaml
lmms_eval/tasks/where2place/utils.py
lmms_eval/tasks/where2place/where2place.yaml
lmms_eval/tasks/wild_vision_bench/utils.py
lmms_eval/tasks/wild_vision_bench/wild_vision_bench0617.yaml
lmms_eval/tasks/wild_vision_bench/wild_vision_bench0630.yaml
lmms_eval/tasks/wild_vision_bench/wildvision_bench.yaml
lmms_eval/tasks/wm_abench/utils.py
lmms_eval/tasks/wm_abench/wm_abench.yaml
lmms_eval/tasks/wm_abench/wm_abench_compositionality_maniskill_lift_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_compositionality_maniskill_push_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_compositionality_maniskill_rotate_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_compositionality_tdw_two_sphere_2_14_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_discretecounting_tdw_top_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_maniskill_drop_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_maniskill_lift_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_maniskill_push_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_maniskill_rotate_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_physion_type_collision_v2_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_physion_type_drop_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_mechanisticknowledge_physion_type_slide_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiondirection_maniskill_subtask_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiondirection_maniskill_towards_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiondirection_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiondirection_tdw_subtask_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motionspeed_maniskill_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motionspeed_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiontrajectory_maniskill_path_choices_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_motiontrajectory_path_choices_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_multiview_maniskill_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_multiview_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_predictionnextstate_carla_next_state_prediction_02_08_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_quantitycontinuous_tdw_continuous_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_relativecounting_maniskill_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_relativecounting_tdw_color_letter_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_relativeposition_front_top_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_relativeposition_front_top_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_spatialoccupancy_maniskill_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_spatialoccupancy_occupancy_fitting_tdw_top_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_spatialoccupancy_occupancy_size_tdw_front_large_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_spatialoccupancy_volume_object_only_unfloat_top_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_speed_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_temporalextension_temporal_tdw_yjx_01_05_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_temporalpositioning_maniskill_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_transitivity_habitat_lab_trans_ver_2_noback_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_visualattribute_color_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_visualattribute_color_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_visualattribute_material_diff_tdw_subset.yaml
lmms_eval/tasks/wm_abench/wm_abench_visualattribute_shape_tdw_subset.yaml
lmms_eval/tasks/worldqa/utils.py
lmms_eval/tasks/worldqa/worldqa.yaml
lmms_eval/tasks/worldqa/worldqa_generation.yaml
lmms_eval/tasks/worldqa/worldqa_mc.yaml
lmms_eval/tasks/worldqa/worldqa_mc_evaluator.py
lmms_eval/tasks/worldqa/worldqa_mcppl.yaml
lmms_eval/tasks/worldsense/utils.py
lmms_eval/tasks/worldsense/worldsense.yaml
lmms_eval/tasks/worldsense/worldsense_w_subtitle.yaml
lmms_eval/tasks/worldvqa/utils.py
lmms_eval/tasks/worldvqa/worldvqa.yaml
lmms_eval/tasks/worldvqa/worldvqa_generation.yaml
lmms_eval/tasks/worldvqa/worldvqa_mc.yaml
lmms_eval/tasks/worldvqa/worldvqa_mcppl.yaml
lmms_eval/tasks/xlrs/XLRS-lite.yaml
lmms_eval/tasks/xlrs/mcq_utils.py
lmms_eval/tasks/xmod_bench/build_data.py
lmms_eval/tasks/xmod_bench/make_lite.py
lmms_eval/tasks/xmod_bench/subsample_perception.py
lmms_eval/tasks/xmod_bench/summarize.py
lmms_eval/tasks/xmod_bench/utils.py
lmms_eval/tasks/xmod_bench/xmod_bench.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_audio_image.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_audio_text.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_audio_video.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_image_audio.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_image_text.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_a2t.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_a2v.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_t2a.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_t2v.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_v2a.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_lite_v2t.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_text_audio.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_text_image.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_text_video.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_video_audio.yaml
lmms_eval/tasks/xmod_bench/xmod_bench_video_text.yaml
lmms_eval/tasks/youcook2/utils.py
lmms_eval/tasks/youcook2/youcook2_val.yaml
lmms_eval/tasks/zerobench/utils.py
lmms_eval/tasks/zerobench/zerobench.yaml
lmms_eval/tui/__init__.py
lmms_eval/tui/cli.py
lmms_eval/tui/discovery.py
lmms_eval/tui/server.py
lmms_eval/tui/web/package-lock.json
lmms_eval/tui/web/package.json
lmms_eval/tui/web/tsconfig.json
lmms_eval/tui/web/dist/favicon.svg
lmms_eval/tui/web/dist/index.html
lmms_eval/tui/web/dist/assets/index-BLeQKQAq.js
lmms_eval/tui/web/dist/assets/index-BMub-jBn.css