from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("anli_r1", "acc", "ANLI")
    task1 = Task("logiqa", "acc_norm", "LogiQA")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

MMIE

""" # MJB_LOGO = 'Logo' # What does your leaderboard evaluate? INTRODUCTION_TEXT = """ # MMIE: Massive Multimodal Interleaved Comprehension Benchmark for Large Vision-Language Models We present MMIE, a Massive Multimodal Interleaved understanding Evaluation benchmark, designed for Large Vision-Language Models (LVLMs). MMIE offers a robust framework for evaluating the interleaved comprehension and generation capabilities of LVLMs across diverse fields, supported by reliable automated metrics. [Website](https://mmie-bench.github.io) | [Code](https://github.com/Lillianwei-h/MMIE) | [Dataset](https://huggingface.co/datasets/MMIE/MMIE) | [Results](https://huggingface.co/spaces/MMIE/Leaderboard) | [Evaluation Model](https://huggingface.co/MMIE/MMIE-Score) | [Paper](https://arxiv.org/abs/2410.10139) """ # Which evaluations are you running? how can people reproduce what you have? LLM_BENCHMARKS_TEXT = f""" """ EVALUATION_QUEUE_TEXT = """ """ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" CITATION_BUTTON_TEXT = r""" @article{xia2024mmie, title={MMIE: Massive Multimodal Interleaved Comprehension Benchmark for Large Vision-Language Models}, author={Xia, Peng and Han, Siwei and Qiu, Shi and Zhou, Yiyang and Wang, Zhaoyang and Zheng, Wenhao and Chen, Zhaorun and Cui, Chenhang and Ding, Mingyu and Li, Linjie and Wang, Lijuan and Yao, Huaxiu}, journal={arXiv preprint arXiv:2410.10139}, year={2024} } """ ABOUT_TEXT = """ """