import json
import re

import gradio as gr
import numpy
import pandas as pd

from src.about import (
    AUTHORS,
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.formatting import make_clickable_model

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    NUMBER_OF_QUESTIONS = 171.0

    # Load the results CSV by hand: the trailing "Error" column may itself
    # contain commas, so each line is split into at most 14 fields instead of
    # relying on pd.read_csv.
    rows = []
    with open("benchmark_results.csv", "r") as f:
        header = [h.strip() for h in f.readline().strip().split(",")]
        for line in f:
            rows.append(line.strip().split(",", 13))

    # Model metadata (parameter counts). Also index every entry by the part of
    # its key before the first comma, so lookups by bare model path succeed.
    with open("metadata.json", "r") as f:
        metadata = json.load(f)
    for k, v in list(metadata.items()):
        metadata[k.split(",")[0]] = v

    leaderboard_df = pd.DataFrame(rows, columns=header)

    # Keep only the Polish EQ-Bench runs. Combine the conditions with `|`;
    # using `or` raises "ValueError: The truth value of a Series is ambiguous".
    leaderboard_df = leaderboard_df[
        (leaderboard_df["Benchmark Version"] == "eq-bench_v2_pl")
        | (leaderboard_df["Benchmark Version"] == "eq-bench_pl")
    ]

    # Keep only the columns shown on the leaderboard.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    def parse_parseable(row):
        """Recover the parseable-question count of a failed run from its error message."""
        if row["Num Questions Parseable"] == "FAILED":
            m = re.match(r"(\d+)\.0 questions were parseable", row["Error"])
            if m:
                return m.group(1)
            return numpy.nan
        return row["Num Questions Parseable"]

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        ["Num Questions Parseable", "Error"]
    ].apply(parse_parseable, axis=1)

    def fraction_to_percentage(numerator: float, denominator: float) -> float:
        return (numerator / denominator) * 100

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        "Num Questions Parseable"
    ].apply(lambda x: fraction_to_percentage(float(x), NUMBER_OF_QUESTIONS))

    def get_params(model_name):
        """Look up a model's parameter count; log the name and return NaN when unknown."""
        if model_name in metadata:
            return metadata[model_name]
        print(model_name)
        return numpy.nan

    leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)

    # Reorder the columns and link each model to its model page.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]
    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)

    # Failed runs have no score.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace(
        "FAILED", numpy.nan
    )

    # Scale the benchmark score by the fraction of questions that were parseable.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float) * (
        leaderboard_df["Num Questions Parseable"].astype(float) / 100
    )
    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        "Num Questions Parseable"
    ].astype(float)

    # Clip negative scores to 0.
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # Sort best-first, breaking score ties by parseability.
    leaderboard_df = leaderboard_df.sort_values(
        by=["Benchmark Score", "Num Questions Parseable"], ascending=[False, False]
    )
    leaderboard_df = leaderboard_df.rename(
        columns={
            "Model Path": "Model",
            "Num Questions Parseable": "Percentage Questions Parseable",
        }
    )

    # Color the table: higher scores green, larger parameter counts red.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(
        cmap="RdYlGn_r", subset=["Params"]
    )
    rounding = {
        "Benchmark Score": "{:.2f}",
        "Percentage Questions Parseable": "{:.2f}",
        "Params": "{:.0f}",
    }
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        datatype=["markdown", "number", "number", "number", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )
    gr.Markdown(AUTHORS, elem_classes="markdown-text")

demo.queue(default_concurrency_limit=40).launch()