from apscheduler.schedulers.background import BackgroundScheduler import gradio as gr import pandas as pd COLS = [ ("Model", "str"), ("FPB-acc", "number"), ("FPB-F1", "number"), ("FiQA-SA-F1", "number"), ("Headline-AvgF1", "number"), ("NER-EntityF1", "number"), ("FinQA-EmAcc", "number"), ("ConvFinQA-EmAcc", "number"), ("BigData22-Acc", "number"), ("BigData22-MCC", "number"), ("ACL18-Acc", "number"), ("ACL18-MCC", "number"), ("CIKM18-Acc", "number"), ("CIKM18-MCC", "number") ] COLS_AUTO = [ ("Model", "str"), ("FPB-acc", "number"), ("FPB-F1", "number"), ("FPB-missing", "number"), ("FiQA-SA-F1", "number"), ("FiQA-SA-missing", "number"), ("Headline-AvgF1", "number"), ("NER-EntityF1", "number"), ("FinQA-EmAcc", "number"), ("BigData22-Acc", "number"), ("BigData22-MCC", "number"), ("BigData22-missing", "number"), ("ACL18-Acc", "number"), ("ACL18-MCC", "number"), ("ACL18-missing", "number"), ("CIKM18-Acc", "number"), ("CIKM18-MCC", "number"), ("CIKM18-missing", "number"), ("FOMC-acc", "number"), ("FOMC-F1", "number"), ("FOMC-missing", "number"), ("FinerOrd-EntityF1", "number"), ("FinerOrd-F1", "number"), ("German-Acc", "number"), ("German-MCC", "number"), ("German-missing", "number"), ("Australian-Acc", "number"), ("Australian-MCC", "number"), ("Australian-missing", "number") ] # Load leaderboard data leaderboard_df = pd.read_csv('leaderboard.csv', header=None) print (leaderboard_df) leaderboard_auto_df = pd.read_csv('leaderboard_auto.csv', header=None) # Constants TITLE = "Financial Natural Language Understanding and Prediction Evaluation Benchmark (FLARE) Leaderboard" INTRODUCTION_TEXT = "The leaderboard shows the performance of various models in financial natural language understanding and prediction tasks." COLS = [ ("Model", "str"), ("FPB-acc", "number"), ("FPB-F1", "number"), ("FiQA-SA-F1", "number"), ("Headline-AvgF1", "number"), ("NER-EntityF1", "number"), ("FinQA-EmAcc", "number"), ("ConvFinQA-EmAcc", "number"), ("BigData22-Acc", "number"), ("BigData22-MCC", "number"), ("ACL18-Acc", "number"), ("ACL18-MCC", "number"), ("CIKM18-Acc", "number"), ("CIKM18-MCC", "number") ] COLS_AUTO = [ ("Model", "str"), ("FPB-acc", "number"), ("FPB-F1", "number"), ("FPB-missing", "number"), ("FiQA-SA-F1", "number"), ("FiQA-SA-missing", "number"), ("Headline-AvgF1", "number"), ("NER-EntityF1", "number"), ("FinQA-EmAcc", "number"), ("BigData22-Acc", "number"), ("BigData22-MCC", "number"), ("BigData22-missing", "number"), ("ACL18-Acc", "number"), ("ACL18-MCC", "number"), ("ACL18-missing", "number"), ("CIKM18-Acc", "number"), ("CIKM18-MCC", "number"), ("CIKM18-missing", "number"), ("FOMC-acc", "number"), ("FOMC-F1", "number"), ("FOMC-missing", "number"), ("FinerOrd-EntityF1", "number"), ("FinerOrd-F1", "number"), ("German-Acc", "number"), ("German-MCC", "number"), ("German-missing", "number"), ("Australian-Acc", "number"), ("Australian-MCC", "number"), ("Australian-missing", "number") ] TYPES = [col_type for _, col_type in COLS] TYPES_AUTO = [col_type for _, col_type in COLS_AUTO] def create_leaderboard_table(df, headers, types): return gr.components.Dataframe( value=df.values.tolist(), headers=[col_name for col_name, _ in headers], datatype=types, max_rows=10, ) def launch_gradio(): demo = gr.Blocks() with demo: gr.HTML(TITLE) gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text") # Create tabs for self-hosted and automatic evaluation with gr.Tab("Self-hosted evaluation"): lt = create_leaderboard_table(leaderboard_df, COLS, TYPES) with gr.Tab("Automatic evaluation"): lat = create_leaderboard_table(leaderboard_auto_df, COLS_AUTO, TYPES_AUTO) demo.launch() scheduler = BackgroundScheduler() scheduler.add_job(launch_gradio, "interval", seconds=3600) scheduler.start() # Launch immediately launch_gradio()