File size: 2,877 Bytes
71b62ee
 
 
 
aadfcea
98a40e2
22e8484
71b62ee
aadfcea
89d0d4b
 
71b62ee
22e8484
129288f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22e8484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129288f
22e8484
 
 
 
 
 
 
 
 
129288f
a79e05b
 
 
71b62ee
8752ce9
 
 
 
22e8484
 
 
 
8752ce9
 
 
 
 
 
 
22e8484
 
71b62ee
 
 
 
 
aadfcea
71b62ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from apscheduler.schedulers.background import BackgroundScheduler
import gradio as gr
import pandas as pd

# Load leaderboard data
leaderboard_df = pd.read_csv('leaderboard.csv')
leaderboard_auto_df = pd.read_csv('leaderboard_auto.csv')

# Constants
TITLE = "Financial Natural Language Understanding and Prediction Evaluation Benchmark (FLARE) Leaderboard"
INTRODUCTION_TEXT = "The leaderboard shows the performance of various models in financial natural language understanding and prediction tasks."


COLS = [
    ("Model", "str"),
    ("FPB-acc", "number"),
    ("FPB-F1", "number"),
    ("FiQA-SA-F1", "number"),
    ("Headline-AvgF1", "number"),
    ("NER-EntityF1", "number"),
    ("FinQA-EmAcc", "number"),
    ("ConvFinQA-EmAcc", "number"),
    ("BigData22-Acc", "number"),
    ("BigData22-MCC", "number"),
    ("ACL18-Acc", "number"),
    ("ACL18-MCC", "number"),
    ("CIKM18-Acc", "number"),
    ("CIKM18-MCC", "number")
]

COLS_AUTO = [
    ("Model", "str"),
    ("FPB-acc", "number"),
    ("FPB-F1", "number"),
    ("FPB-missing", "number"),
    ("FiQA-SA-F1", "number"),
    ("FiQA-SA-missing", "number"),
    ("Headline-AvgF1", "number"),
    ("NER-EntityF1", "number"),
    ("FinQA-EmAcc", "number"),
    ("BigData22-Acc", "number"),
    ("BigData22-MCC", "number"),
    ("BigData22-missing", "number"),
    ("ACL18-Acc", "number"),
    ("ACL18-MCC", "number"),
    ("ACL18-missing", "number"),
    ("CIKM18-Acc", "number"),
    ("CIKM18-MCC", "number"),
    ("CIKM18-missing", "number"),
    ("FOMC-acc", "number"),
    ("FOMC-F1", "number"),
    ("FOMC-missing", "number"),
    ("FinerOrd-EntityF1", "number"),
    ("FinerOrd-F1", "number"),
    ("German-Acc", "number"),
    ("German-MCC", "number"),
    ("German-missing", "number"),
    ("Australian-Acc", "number"),
    ("Australian-MCC", "number"),
    ("Australian-missing", "number")
]

TYPES = [col_type for _, col_type in COLS]
TYPES_AUTO = [col_type for _, col_type in COLS_AUTO]

def create_leaderboard_table(df, headers, types):
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=[col_name for col_name, _ in headers],
        datatype=types,
        max_rows=10,
    )

def dummy_fn():
    pass

def launch_gradio():
    iface = gr.Interface(
        fn=dummy_fn,
        layout="tab",
        inputs=None,
        outputs=None,
        title=TITLE,
        description=INTRODUCTION_TEXT,
        tabs=["Self-hosted evaluation", "Automatic evaluation"],
    )

    with iface.tab("Self-hosted evaluation"):
        create_leaderboard_table(leaderboard_df, COLS, TYPES)

    with iface.tab("Automatic evaluation"):
        create_leaderboard_table(leaderboard_auto_df, COLS_AUTO, TYPES_AUTO)

    iface.launch()

scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, "interval", seconds=3600)
scheduler.start()

# Launch immediately
launch_gradio()