Jimin Huang committed on
Commit
eae5636
1 Parent(s): b99a4dc

feature: add auto evaluation tab

Browse files
Files changed (1) hide show
  1. app.py.bak +95 -0
app.py.bak ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from apscheduler.schedulers.background import BackgroundScheduler
2
+ import gradio as gr
3
+ import pandas as pd
4
+
5
# Load leaderboard data.
# Both files are read with header=None, so every row of the CSV (including the
# first one) is kept as data and the columns get default integer labels; the
# display names are supplied separately when the tables are built.
leaderboard_df = pd.read_csv('leaderboard.csv', header=None)
print(leaderboard_df)
leaderboard_auto_df = pd.read_csv('leaderboard_auto.csv', header=None)
9
+
10
# Constants
# TITLE is passed to gr.HTML and INTRODUCTION_TEXT to gr.Markdown when the
# page is built in launch_gradio().
TITLE = "Financial Natural Language Understanding and Prediction Evaluation Benchmark (FLARE) Leaderboard"
INTRODUCTION_TEXT = "The leaderboard shows the performance of various models in financial natural language understanding and prediction tasks."
13
+
14
+
15
# Column schema for the self-hosted evaluation table.
# Each entry is a (header name, datatype) pair: the names become the table
# headers and the datatypes are collected into TYPES. The order here has to
# line up with the column order of leaderboard.csv, which is loaded without a
# header row.
COLS = [
    ("Model", "str"),
    ("FPB-acc", "number"),
    ("FPB-F1", "number"),
    ("FiQA-SA-F1", "number"),
    ("Headline-AvgF1", "number"),
    ("NER-EntityF1", "number"),
    ("FinQA-EmAcc", "number"),
    ("ConvFinQA-EmAcc", "number"),
    ("BigData22-Acc", "number"),
    ("BigData22-MCC", "number"),
    ("ACL18-Acc", "number"),
    ("ACL18-MCC", "number"),
    ("CIKM18-Acc", "number"),
    ("CIKM18-MCC", "number"),
]
31
+
32
# Column schema for the automatic evaluation table.
# Same (header name, datatype) layout as COLS. The order here has to line up
# with the column order of leaderboard_auto.csv, which is loaded without a
# header row.
# NOTE(review): unlike COLS there is no "ConvFinQA-EmAcc" entry here --
# presumably the automatic run does not report it; confirm against the CSV.
COLS_AUTO = [
    ("Model", "str"),
    ("FPB-acc", "number"),
    ("FPB-F1", "number"),
    ("FPB-missing", "number"),
    ("FiQA-SA-F1", "number"),
    ("FiQA-SA-missing", "number"),
    ("Headline-AvgF1", "number"),
    ("NER-EntityF1", "number"),
    ("FinQA-EmAcc", "number"),
    ("BigData22-Acc", "number"),
    ("BigData22-MCC", "number"),
    ("BigData22-missing", "number"),
    ("ACL18-Acc", "number"),
    ("ACL18-MCC", "number"),
    ("ACL18-missing", "number"),
    ("CIKM18-Acc", "number"),
    ("CIKM18-MCC", "number"),
    ("CIKM18-missing", "number"),
    ("FOMC-acc", "number"),
    ("FOMC-F1", "number"),
    ("FOMC-missing", "number"),
    ("FinerOrd-EntityF1", "number"),
    ("FinerOrd-F1", "number"),
    ("German-Acc", "number"),
    ("German-MCC", "number"),
    ("German-missing", "number"),
    ("Australian-Acc", "number"),
    ("Australian-MCC", "number"),
    ("Australian-missing", "number"),
]
63
+
64
# Datatype lists for the two tables, taken from the second element of each
# (header name, datatype) pair so they stay in the same order as the headers.
TYPES = [col_type for _, col_type in COLS]
TYPES_AUTO = [col_type for _, col_type in COLS_AUTO]
66
+
67
def create_leaderboard_table(df, headers, types):
    """Build a Gradio Dataframe component showing one leaderboard.

    Args:
        df: pandas DataFrame holding the leaderboard rows; its values are
            converted to a list of row lists for the component.
        headers: sequence of (column name, datatype) pairs; only the column
            names are used here, as the table headers.
        types: sequence of Gradio datatype strings passed as ``datatype``.

    Returns:
        The ``gr.components.Dataframe`` instance that was created.
    """
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=[col_name for col_name, _ in headers],
        datatype=types,
        max_rows=10,
    )
74
+
75
def launch_gradio():
    """Build the leaderboard page and start the Gradio server.

    The page has a title, an introduction paragraph, and two tabs: one for
    the self-hosted results and one for the automatic-evaluation results.
    The tables are built from the module-level DataFrames, so the data shown
    is whatever was loaded when the module was imported.
    """
    with gr.Blocks() as demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Create tabs for self-hosted and automatic evaluation.
        # The components register themselves with the enclosing tab when
        # created, so the return values do not need to be kept.
        with gr.Tab("Self-hosted evaluation"):
            create_leaderboard_table(leaderboard_df, COLS, TYPES)
        with gr.Tab("Automatic evaluation"):
            create_leaderboard_table(leaderboard_auto_df, COLS_AUTO, TYPES_AUTO)

    demo.launch()
89
+
90
# Run launch_gradio every 3600 seconds in the background.
# NOTE(review): the scheduled job is launch_gradio itself, so each hour a
# second Blocks app is built and launch() is called again while the first
# server is still running. The CSVs are read once at import time, so this
# does not refresh the displayed data either. Presumably a data-reload or
# restart job was intended here -- confirm and replace the job target.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, "interval", seconds=3600)
scheduler.start()

# Launch immediately
launch_gradio()