Jimin Huang committed on
Commit
52f2f47
1 Parent(s): c218da6

feature: add auto evaluation tab

Browse files
Files changed (1) hide show
  1. leaderboard_auto.csv +2 -0
leaderboard_auto.csv CHANGED
@@ -4,3 +4,5 @@ chatgpt,0.78,0.78,0.0,,,0.77,0.77,0.58,0.53,-0.025,0,0.5,0.005,0,0.55,0.01,0,,,,
4
  GPT-4,0.76,0.78,0.0,,,0.86,0.83,0.63,0.54,0.03,0,0.52,0.02,0,0.57,0.01,0,,,,,,,,,,,
5
  FinMA-7B,0.86,0.86,0.0,0.84,0.0,0.98,0.75,0.06,0.48,0.04,0.0,0.5,0.0,0,0.56,-0.02,0.0,0.45,0.58,0.24,0,0,0,0,1,0,0,1
6
  FinMA-7B-full,0.88,0.88,0.0,0.83,0,0.97,0.67,0.06,0.51,0.06,0.0,0.52,0.03,0.0,0.52,0.04,0.0,0.47,0.61,0.24,0,0,0,0,1,0,0,1
 
 
 
4
  GPT-4,0.76,0.78,0.0,,,0.86,0.83,0.63,0.54,0.03,0,0.52,0.02,0,0.57,0.01,0,,,,,,,,,,,
5
  FinMA-7B,0.86,0.86,0.0,0.84,0.0,0.98,0.75,0.06,0.48,0.04,0.0,0.5,0.0,0,0.56,-0.02,0.0,0.45,0.58,0.24,0,0,0,0,1,0,0,1
6
  FinMA-7B-full,0.88,0.88,0.0,0.83,0,0.97,0.67,0.06,0.51,0.06,0.0,0.52,0.03,0.0,0.52,0.04,0.0,0.47,0.61,0.24,0,0,0,0,1,0,0,1
7
+ Baichuan-7B,0.01,0.02,0.99,0.55,0.03,0.76,0.16,0,0.34,0,0.38,0.32,0,0.38,0.15,0,0.73,0.01,0.06,0.98,0,0,0,0,1,0,0,1
8
+ llama-2-7b-chat,0.05,0.12,0.89,0.61,0.09,0.57,0.12,0,0.42,0.05,0.26,0.41,0,0.19,0.31,0.01,0.48,0.17,0.27,0.39,0.03,0.0,0,0,1,0,0,1