Andrea Seveso committed on
Commit
48d0e61
1 Parent(s): e6aadde

Add claude-3.5-sonnet

Browse files
Files changed (2) hide show
  1. src/macro_area.csv +1 -0
  2. src/question_format.csv +1 -0
src/macro_area.csv CHANGED
@@ -6,6 +6,7 @@ Minerva-3B-base-v1.0,4.6,3.9,9.1,28.6,3.4,4.2,0.0,5.3,0.0
6
  claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
7
  claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
8
  claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
 
9
  command-r-plus,74.1,80.4,81.8,71.4,65.5,66.7,0.0,57.9,83.3
10
  gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
11
  gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
 
6
  claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
7
  claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
8
  claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
9
+ claude-3.5-sonnet:beta,92.6,95.0,84.8,100.0,93.1,87.5,25.0,94.7,83.3
10
  command-r-plus,74.1,80.4,81.8,71.4,65.5,66.7,0.0,57.9,83.3
11
  gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
12
  gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
src/question_format.csv CHANGED
@@ -6,6 +6,7 @@ Minerva-3B-base-v1.0,0.0,0.0,0.0,13.3,0.0,0.0,0.0,0.0,0.0,0.0,8.6,0.0,0.0,0.0,6.
6
  claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
7
  claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
8
  claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
 
9
  command-r-plus,90.6,0.0,100.0,88.3,14.3,0.0,50.0,80.3,57.1,66.7,85.2,0.0,100.0,50.0,79.2,46.2,57.1,61.9,12.5
10
  gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
11
  gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0
 
6
  claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
7
  claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
8
  claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
9
+ claude-3.5-sonnet:beta,100.0,100.0,100.0,100.0,85.7,100.0,50.0,97.2,100.0,88.9,95.1,100.0,100.0,50.0,93.8,69.2,50.0,92.9,62.5
10
  command-r-plus,90.6,0.0,100.0,88.3,14.3,0.0,50.0,80.3,57.1,66.7,85.2,0.0,100.0,50.0,79.2,46.2,57.1,61.9,12.5
11
  gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
12
  gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0