amezasor committed on
Commit
902a415
1 Parent(s): 58cad21

update: eval results

Browse files
Files changed (1) hide show
  1. README.md +71 -34
README.md CHANGED
@@ -2,31 +2,28 @@
2
  pipeline_tag: text-generation
3
  inference: false
4
  license: apache-2.0
5
- # datasets:
6
- # metrics:
7
- # - code_eval
8
  library_name: transformers
9
  tags:
10
  - language
11
  - granite-3.0
12
  model-index:
13
- - name: granite-3.0-3b-a800m-instruct
14
  results:
15
  - task:
16
  type: text-generation
17
  dataset:
18
- type: human-exams
19
- name: MMLU
20
  metrics:
21
  - name: pass@1
22
  type: pass@1
23
- value:
24
  veriefied: false
25
  - task:
26
  type: text-generation
27
  dataset:
28
- type: human-exams
29
- name: MMLU-Pro
30
  metrics:
31
  - name: pass@1
32
  type: pass@1
@@ -40,17 +37,27 @@ model-index:
40
  metrics:
41
  - name: pass@1
42
  type: pass@1
43
- value:
44
  veriefied: false
45
  - task:
46
  type: text-generation
47
  dataset:
48
- type: commonsense
49
- name: WinoGrande
50
  metrics:
51
  - name: pass@1
52
  type: pass@1
53
- value:
 
 
 
 
 
 
 
 
 
 
54
  veriefied: false
55
  - task:
56
  type: text-generation
@@ -60,7 +67,7 @@ model-index:
60
  metrics:
61
  - name: pass@1
62
  type: pass@1
63
- value:
64
  veriefied: false
65
  - task:
66
  type: text-generation
@@ -70,27 +77,27 @@ model-index:
70
  metrics:
71
  - name: pass@1
72
  type: pass@1
73
- value:
74
  veriefied: false
75
  - task:
76
  type: text-generation
77
  dataset:
78
  type: commonsense
79
- name: PIQA
80
  metrics:
81
  - name: pass@1
82
  type: pass@1
83
- value:
84
  veriefied: false
85
  - task:
86
  type: text-generation
87
  dataset:
88
  type: commonsense
89
- name: Hellaswag
90
  metrics:
91
  - name: pass@1
92
  type: pass@1
93
- value:
94
  veriefied: false
95
  - task:
96
  type: text-generation
@@ -100,7 +107,7 @@ model-index:
100
  metrics:
101
  - name: pass@1
102
  type: pass@1
103
- value:
104
  veriefied: false
105
  - task:
106
  type: text-generation
@@ -110,17 +117,17 @@ model-index:
110
  metrics:
111
  - name: pass@1
112
  type: pass@1
113
- value:
114
  veriefied: false
115
  - task:
116
  type: text-generation
117
  dataset:
118
  type: reading-comprehension
119
- name: SQuAD v2
120
  metrics:
121
  - name: pass@1
122
  type: pass@1
123
- value:
124
  veriefied: false
125
  - task:
126
  type: text-generation
@@ -130,7 +137,7 @@ model-index:
130
  metrics:
131
  - name: pass@1
132
  type: pass@1
133
- value:
134
  veriefied: false
135
  - task:
136
  type: text-generation
@@ -140,7 +147,7 @@ model-index:
140
  metrics:
141
  - name: pass@1
142
  type: pass@1
143
- value:
144
  veriefied: false
145
  - task:
146
  type: text-generation
@@ -150,17 +157,37 @@ model-index:
150
  metrics:
151
  - name: pass@1
152
  type: pass@1
153
- value:
154
  veriefied: false
155
  - task:
156
  type: text-generation
157
  dataset:
158
  type: code
159
- name: HumanEval
160
  metrics:
161
  - name: pass@1
162
  type: pass@1
163
- value:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  veriefied: false
165
  - task:
166
  type: text-generation
@@ -170,7 +197,7 @@ model-index:
170
  metrics:
171
  - name: pass@1
172
  type: pass@1
173
- value:
174
  veriefied: false
175
  - task:
176
  type: text-generation
@@ -180,7 +207,7 @@ model-index:
180
  metrics:
181
  - name: pass@1
182
  type: pass@1
183
- value:
184
  veriefied: false
185
  - task:
186
  type: text-generation
@@ -190,18 +217,28 @@ model-index:
190
  metrics:
191
  - name: pass@1
192
  type: pass@1
193
- value:
194
  veriefied: false
195
  - task:
196
  type: text-generation
197
  dataset:
198
  type: multilingual
199
- name: MGSM
200
  metrics:
201
  - name: pass@1
202
  type: pass@1
203
- value:
204
- veriefied: false
 
 
 
 
 
 
 
 
 
 
205
  ---
206
 
207
  <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->
 
2
  pipeline_tag: text-generation
3
  inference: false
4
  license: apache-2.0
 
 
 
5
  library_name: transformers
6
  tags:
7
  - language
8
  - granite-3.0
9
  model-index:
10
+ - name: granite-3.0-2b-instruct
11
  results:
12
  - task:
13
  type: text-generation
14
  dataset:
15
+ type: instruction-following
16
+ name: IFEval
17
  metrics:
18
  - name: pass@1
19
  type: pass@1
20
+ value: 42.49
21
  verified: false
22
  - task:
23
  type: text-generation
24
  dataset:
25
+ type: instruction-following
26
+ name: MT-Bench
27
  metrics:
28
  - name: pass@1
29
  type: pass@1
 
37
  metrics:
38
  - name: pass@1
39
  type: pass@1
40
+ value: 25.70
41
  verified: false
42
  - task:
43
  type: text-generation
44
  dataset:
45
+ type: human-exams
46
+ name: MMLU
47
  metrics:
48
  - name: pass@1
49
  type: pass@1
50
+ value: 50.16
51
+ verified: false
52
+ - task:
53
+ type: text-generation
54
+ dataset:
55
+ type: human-exams
56
+ name: MMLU-Pro
57
+ metrics:
58
+ - name: pass@1
59
+ type: pass@1
60
+ value: 20.51
61
  verified: false
62
  - task:
63
  type: text-generation
 
67
  metrics:
68
  - name: pass@1
69
  type: pass@1
70
+ value: 40.80
71
  verified: false
72
  - task:
73
  type: text-generation
 
77
  metrics:
78
  - name: pass@1
79
  type: pass@1
80
+ value: 59.95
81
  verified: false
82
  - task:
83
  type: text-generation
84
  dataset:
85
  type: commonsense
86
+ name: Hellaswag
87
  metrics:
88
  - name: pass@1
89
  type: pass@1
90
+ value: 71.86
91
  verified: false
92
  - task:
93
  type: text-generation
94
  dataset:
95
  type: commonsense
96
+ name: WinoGrande
97
  metrics:
98
  - name: pass@1
99
  type: pass@1
100
+ value: 67.01
101
  verified: false
102
  - task:
103
  type: text-generation
 
107
  metrics:
108
  - name: pass@1
109
  type: pass@1
110
+ value: 48.00
111
  verified: false
112
  - task:
113
  type: text-generation
 
117
  metrics:
118
  - name: pass@1
119
  type: pass@1
120
+ value: 78.65
121
  verified: false
122
  - task:
123
  type: text-generation
124
  dataset:
125
  type: reading-comprehension
126
+ name: SQuAD 2.0
127
  metrics:
128
  - name: pass@1
129
  type: pass@1
130
+ value: 6.71
131
  verified: false
132
  - task:
133
  type: text-generation
 
137
  metrics:
138
  - name: pass@1
139
  type: pass@1
140
+ value: 50.94
141
  verified: false
142
  - task:
143
  type: text-generation
 
147
  metrics:
148
  - name: pass@1
149
  type: pass@1
150
+ value: 26.85
151
  verified: false
152
  - task:
153
  type: text-generation
 
157
  metrics:
158
  - name: pass@1
159
  type: pass@1
160
+ value: 37.70
161
  verified: false
162
  - task:
163
  type: text-generation
164
  dataset:
165
  type: code
166
+ name: HumanEvalSynthesis
167
  metrics:
168
  - name: pass@1
169
  type: pass@1
170
+ value: 39.63
171
+ verified: false
172
+ - task:
173
+ type: text-generation
174
+ dataset:
175
+ type: code
176
+ name: HumanEvalExplain
177
+ metrics:
178
+ - name: pass@1
179
+ type: pass@1
180
+ value: 40.85
181
+ verified: false
182
+ - task:
183
+ type: text-generation
184
+ dataset:
185
+ type: code
186
+ name: HumanEvalFix
187
+ metrics:
188
+ - name: pass@1
189
+ type: pass@1
190
+ value: 35.98
191
  verified: false
192
  - task:
193
  type: text-generation
 
197
  metrics:
198
  - name: pass@1
199
  type: pass@1
200
+ value: 27.40
201
  verified: false
202
  - task:
203
  type: text-generation
 
207
  metrics:
208
  - name: pass@1
209
  type: pass@1
210
+ value: 47.54
211
  verified: false
212
  - task:
213
  type: text-generation
 
217
  metrics:
218
  - name: pass@1
219
  type: pass@1
220
+ value: 19.86
221
  verified: false
222
  - task:
223
  type: text-generation
224
  dataset:
225
  type: multilingual
226
+ name: PAWS-X (7 langs)
227
  metrics:
228
  - name: pass@1
229
  type: pass@1
230
+ value: 50.23
231
+ verified: false
232
+ - task:
233
+ type: text-generation
234
+ dataset:
235
+ type: multilingual
236
+ name: MGSM (6 langs)
237
+ metrics:
238
+ - name: pass@1
239
+ type: pass@1
240
+ value: 28.87
241
+ verified: false
242
  ---
243
 
244
  <!-- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png) -->