1
+ {
2
+ "CodeLlama-34B-Instruct" : {
3
+ "link" : " https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf" ,
4
+ "open-data" : " None" ,
5
+ "pass@1" : {
6
+ "instruct" : null ,
7
+ "complete" : 38.73
8
+ },
9
+ "prompted" : true ,
10
+ "size" : 34 ,
11
+ "direct_complete" : false ,
12
+ "lazy" : false ,
13
+ "elo_mle" : 942
14
+ },
15
+ "Meta-Llama-3-70B" : {
16
+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B" ,
17
+ "open-data" : " None" ,
18
+ "pass@1" : {
19
+ "instruct" : null ,
20
+ "complete" : 48.98
21
+ },
22
+ "prompted" : false ,
23
+ "size" : 70 ,
24
+ "direct_complete" : false ,
25
+ "lazy" : false ,
26
+ "elo_mle" : 874
27
+ },
28
+ "Meta-Llama-3-70B-Instruct" : {
29
+ "link" : " https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" ,
30
+ "open-data" : " None" ,
31
+ "pass@1" : {
32
+ "instruct" : null ,
33
+ "complete" : 62.45
34
+ },
35
+ "prompted" : true ,
36
+ "size" : 70 ,
37
+ "direct_complete" : false ,
38
+ "lazy" : false ,
39
+ "elo_mle" : 874
40
+ },
41
+ "Meta-Llama-3.1-70B-Instruct" : {
42
+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct" ,
43
+ "open-data" : " None" ,
44
+ "pass@1" : {
45
+ "instruct" : null ,
46
+ "complete" : 60
47
+ },
48
+ "prompted" : true ,
49
+ "size" : 70 ,
50
+ "direct_complete" : false ,
51
+ "lazy" : false ,
52
+ "elo_mle" : 874
53
+ },
54
+ "Meta-Llama-3.1-70B" : {
55
+ "link" : " https://huggingface.co/meta-llama/Llama-3.1-70B" ,
56
+ "open-data" : " None" ,
57
+ "pass@1" : {
58
+ "instruct" : null ,
59
+ "complete" : 37.56
60
+ },
61
+ "prompted" : false ,
62
+ "size" : 70 ,
63
+ "direct_complete" : false ,
64
+ "lazy" : false ,
65
+ "elo_mle" : 874
66
+ },
67
+ "Mistral-7B-Instruct-v0.3" : {
68
+ "link" : " https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3" ,
69
+ "open-data" : " None" ,
70
+ "pass@1" : {
71
+ "instruct" : null ,
72
+ "complete" : 43.33
73
+ },
74
+ "prompted" : true ,
75
+ "size" : 7 ,
76
+ "direct_complete" : false ,
77
+ "lazy" : false ,
78
+ "elo_mle" : 874
79
+ },
80
+ "Mixtral-8x7B-Instruct-v0.1" : {
81
+ "link" : " https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1" ,
82
+ "open-data" : " None" ,
83
+ "pass@1" : {
84
+ "instruct" : null ,
85
+ "complete" : 42.96
86
+ },
87
+ "prompted" : true ,
88
+ "size" : 46.7 ,
89
+ "direct_complete" : false ,
90
+ "lazy" : false ,
91
+ "elo_mle" : 874
92
+ },
93
+ "Codestral-22B-v0.1" : {
94
+ "link" : " https://huggingface.co/mistralai/Codestral-22B-v0.1" ,
95
+ "open-data" : " None" ,
96
+ "pass@1" : {
97
+ "instruct" : null ,
98
+ "complete" : 47.6
99
+ },
100
+ "prompted" : true ,
101
+ "size" : 22 ,
102
+ "direct_complete" : false ,
103
+ "lazy" : false ,
104
+ "elo_mle" : 874
105
+ },
106
+ "Phi-3-medium-128k-instruct" : {
107
+ "link" : " https://huggingface.co/microsoft/Phi-3-medium-128k-instruct" ,
108
+ "open-data" : " None" ,
109
+ "pass@1" : {
110
+ "instruct" : null ,
111
+ "complete" : 48.03
112
+ },
113
+ "prompted" : true ,
114
+ "size" : 14 ,
115
+ "direct_complete" : false ,
116
+ "lazy" : false ,
117
+ "elo_mle" : 874
118
+ },
119
+ "Phi-3-mini-128k-instruct" : {
120
+ "link" : " https://huggingface.co/microsoft/Phi-3-mini-128k-instruct" ,
121
+ "open-data" : " None" ,
122
+ "pass@1" : {
123
+ "instruct" : null ,
124
+ "complete" : 37.93
125
+ },
126
+ "prompted" : true ,
127
+ "size" : 3.8 ,
128
+ "direct_complete" : false ,
129
+ "lazy" : false ,
130
+ "elo_mle" : 874
131
+ },
132
+ "Qwen2-57B-A14B-Instruct" : {
133
+ "link" : " https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct" ,
134
+ "open-data" : " None" ,
135
+ "pass@1" : {
136
+ "instruct" : null ,
137
+ "complete" : 46.34
138
+ },
139
+ "prompted" : true ,
140
+ "size" : 57 ,
141
+ "direct_complete" : false ,
142
+ "lazy" : false ,
143
+ "elo_mle" : 874
144
+ },
145
+ "CodeQwen1.5-7B-Chat" : {
146
+ "link" : " https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat" ,
147
+ "open-data" : " None" ,
148
+ "pass@1" : {
149
+ "instruct" : null ,
150
+ "complete" : 49.82
151
+ },
152
+ "prompted" : true ,
153
+ "size" : 7 ,
154
+ "direct_complete" : false ,
155
+ "lazy" : false ,
156
+ "elo_mle" : 874
157
+ },
158
+ "Yi-1.5-34B-Chat" : {
159
+ "link" : " https://huggingface.co/01-ai/Yi-1.5-34B-Chat" ,
160
+ "open-data" : " None" ,
161
+ "pass@1" : {
162
+ "instruct" : null ,
163
+ "complete" : 49.39
164
+ },
165
+ "prompted" : true ,
166
+ "size" : 34 ,
167
+ "direct_complete" : false ,
168
+ "lazy" : false ,
169
+ "elo_mle" : 874
170
+ },
171
+ "Yi-1.5-9B-Chat" : {
172
+ "link" : " https://huggingface.co/01-ai/Yi-1.5-9B-Chat" ,
173
+ "open-data" : " None" ,
174
+ "pass@1" : {
175
+ "instruct" : null ,
176
+ "complete" : 47.23
177
+ },
178
+ "prompted" : true ,
179
+ "size" : 9 ,
180
+ "direct_complete" : false ,
181
+ "lazy" : false ,
182
+ "elo_mle" : 874
183
+ },
184
+ "DeepSeek-coder-7b-instruct-v1.5" : {
185
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5" ,
186
+ "open-data" : " None" ,
187
+ "pass@1" : {
188
+ "instruct" : null ,
189
+ "complete" : 41.21
190
+ },
191
+ "prompted" : true ,
192
+ "size" : 7 ,
193
+ "direct_complete" : false ,
194
+ "lazy" : false ,
195
+ "elo_mle" : 874
196
+ },
197
+ "DeepSeek-coder-33b-instruct" : {
198
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct" ,
199
+ "open-data" : " None" ,
200
+ "pass@1" : {
201
+ "instruct" : null ,
202
+ "complete" : 36.6
203
+ },
204
+ "prompted" : true ,
205
+ "size" : 33 ,
206
+ "direct_complete" : false ,
207
+ "lazy" : false ,
208
+ "elo_mle" : 874
209
+ },
210
+ "DeepSeek-moe-16b-chat" : {
211
+ "link" : " https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat" ,
212
+ "open-data" : " None" ,
213
+ "pass@1" : {
214
+ "instruct" : null ,
215
+ "complete" : 31.01
216
+ },
217
+ "prompted" : true ,
218
+ "size" : 16.4 ,
219
+ "direct_complete" : false ,
220
+ "lazy" : false ,
221
+ "elo_mle" : 874
222
+ },
223
+ "DeepSeek-Coder-V2-Lite-Instruct" : {
224
+ "link" : " https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct" ,
225
+ "open-data" : " None" ,
226
+ "pass@1" : {
227
+ "instruct" : null ,
228
+ "complete" : 46.51
229
+ },
230
+ "prompted" : true ,
231
+ "size" : 16 ,
232
+ "direct_complete" : false ,
233
+ "lazy" : false ,
234
+ "elo_mle" : 874
235
+ },
236
+ "InternLM2-5-20b-chat" : {
237
+ "link" : " https://huggingface.co/internlm/internlm2_5-20b-chat" ,
238
+ "open-data" : " None" ,
239
+ "pass@1" : {
240
+ "instruct" : null ,
241
+ "complete" : 44.89
242
+ },
243
+ "prompted" : true ,
244
+ "size" : 20 ,
245
+ "direct_complete" : false ,
246
+ "lazy" : false ,
247
+ "elo_mle" : 874
248
+ },
249
+ "StarCoder2-15b-instruct-v0.1" : {
250
+ "link" : " https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1" ,
251
+ "open-data" : " None" ,
252
+ "pass@1" : {
253
+ "instruct" : null ,
254
+ "complete" : 47.94
255
+ },
256
+ "prompted" : true ,
257
+ "size" : 15 ,
258
+ "direct_complete" : false ,
259
+ "lazy" : false ,
260
+ "elo_mle" : 874
261
+ },
262
+ "Claude-3-sonnet@20240229" : {
263
+ "link" : " " ,
264
+ "open-data" : " None" ,
265
+ "pass@1" : {
266
+ "instruct" : null ,
267
+ "complete" : 53.97
268
+ },
269
+ "prompted" : true ,
270
+ "size" : null ,
271
+ "direct_complete" : false ,
272
+ "lazy" : false ,
273
+ "elo_mle" : 874
274
+ },
275
+ "GPT-4o-2024-05-13" : {
276
+ "link" : " " ,
277
+ "open-data" : " None" ,
278
+ "pass@1" : {
279
+ "instruct" : null ,
280
+ "complete" : 67
281
+ },
282
+ "prompted" : true ,
283
+ "size" : null ,
284
+ "direct_complete" : false ,
285
+ "lazy" : false ,
286
+ "elo_mle" : 874
287
+ },
288
+ "GPT-3.5-turbo-0613" : {
289
+ "link" : " " ,
290
+ "open-data" : " None" ,
291
+ "pass@1" : {
292
+ "instruct" : null ,
293
+ "complete" : 51.7
294
+ },
295
+ "prompted" : true ,
296
+ "size" : null ,
297
+ "direct_complete" : false ,
298
+ "lazy" : false ,
299
+ "elo_mle" : 874
300
+ }
301
+ }
0 commit comments