Skip to content

Commit 02bfe4b

Browse files
authored
Merge pull request #1 from FSoft-AI4Code/dev/code_mmlu
Update CodeMMLU
2 parents 9bc7186 + 701a73e commit 02bfe4b

File tree

8 files changed

+1240
-97
lines changed

8 files changed

+1240
-97
lines changed

codemmlu/index.html

Lines changed: 261 additions & 19 deletions
Large diffs are not rendered by default.
32.5 KB
Loading
1.08 MB
Loading
3.33 MB
Loading

leaderboards/codemmlu/_results.json

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
{
2+
"CodeLlama-34B-Instruct": {
3+
"link": "https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf",
4+
"open-data": "None",
5+
"pass@1": {
6+
"instruct": null,
7+
"complete": 38.73
8+
},
9+
"prompted": true,
10+
"size": 34,
11+
"direct_complete": false,
12+
"lazy": false,
13+
"elo_mle": 942
14+
},
15+
"Meta-Llama-3-70B": {
16+
"link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B",
17+
"open-data": "None",
18+
"pass@1": {
19+
"instruct": null,
20+
"complete": 48.98
21+
},
22+
"prompted": false,
23+
"size": 70,
24+
"direct_complete": false,
25+
"lazy": false,
26+
"elo_mle": 874
27+
},
28+
"Meta-Llama-3-70B-Instruct": {
29+
"link": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
30+
"open-data": "None",
31+
"pass@1": {
32+
"instruct": null,
33+
"complete": 62.45
34+
},
35+
"prompted": true,
36+
"size": 70,
37+
"direct_complete": false,
38+
"lazy": false,
39+
"elo_mle": 874
40+
},
41+
"Meta-Llama-3.1-70B-Instruct": {
42+
"link": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
43+
"open-data": "None",
44+
"pass@1": {
45+
"instruct": null,
46+
"complete": 60
47+
},
48+
"prompted": true,
49+
"size": 70,
50+
"direct_complete": false,
51+
"lazy": false,
52+
"elo_mle": 874
53+
},
54+
"Meta-Llama-3.1-70B": {
55+
"link": "https://huggingface.co/meta-llama/Llama-3.1-70B",
56+
"open-data": "None",
57+
"pass@1": {
58+
"instruct": null,
59+
"complete": 37.56
60+
},
61+
"prompted": false,
62+
"size": 70,
63+
"direct_complete": false,
64+
"lazy": false,
65+
"elo_mle": 874
66+
},
67+
"Mistral-7B-Instruct-v0.3": {
68+
"link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
69+
"open-data": "None",
70+
"pass@1": {
71+
"instruct": null,
72+
"complete": 43.33
73+
},
74+
"prompted": true,
75+
"size": 7,
76+
"direct_complete": false,
77+
"lazy": false,
78+
"elo_mle": 874
79+
},
80+
"Mixtral-8x7B-Instruct-v0.1": {
81+
"link": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
82+
"open-data": "None",
83+
"pass@1": {
84+
"instruct": null,
85+
"complete": 42.96
86+
},
87+
"prompted": true,
88+
"size": 46.7,
89+
"direct_complete": false,
90+
"lazy": false,
91+
"elo_mle": 874
92+
},
93+
"Codestral-22B-v0.1": {
94+
"link": "https://huggingface.co/mistralai/Codestral-22B-v0.1",
95+
"open-data": "None",
96+
"pass@1": {
97+
"instruct": null,
98+
"complete": 47.6
99+
},
100+
"prompted": true,
101+
"size": 22,
102+
"direct_complete": false,
103+
"lazy": false,
104+
"elo_mle": 874
105+
},
106+
"Phi-3-medium-128k-instruct": {
107+
"link": "https://huggingface.co/microsoft/Phi-3-medium-128k-instruct",
108+
"open-data": "None",
109+
"pass@1": {
110+
"instruct": null,
111+
"complete": 48.03
112+
},
113+
"prompted": true,
114+
"size": 14,
115+
"direct_complete": false,
116+
"lazy": false,
117+
"elo_mle": 874
118+
},
119+
"Phi-3-mini-128k-instruct": {
120+
"link": "https://huggingface.co/microsoft/Phi-3-mini-128k-instruct",
121+
"open-data": "None",
122+
"pass@1": {
123+
"instruct": null,
124+
"complete": 37.93
125+
},
126+
"prompted": true,
127+
"size": 3.8,
128+
"direct_complete": false,
129+
"lazy": false,
130+
"elo_mle": 874
131+
},
132+
"Qwen2-57B-A14B-Instruct": {
133+
"link": "https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct",
134+
"open-data": "None",
135+
"pass@1": {
136+
"instruct": null,
137+
"complete": 46.34
138+
},
139+
"prompted": true,
140+
"size": 57,
141+
"direct_complete": false,
142+
"lazy": false,
143+
"elo_mle": 874
144+
},
145+
"CodeQwen1.5-7B-Chat": {
146+
"link": "https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat",
147+
"open-data": "None",
148+
"pass@1": {
149+
"instruct": null,
150+
"complete": 49.82
151+
},
152+
"prompted": true,
153+
"size": 7,
154+
"direct_complete": false,
155+
"lazy": false,
156+
"elo_mle": 874
157+
},
158+
"Yi-1.5-34B-Chat": {
159+
"link": "https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
160+
"open-data": "None",
161+
"pass@1": {
162+
"instruct": null,
163+
"complete": 49.39
164+
},
165+
"prompted": true,
166+
"size": 34,
167+
"direct_complete": false,
168+
"lazy": false,
169+
"elo_mle": 874
170+
},
171+
"Yi-1.5-9B-Chat": {
172+
"link": "https://huggingface.co/01-ai/Yi-1.5-9B-Chat",
173+
"open-data": "None",
174+
"pass@1": {
175+
"instruct": null,
176+
"complete": 47.23
177+
},
178+
"prompted": true,
179+
"size": 9,
180+
"direct_complete": false,
181+
"lazy": false,
182+
"elo_mle": 874
183+
},
184+
"DeepSeek-coder-7b-instruct-v1.5": {
185+
"link": "https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5",
186+
"open-data": "None",
187+
"pass@1": {
188+
"instruct": null,
189+
"complete": 41.21
190+
},
191+
"prompted": true,
192+
"size": 7,
193+
"direct_complete": false,
194+
"lazy": false,
195+
"elo_mle": 874
196+
},
197+
"DeepSeek-coder-33b-instruct": {
198+
"link": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
199+
"open-data": "None",
200+
"pass@1": {
201+
"instruct": null,
202+
"complete": 36.6
203+
},
204+
"prompted": true,
205+
"size": 33,
206+
"direct_complete": false,
207+
"lazy": false,
208+
"elo_mle": 874
209+
},
210+
"DeepSeek-moe-16b-chat": {
211+
"link": "https://huggingface.co/deepseek-ai/deepseek-moe-16b-chat",
212+
"open-data": "None",
213+
"pass@1": {
214+
"instruct": null,
215+
"complete": 31.01
216+
},
217+
"prompted": true,
218+
"size": 16.4,
219+
"direct_complete": false,
220+
"lazy": false,
221+
"elo_mle": 874
222+
},
223+
"DeepSeek-Coder-V2-Lite-Instruct": {
224+
"link": "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
225+
"open-data": "None",
226+
"pass@1": {
227+
"instruct": null,
228+
"complete": 46.51
229+
},
230+
"prompted": true,
231+
"size": 16,
232+
"direct_complete": false,
233+
"lazy": false,
234+
"elo_mle": 874
235+
},
236+
"InternLM2-5-20b-chat": {
237+
"link": "https://huggingface.co/internlm/internlm2_5-20b-chat",
238+
"open-data": "None",
239+
"pass@1": {
240+
"instruct": null,
241+
"complete": 44.89
242+
},
243+
"prompted": true,
244+
"size": 20,
245+
"direct_complete": false,
246+
"lazy": false,
247+
"elo_mle": 874
248+
},
249+
"StarCoder2-15b-instruct-v0.1": {
250+
"link": "https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1",
251+
"open-data": "None",
252+
"pass@1": {
253+
"instruct": null,
254+
"complete": 47.94
255+
},
256+
"prompted": true,
257+
"size": 15,
258+
"direct_complete": false,
259+
"lazy": false,
260+
"elo_mle": 874
261+
},
262+
"Claude-3-sonnet@20240229": {
263+
"link": "",
264+
"open-data": "None",
265+
"pass@1": {
266+
"instruct": null,
267+
"complete": 53.97
268+
},
269+
"prompted": true,
270+
"size": null,
271+
"direct_complete": false,
272+
"lazy": false,
273+
"elo_mle": 874
274+
},
275+
"GPT-4o-2024-05-13": {
276+
"link": "",
277+
"open-data": "None",
278+
"pass@1": {
279+
"instruct": null,
280+
"complete": 67
281+
},
282+
"prompted": true,
283+
"size": null,
284+
"direct_complete": false,
285+
"lazy": false,
286+
"elo_mle": 874
287+
},
288+
"GPT-3.5-turbo-0613": {
289+
"link": "",
290+
"open-data": "None",
291+
"pass@1": {
292+
"instruct": null,
293+
"complete": 51.7
294+
},
295+
"prompted": true,
296+
"size": null,
297+
"direct_complete": false,
298+
"lazy": false,
299+
"elo_mle": 874
300+
}
301+
}
32.5 KB
Loading

0 commit comments

Comments
 (0)