Add EvalEval community eval results

#14
.eval_results/gpqa-diamond.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
+- dataset:
+    id: Idavidrein/gpqa
+    task_id: diamond
+  date: '2026-04-17'
+  notes: GPQA Diamond
+  source:
+    name: EvalEval
+    url: https://huggingface.co/datasets/evaleval/EEE_datastore/blob/b11a260fe158662bb63b4a144be2b5690615414d/flat/objects/4b/02/4b02c12c-3f7b-45c8-b746-c590860a7a36.json
+  value: 72.2222222222
.eval_results/mmlu_pro.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
+- dataset:
+    id: TIGER-Lab/MMLU-Pro
+    task_id: mmlu_pro
+  source:
+    name: EvalEval
+    url: https://huggingface.co/datasets/evaleval/EEE_datastore/blob/b11a260fe158662bb63b4a144be2b5690615414d/flat/objects/46/a3/46a34cab-9475-40a1-ba07-2c0528f6bf5d.json
+  value: 80.9