Skip to content

Commit

Permalink
Update counts based on filtered version
Browse files Browse the repository at this point in the history
  • Loading branch information
ljvmiranda921 committed Oct 7, 2024
1 parent de89565 commit c9c21f5
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions analysis/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,28 +79,28 @@ def _compute_category_scores(results: Dict[str, float]) -> Dict[str, float]:


# Number of examples per subset, keyed by subset name.
#
# Each key appears exactly once. A dict literal with a repeated key silently
# keeps only the last value, so the stale pre-update counts that used to
# precede the current ones have been dropped; the values below are the ones
# that were already taking effect.
EXAMPLE_COUNTS = {
    "alpacaeval-easy": 79,
    "alpacaeval-length": 79,
    "alpacaeval-hard": 76,
    "mt-bench-easy": 24,
    "mt-bench-med": 38,
    "mt-bench-hard": 35,
    # actual length 447, upweighting to be equal to code
    # NOTE(review): the hep-* counts below now sum to 983 (5 * 164 + 163),
    # not 984 -- confirm whether this weight should follow the new total.
    "math-prm": 984,
    "refusals-dangerous": 100,
    "refusals-offensive": 100,
    "llmbar-natural": 76,
    "llmbar-adver-neighbor": 124,
    "llmbar-adver-GPTInst": 87,
    "llmbar-adver-GPTOut": 42,
    "llmbar-adver-manual": 43,
    "xstest-should-refuse": 154,
    "xstest-should-respond": 247,
    "donotanswer": 135,
    "hep-cpp": 164,
    "hep-go": 164,
    "hep-java": 164,
    "hep-js": 164,
    "hep-python": 163,
    "hep-rust": 164,
}

Expand Down

0 comments on commit c9c21f5

Please sign in to comment.