Update metrics for human-AI agreement (add accuracy)

Mcilie · Mcilie · commit 6d88c7c2da25 · 2024-05-03T12:55:44.000-04:00
diff --git a/src/prompt_systematic_review/experiments/evaluate_human_agreement.py b/src/prompt_systematic_review/experiments/evaluate_human_agreement.py
@@ -57,15 +57,18 @@ def evaluate_human_agreement(inputFile="arxiv_papers_with_abstract.csv"):
     agreement_grid = pd.crosstab(df_limited["AI_keep"], df_limited["human_review"])
 
     true_positives = agreement_grid.loc[True, True]
+    true_negatives = agreement_grid.loc[False, False]
     false_positives = agreement_grid.loc[True, False]
     false_negatives = agreement_grid.loc[False, True]
 
+    accuracy = (true_positives + true_negatives) / len(df_limited)
     precision = true_positives / (true_positives + false_positives)
     recall = true_positives / (true_positives + false_negatives)
 
     f1_score = 2 * (precision * recall) / (precision + recall)
     print(f"Precision: {precision}")
     print(f"Recall: {recall}")
+    print(f"Accuracy: {accuracy}")
     print(f"F1 Score: {f1_score}")