From aa52a9a06532b389c05e7fcb004dc91fbc2c0ac9 Mon Sep 17 00:00:00 2001 From: Akshita Bhagia Date: Wed, 2 Jun 2021 15:01:59 -0700 Subject: [PATCH] Checklist fixes (#5239) * bug fix * common lexicons * update changelog * Update CHANGELOG.md --- .../sentiment_analysis_suite.py | 312 +++++++++--------- .../task_checklists/task_suite.py | 1 + .../textual_entailment_suite.py | 88 +---- .../task_checklists/utils.py | 86 +++++ 4 files changed, 247 insertions(+), 240 deletions(-) diff --git a/allennlp/confidence_checks/task_checklists/sentiment_analysis_suite.py b/allennlp/confidence_checks/task_checklists/sentiment_analysis_suite.py index 2c68cd9efaf..a6eeac149a0 100644 --- a/allennlp/confidence_checks/task_checklists/sentiment_analysis_suite.py +++ b/allennlp/confidence_checks/task_checklists/sentiment_analysis_suite.py @@ -3,7 +3,6 @@ from overrides import overrides from checklist.test_suite import TestSuite from checklist.test_types import MFT, INV, DIR, Expect -from checklist.editor import Editor from checklist.perturb import Perturb from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite from allennlp.confidence_checks.task_checklists import utils @@ -60,7 +59,7 @@ def preds_and_confs_fn(data): for pred in predictions: label = pred["probs"].index(max(pred["probs"])) labels.append(label) - confs.append([pred["probs"][self._positive], pred["probs"][self._negative]]) + confs.append(pred["probs"]) return np.array(labels), np.array(confs) return preds_and_confs_fn @@ -97,156 +96,153 @@ def _default_tests(self, data: Optional[Iterable[str]], num_test_cases=100): self._default_negation_tests(data, num_test_cases) def _setup_editor(self): - if not hasattr(self, "editor"): - self.editor = Editor() - - pos_adj = [ - "good", - "great", - "excellent", - "amazing", - "extraordinary", - "beautiful", - "fantastic", - "nice", - "incredible", - "exceptional", - "awesome", - "perfect", - "fun", - "adorable", - "brilliant", - "exciting", - "sweet", - "wonderful", - ] - neg_adj = [ - "awful", - "bad", - "horrible", - "weird", - "rough", - "lousy", - "unhappy", - "average", - "difficult", - "poor", - "sad", - "frustrating", - "hard", - "lame", - "nasty", - "annoying", - "boring", - "creepy", - "dreadful", - "ridiculous", - "terrible", - "ugly", - "unpleasant", - ] - self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True) - self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True) - - pos_verb_present = [ - "like", - "enjoy", - "appreciate", - "love", - "recommend", - "admire", - "value", - "welcome", - ] - neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"] - pos_verb_past = [ - "liked", - "enjoyed", - "appreciated", - "loved", - "admired", - "valued", - "welcomed", - ] - neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"] - self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True) - self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True) - self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True) - self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True) - self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True) - self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True) - - noun = [ - "airline", - "movie", - "product", - "customer service", - "restaurant", - "hotel", - "food", - "staff", - "company", - "crew", - "service", - ] - self.editor.add_lexicon("noun", noun, overwrite=True) - - intens_adj = [ - "very", - "really", - "absolutely", - "truly", - "extremely", - "quite", - "incredibly", - "amazingly", - "especially", - "exceptionally", - "unbelievably", - "utterly", - "exceedingly", - "rather", - "totally", - "particularly", - ] - intens_verb = [ - "really", - "absolutely", - "truly", - "extremely", - "especially", - "utterly", - "totally", - "particularly", - "highly", - "definitely", - "certainly", - "genuinely", - "honestly", - "strongly", - "sure", - "sincerely", - ] - - self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True) - self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True) - - reducer_adj = [ - "somewhat", - "kinda", - "mostly", - "probably", - "generally", - "reasonably", - "a little", - "a bit", - "slightly", - ] - - self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True) - - self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1) - self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1) + super()._setup_editor() + + pos_adj = [ + "good", + "great", + "excellent", + "amazing", + "extraordinary", + "beautiful", + "fantastic", + "nice", + "incredible", + "exceptional", + "awesome", + "perfect", + "fun", + "adorable", + "brilliant", + "exciting", + "sweet", + "wonderful", + ] + neg_adj = [ + "awful", + "bad", + "horrible", + "weird", + "rough", + "lousy", + "average", + "difficult", + "poor", + "sad", + "frustrating", + "lame", + "nasty", + "annoying", + "boring", + "creepy", + "dreadful", + "ridiculous", + "terrible", + "ugly", + "unpleasant", + ] + self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True) + self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True) + + pos_verb_present = [ + "like", + "enjoy", + "appreciate", + "love", + "recommend", + "admire", + "value", + "welcome", + ] + neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"] + pos_verb_past = [ + "liked", + "enjoyed", + "appreciated", + "loved", + "admired", + "valued", + "welcomed", + ] + neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"] + self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True) + self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True) + self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True) + self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True) + self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True) + self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True) + + noun = [ + "airline", + "movie", + "product", + "customer service", + "restaurant", + "hotel", + "food", + "staff", + "company", + "crew", + "service", + ] + self.editor.add_lexicon("noun", noun, overwrite=True) + + intens_adj = [ + "very", + "really", + "absolutely", + "truly", + "extremely", + "quite", + "incredibly", + "amazingly", + "especially", + "exceptionally", + "unbelievably", + "utterly", + "exceedingly", + "rather", + "totally", + "particularly", + ] + intens_verb = [ + "really", + "absolutely", + "truly", + "extremely", + "especially", + "utterly", + "totally", + "particularly", + "highly", + "definitely", + "certainly", + "genuinely", + "honestly", + "strongly", + "sure", + "sincerely", + ] + + self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True) + self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True) + + reducer_adj = [ + "somewhat", + "kinda", + "mostly", + "probably", + "generally", + "reasonably", + "a little", + "a bit", + "slightly", + ] + + self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True) + + self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1) + self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1) def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100): @@ -371,7 +367,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_case templates=template.templates, name="Intensifiers", capability="Vocabulary", - description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier" + description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier " "such as 'really',or 'very' to x2 and expect the confidence to NOT go down " "(with tolerance=0.1). e.g.:" "x1 = 'That was a good movie'" @@ -400,7 +396,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_case templates=template.templates, name="Reducers", capability="Vocabulary", - description="Test is composed of pairs of sentences (x1, x2), where we add a reducer" + description="Test is composed of pairs of sentences (x1, x2), where we add a reducer " "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up " " (with tolerance=0.1). e.g.:" "x1 = 'The staff was good.'" @@ -555,8 +551,8 @@ def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases= capability="Temporal", description="Have two conflicing statements, one about the past and " "one about the present." - "Expect the present to carry the sentiment. Examples:" - "I used to love this airline, now I hate it -> should be negative" + "Expect the present to carry the sentiment. Examples:\n" + "I used to love this airline, now I hate it -> should be negative\n" "I love this airline, although I used to hate it -> should be positive", ) @@ -604,13 +600,13 @@ def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases= for p, vals in protected.items(): template = self.editor.template( - ["{male} is %s {mask}." % r for r in vals], + ["{male} is %s {profession}." % r for r in vals], return_maps=False, nsamples=num_test_cases, save=True, ) template += self.editor.template( - ["{female} is %s {mask}." % r for r in vals], + ["{female} is %s {profession}." % r for r in vals], return_maps=False, nsamples=num_test_cases, save=True, diff --git a/allennlp/confidence_checks/task_checklists/task_suite.py b/allennlp/confidence_checks/task_checklists/task_suite.py index 6ddf00d59b1..0d7e1a1f688 100644 --- a/allennlp/confidence_checks/task_checklists/task_suite.py +++ b/allennlp/confidence_checks/task_checklists/task_suite.py @@ -378,6 +378,7 @@ def _setup_editor(self): """ if not hasattr(self, "editor"): self.editor = Editor() + utils.add_common_lexicons(self.editor) def add_test(self, test: Union[MFT, INV, DIR]): """ diff --git a/allennlp/confidence_checks/task_checklists/textual_entailment_suite.py b/allennlp/confidence_checks/task_checklists/textual_entailment_suite.py index b8e1a810f23..7e7fb30209d 100644 --- a/allennlp/confidence_checks/task_checklists/textual_entailment_suite.py +++ b/allennlp/confidence_checks/task_checklists/textual_entailment_suite.py @@ -220,82 +220,6 @@ def _setup_editor(self): ] self.editor.add_lexicon("nouns", nouns, overwrite=True) - professions = [ - "journalist", - "historian", - "secretary", - "nurse", - "waitress", - "accountant", - "engineer", - "attorney", - "artist", - "editor", - "architect", - "model", - "interpreter", - "analyst", - "actor", - "actress", - "assistant", - "intern", - "economist", - "organizer", - "author", - "investigator", - "agent", - "administrator", - "executive", - "educator", - "investor", - "DJ", - "entrepreneur", - "auditor", - "advisor", - "instructor", - "activist", - "consultant", - "apprentice", - "reporter", - "expert", - "psychologist", - "examiner", - "painter", - "manager", - "contractor", - "therapist", - "programmer", - "musician", - "producer", - "associate", - "intermediary", - "designer", - "cook", - "salesperson", - "dentist", - "attorney", - "detective", - "banker", - "researcher", - "cop", - "driver", - "counselor", - "clerk", - "professor", - "tutor", - "coach", - "chemist", - "scientist", - "veterinarian", - "firefighter", - "baker", - "psychiatrist", - "prosecutor", - "director", - "technician", - ] - self.editor.add_lexicon("professions", professions, overwrite=True) - @overrides def _default_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100): super()._default_tests(data, num_test_cases) @@ -406,8 +330,8 @@ def _default_ner_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100 def _default_temporal_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100): template = self.editor.template( ( - "{first_name} works as {a:professions}", - "{first_name} used to work as a {professions}", + "{first_name} works as {a:profession}", + "{first_name} used to work as a {profession}", ), nsamples=num_test_cases, remove_duplicates=True, @@ -415,8 +339,8 @@ def _default_temporal_tests(self, data: Optional[Iterable[Tuple]], num_test_case template += self.editor.template( ( - "{first_name} {last_name} is {a:professions}", - "{first_name} {last_name} was {a:professions}", + "{first_name} {last_name} is {a:profession}", + "{first_name} {last_name} was {a:profession}", ), nsamples=num_test_cases, remove_duplicates=True, @@ -434,8 +358,8 @@ def _default_temporal_tests(self, data: Optional[Iterable[Tuple]], num_test_case template = self.editor.template( ( - "{first_name} was {a:professions1} before they were {a:professions2}", - "{first_name} was {a:professions1} after they were {a:professions2}", + "{first_name} was {a:profession1} before they were {a:profession2}", + "{first_name} was {a:profession1} after they were {a:profession2}", ), nsamples=num_test_cases, remove_duplicates=True, diff --git a/allennlp/confidence_checks/task_checklists/utils.py b/allennlp/confidence_checks/task_checklists/utils.py index 22ad9deedf1..236c1618372 100644 --- a/allennlp/confidence_checks/task_checklists/utils.py +++ b/allennlp/confidence_checks/task_checklists/utils.py @@ -2,6 +2,92 @@ from typing import Dict, Callable, List, Union import numpy as np import spacy +from checklist.editor import Editor + + +def add_common_lexicons(editor: Editor): + """ + Add commonly used lexicons to the editor object. These can be used in all + the task suites. + + Note: Updates the `editor` object in place. + """ + profession = [ + "journalist", + "historian", + "secretary", + "nurse", + "waitress", + "accountant", + "engineer", + "attorney", + "artist", + "editor", + "architect", + "model", + "interpreter", + "analyst", + "actor", + "actress", + "assistant", + "intern", + "economist", + "organizer", + "author", + "investigator", + "agent", + "administrator", + "executive", + "educator", + "investor", + "DJ", + "entrepreneur", + "auditor", + "advisor", + "instructor", + "activist", + "consultant", + "apprentice", + "reporter", + "expert", + "psychologist", + "examiner", + "painter", + "manager", + "contractor", + "therapist", + "programmer", + "musician", + "producer", + "associate", + "intermediary", + "designer", + "cook", + "salesperson", + "dentist", + "attorney", + "detective", + "banker", + "researcher", + "cop", + "driver", + "counselor", + "clerk", + "professor", + "tutor", + "coach", + "chemist", + "scientist", + "veterinarian", + "firefighter", + "baker", + "psychiatrist", + "prosecutor", + "director", + "technician", + ] + + editor.add_lexicon("profession", profession, overwrite=True) def spacy_wrap(fn: Callable, language: str = "en_core_web_sm", **kwargs) -> Callable: