From b92fd9a7cd3b9c295f94bf7df6edd228172e91a9 Mon Sep 17 00:00:00 2001 From: ArjunSubramonian Date: Wed, 2 Jun 2021 16:14:01 -0700 Subject: [PATCH] Contextualized bias mitigation (#5176) * added linear and hard debiasers * worked on documentation * committing changes before branch switch * committing changes before switching branch * finished bias direction, linear and hard debiasers, need to write tests * finished bias direction test * Commiting changes before switching branch * finished hard and linear debiasers * finished OSCaR * bias mitigators tests and bias metrics remaining * added bias mitigator tests * added bias mitigator tests * finished tests for bias mitigation methods * fixed gpu issues * fixed gpu issues * fixed gpu issues * resolve issue with count_nonzero not being differentiable * added more references * fairness during finetuning * finished bias mitigator wrapper * added reference * updated CHANGELOG and fixed minor docs issues * move id tensors to embedding device * fixed to use predetermined bias direction * fixed minor doc errors * snli reader registration issue * fixed _pretrained from params issue * fixed device issues * evaluate bias mitigation initial commit * finished evaluate bias mitigation * handles multiline prediction files * fixed minor bugs * fixed minor bugs * improved prediction diff JSON format * forgot to resolve a conflict * Refactored evaluate bias mitigation to use NLI metric * Added SNLIPredictionsDiff class * ensured dataloader is same for bias mitigated and baseline models * finished evaluate bias mitigation * Update CHANGELOG.md * Replaced local data files with github raw content links * Update allennlp/fairness/bias_mitigator_applicator.py Co-authored-by: Pete * deleted evaluate_bias_mitigation from git tracking * removed evaluate-bias-mitigation instances from rest of repo * addressed Akshita's comments * moved bias mitigator applicator test to allennlp-models * removed unnecessary files Co-authored-by: Arjun Subramonian Co-authored-by: Arjun Subramonian Co-authored-by: Arjun Subramonian Co-authored-by: Arjun Subramonian Co-authored-by: Akshita Bhagia Co-authored-by: Pete --- CHANGELOG.md | 7 +- allennlp/fairness/__init__.py | 17 +- allennlp/fairness/bias_direction_wrappers.py | 269 +++ allennlp/fairness/bias_metrics.py | 2 + .../fairness/bias_mitigator_applicator.py | 114 ++ allennlp/fairness/bias_mitigator_wrappers.py | 266 +++ allennlp/fairness/bias_mitigators.py | 1 + allennlp/fairness/bias_utils.py | 111 ++ .../fairness/definitional_pairs.json | 42 + test_fixtures/fairness/equalize_pairs.json | 210 +++ .../fairness/gender_specific_full.json | 1443 +++++++++++++++++ tests/fairness/bias_utils_test.py | 79 + 12 files changed, 2557 insertions(+), 4 deletions(-) create mode 100644 allennlp/fairness/bias_direction_wrappers.py create mode 100644 allennlp/fairness/bias_mitigator_applicator.py create mode 100644 allennlp/fairness/bias_mitigator_wrappers.py create mode 100644 allennlp/fairness/bias_utils.py create mode 100644 test_fixtures/fairness/definitional_pairs.json create mode 100644 test_fixtures/fairness/equalize_pairs.json create mode 100644 test_fixtures/fairness/gender_specific_full.json create mode 100644 tests/fairness/bias_utils_test.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3aba4a6d638..8778f696a95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added `TaskSuite` base class and command line functionality for running 
[`checklist`](https://github.com/marcotcr/checklist) test suites, along with implementations for `SentimentAnalysisSuite`, `QuestionAnsweringSuite`, and `TextualEntailmentSuite`. These can be found in the `allennlp.confidence_checks.task_checklists` module. +- Added `BiasMitigatorApplicator`, which wraps any Model and mitigates biases by finetuning +on a downstream task. - Added `allennlp diff` command to compute a diff on model checkpoints, analogous to what `git diff` does on two files. - Meta data defined by the class `allennlp.common.meta.Meta` is now saved in the serialization directory and archive file when training models from the command line. This is also now part of the `Archive` named tuple that's returned from `load_archive()`. @@ -54,7 +56,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed `wandb` callback to work in distributed training. - Fixed `tqdm` logging into multiple files with `allennlp-optuna`. - ## [v2.4.0](https://github.com/allenai/allennlp/releases/tag/v2.4.0) - 2021-04-22 ### Added @@ -80,8 +81,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add new dimension to the `interpret` module: influence functions via the `InfluenceInterpreter` base class, along with a concrete implementation: `SimpleInfluence`. - Added a `quiet` parameter to the `MultiProcessDataLoading` that disables `Tqdm` progress bars. - The test for distributed metrics now takes a parameter specifying how often you want to run it. -- Created the fairness module and added four fairness metrics: `Independence`, `Separation`, and `Sufficiency`. -- Added three bias metrics to the fairness module: `WordEmbeddingAssociationTest`, `EmbeddingCoherenceTest`, `NaturalLanguageInference`, and `AssociationWithoutGroundTruth`. +- Created the fairness module and added three fairness metrics: `Independence`, `Separation`, and `Sufficiency`. +- Added four bias metrics to the fairness module: `WordEmbeddingAssociationTest`, `EmbeddingCoherenceTest`, `NaturalLanguageInference`, and `AssociationWithoutGroundTruth`. - Added four bias direction methods (`PCABiasDirection`, `PairedPCABiasDirection`, `TwoMeansBiasDirection`, `ClassificationNormalBiasDirection`) and four bias mitigation methods (`LinearBiasMitigator`, `HardBiasMitigator`, `INLPBiasMitigator`, `OSCaRBiasMitigator`). ### Changed diff --git a/allennlp/fairness/__init__.py b/allennlp/fairness/__init__.py index 976ada2d076..02a02506eb1 100644 --- a/allennlp/fairness/__init__.py +++ b/allennlp/fairness/__init__.py @@ -3,7 +3,8 @@ 1. measure the fairness of models according to multiple definitions of fairness 2. measure bias amplification -3. debias embeddings during training time and post-processing +3. 
mitigate bias in static and contextualized embeddings during training time and +post-processing """ from allennlp.fairness.fairness_metrics import Independence, Separation, Sufficiency @@ -25,3 +26,17 @@ INLPBiasMitigator, OSCaRBiasMitigator, ) +from allennlp.fairness.bias_utils import load_words, load_word_pairs +from allennlp.fairness.bias_mitigator_applicator import BiasMitigatorApplicator +from allennlp.fairness.bias_mitigator_wrappers import ( + HardBiasMitigatorWrapper, + LinearBiasMitigatorWrapper, + INLPBiasMitigatorWrapper, + OSCaRBiasMitigatorWrapper, +) +from allennlp.fairness.bias_direction_wrappers import ( + PCABiasDirectionWrapper, + PairedPCABiasDirectionWrapper, + TwoMeansBiasDirectionWrapper, + ClassificationNormalBiasDirectionWrapper, +) diff --git a/allennlp/fairness/bias_direction_wrappers.py b/allennlp/fairness/bias_direction_wrappers.py new file mode 100644 index 00000000000..94cb4abe8ca --- /dev/null +++ b/allennlp/fairness/bias_direction_wrappers.py @@ -0,0 +1,269 @@ +import torch +from typing import Union, Optional +from os import PathLike + +from allennlp.fairness.bias_direction import ( + BiasDirection, + PCABiasDirection, + PairedPCABiasDirection, + TwoMeansBiasDirection, + ClassificationNormalBiasDirection, +) +from allennlp.fairness.bias_utils import load_word_pairs, load_words + +from allennlp.common import Registrable +from allennlp.data.tokenizers.tokenizer import Tokenizer +from allennlp.data import Vocabulary + + +class BiasDirectionWrapper(Registrable): + """ + Parent class for bias direction wrappers. + """ + + def __init__(self): + self.direction: BiasDirection = None + self.noise: float = None + + def __call__(self, module): + raise NotImplementedError + + def train(self, mode: bool = True): + """ + + # Parameters + + mode : `bool`, optional (default=`True`) + Sets `requires_grad` to value of `mode` for bias direction. + """ + self.direction.requires_grad = mode + + def add_noise(self, t: torch.Tensor): + """ + + # Parameters + + t : `torch.Tensor` + Tensor to which to add small amount of Gaussian noise. + """ + return t + self.noise * torch.randn(t.size(), device=t.device) + + +@BiasDirectionWrapper.register("pca") +class PCABiasDirectionWrapper(BiasDirectionWrapper): + """ + + # Parameters + + seed_words_file : `Union[PathLike, str]` + Path of file containing seed words. + tokenizer : `Tokenizer` + Tokenizer used to tokenize seed words. + direction_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of direction_vocab to use when tokenizing. + Disregarded when direction_vocab is `None`. + requires_grad : `bool`, optional (default=`False`) + Option to enable gradient calculation for bias direction. + noise : `float`, optional (default=`1e-10`) + To avoid numerical instability if embeddings are initialized uniformly. 
+ """ + + def __init__( + self, + seed_words_file: Union[PathLike, str], + tokenizer: Tokenizer, + direction_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + requires_grad: bool = False, + noise: float = 1e-10, + ): + self.ids = load_words(seed_words_file, tokenizer, direction_vocab, namespace) + self.direction = PCABiasDirection(requires_grad=requires_grad) + self.noise = noise + + def __call__(self, module): + # embed subword token IDs and mean pool to get + # embedding of original word + ids_embeddings = [] + for i in self.ids: + i = i.to(module.weight.device) + ids_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids_embeddings = torch.cat(ids_embeddings) + + # adding trivial amount of noise + # to eliminate linear dependence amongst all embeddings + # when training first starts + ids_embeddings = self.add_noise(ids_embeddings) + + return self.direction(ids_embeddings) + + +@BiasDirectionWrapper.register("paired_pca") +class PairedPCABiasDirectionWrapper(BiasDirectionWrapper): + """ + + # Parameters + + seed_word_pairs_file : `Union[PathLike, str]` + Path of file containing seed word pairs. + tokenizer : `Tokenizer` + Tokenizer used to tokenize seed words. + direction_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of direction_vocab to use when tokenizing. + Disregarded when direction_vocab is `None`. + requires_grad : `bool`, optional (default=`False`) + Option to enable gradient calculation for bias direction. + noise : `float`, optional (default=`1e-10`) + To avoid numerical instability if embeddings are initialized uniformly. + """ + + def __init__( + self, + seed_word_pairs_file: Union[PathLike, str], + tokenizer: Tokenizer, + direction_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + requires_grad: bool = False, + noise: float = 1e-10, + ): + self.ids1, self.ids2 = load_word_pairs( + seed_word_pairs_file, tokenizer, direction_vocab, namespace + ) + self.direction = PairedPCABiasDirection(requires_grad=requires_grad) + self.noise = noise + + def __call__(self, module): + # embed subword token IDs and mean pool to get + # embedding of original word + ids1_embeddings = [] + for i in self.ids1: + i = i.to(module.weight.device) + ids1_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids2_embeddings = [] + for i in self.ids2: + i = i.to(module.weight.device) + ids2_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids1_embeddings = torch.cat(ids1_embeddings) + ids2_embeddings = torch.cat(ids2_embeddings) + + ids1_embeddings = self.add_noise(ids1_embeddings) + ids2_embeddings = self.add_noise(ids2_embeddings) + + return self.direction(ids1_embeddings, ids2_embeddings) + + +@BiasDirectionWrapper.register("two_means") +class TwoMeansBiasDirectionWrapper(BiasDirectionWrapper): + """ + + # Parameters + + seed_word_pairs_file : `Union[PathLike, str]` + Path of file containing seed word pairs. + tokenizer : `Tokenizer` + Tokenizer used to tokenize seed words. + direction_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of direction_vocab to use when tokenizing. + Disregarded when direction_vocab is `None`. 
+ requires_grad : `bool`, optional (default=`False`) + Option to enable gradient calculation for bias direction. + noise : `float`, optional (default=`1e-10`) + To avoid numerical instability if embeddings are initialized uniformly. + """ + + def __init__( + self, + seed_word_pairs_file: Union[PathLike, str], + tokenizer: Tokenizer, + direction_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + requires_grad: bool = False, + noise: float = 1e-10, + ): + self.ids1, self.ids2 = load_word_pairs( + seed_word_pairs_file, tokenizer, direction_vocab, namespace + ) + self.direction = TwoMeansBiasDirection(requires_grad=requires_grad) + self.noise = noise + + def __call__(self, module): + # embed subword token IDs and mean pool to get + # embedding of original word + ids1_embeddings = [] + for i in self.ids1: + i = i.to(module.weight.device) + ids1_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids2_embeddings = [] + for i in self.ids2: + i = i.to(module.weight.device) + ids2_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids1_embeddings = torch.cat(ids1_embeddings) + ids2_embeddings = torch.cat(ids2_embeddings) + + ids1_embeddings = self.add_noise(ids1_embeddings) + ids2_embeddings = self.add_noise(ids2_embeddings) + + return self.direction(ids1_embeddings, ids2_embeddings) + + +@BiasDirectionWrapper.register("classification_normal") +class ClassificationNormalBiasDirectionWrapper(BiasDirectionWrapper): + """ + + # Parameters + + seed_word_pairs_file : `Union[PathLike, str]` + Path of file containing seed word pairs. + tokenizer : `Tokenizer` + Tokenizer used to tokenize seed words. + direction_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of direction_vocab to use when tokenizing. + Disregarded when direction_vocab is `None`. + noise : `float`, optional (default=`1e-10`) + To avoid numerical instability if embeddings are initialized uniformly. + """ + + def __init__( + self, + seed_word_pairs_file: Union[PathLike, str], + tokenizer: Tokenizer, + direction_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + noise: float = 1e-10, + ): + self.ids1, self.ids2 = load_word_pairs( + seed_word_pairs_file, tokenizer, direction_vocab, namespace + ) + self.direction = ClassificationNormalBiasDirection() + self.noise = noise + + def __call__(self, module): + # embed subword token IDs and mean pool to get + # embedding of original word + ids1_embeddings = [] + for i in self.ids1: + i = i.to(module.weight.device) + ids1_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids2_embeddings = [] + for i in self.ids2: + i = i.to(module.weight.device) + ids2_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids1_embeddings = torch.cat(ids1_embeddings) + ids2_embeddings = torch.cat(ids2_embeddings) + + ids1_embeddings = self.add_noise(ids1_embeddings) + ids2_embeddings = self.add_noise(ids2_embeddings) + + return self.direction(ids1_embeddings, ids2_embeddings) diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py index e7be2763c1c..3c38e35dc08 100644 --- a/allennlp/fairness/bias_metrics.py +++ b/allennlp/fairness/bias_metrics.py @@ -258,6 +258,8 @@ class NaturalLanguageInference(Metric): 3. 
Threshold:tau (T:tau): A parameterized measure that reports the fraction of examples whose probability of neutral is above tau. + # Parameters + neutral_label : `int`, optional (default=`2`) The discrete integer label corresponding to a neutral entailment prediction. taus : `List[float]`, optional (default=`[0.5, 0.7]`) diff --git a/allennlp/fairness/bias_mitigator_applicator.py b/allennlp/fairness/bias_mitigator_applicator.py new file mode 100644 index 00000000000..add604473f0 --- /dev/null +++ b/allennlp/fairness/bias_mitigator_applicator.py @@ -0,0 +1,114 @@ +""" +A Model wrapper to mitigate biases in +contextual embeddings during finetuning +on a downstream task and test time. + +Based on: Dev, S., Li, T., Phillips, J.M., & Srikumar, V. (2020). +[On Measuring and Mitigating Biased Inferences of Word Embeddings] +(https://api.semanticscholar.org/CorpusID:201670701). +ArXiv, abs/1908.09369. +""" + +from overrides import overrides + +from allennlp.fairness.bias_mitigator_wrappers import BiasMitigatorWrapper + +from allennlp.common.lazy import Lazy +from allennlp.data import Vocabulary +from allennlp.models import Model +from allennlp.nn.util import find_embedding_layer + + +@Model.register("bias_mitigator_applicator") +class BiasMitigatorApplicator(Model): + """ + Wrapper class to apply bias mitigation to any pretrained Model. + + # Parameters + + vocab : `Vocabulary` + Vocabulary of base model. + base_model : `Model` + Base model for which to mitigate biases. + bias_mitigator : `Lazy[BiasMitigatorWrapper]` + Bias mitigator to apply to base model. + """ + + def __init__( + self, + vocab: Vocabulary, + base_model: Model, + bias_mitigator: Lazy[BiasMitigatorWrapper], + **kwargs + ): + super().__init__(vocab, **kwargs) + + self.base_model = base_model + # want to keep bias mitigation hook during test time + embedding_layer = find_embedding_layer(self.base_model) + + self.bias_mitigator = bias_mitigator.construct(embedding_layer=embedding_layer) + embedding_layer.register_forward_hook(self.bias_mitigator) + + self.vocab = self.base_model.vocab + self._regularizer = self.base_model._regularizer + + @overrides + def train(self, mode: bool = True): + super().train(mode) + self.base_model.train(mode) + # appropriately change requires_grad + # in bias mitigator and bias direction + # when train() and eval() are called + self.bias_mitigator.train(mode) + + # Delegate Model function calls to base_model + # Currently doing this manually because difficult to + # dynamically forward __getattribute__ due to + # behind-the-scenes usage of dunder attributes by torch.nn.Module + # and both BiasMitigatorWrapper and base_model inheriting from Model + # Assumes Model is relatively stable + # TODO: adapt BiasMitigatorWrapper to changes in Model + @overrides + def forward(self, *args, **kwargs): + return self.base_model.forward(*args, **kwargs) + + @overrides + def forward_on_instance(self, *args, **kwargs): + return self.base_model.forward_on_instance(*args, **kwargs) + + @overrides + def forward_on_instances(self, *args, **kwargs): + return self.base_model.forward_on_instances(*args, **kwargs) + + @overrides + def get_regularization_penalty(self, *args, **kwargs): + return self.base_model.get_regularization_penalty(*args, **kwargs) + + @overrides + def get_parameters_for_histogram_logging(self, *args, **kwargs): + return self.base_model.get_parameters_for_histogram_logging(*args, **kwargs) + + @overrides + def get_parameters_for_histogram_tensorboard_logging(self, *args, **kwargs): + return 
self.base_model.get_parameters_for_histogram_tensorboard_logging(*args, **kwargs) + + @overrides + def make_output_human_readable(self, *args, **kwargs): + return self.base_model.make_output_human_readable(*args, **kwargs) + + @overrides + def get_metrics(self, *args, **kwargs): + return self.base_model.get_metrics(*args, **kwargs) + + @overrides + def _get_prediction_device(self, *args, **kwargs): + return self.base_model._get_prediction_device(*args, **kwargs) + + @overrides + def _maybe_warn_for_unseparable_batches(self, *args, **kwargs): + return self.base_model._maybe_warn_for_unseparable_batches(*args, **kwargs) + + @overrides + def extend_embedder_vocab(self, *args, **kwargs): + return self.base_model.extend_embedder_vocab(*args, **kwargs) diff --git a/allennlp/fairness/bias_mitigator_wrappers.py b/allennlp/fairness/bias_mitigator_wrappers.py new file mode 100644 index 00000000000..6351a6cceac --- /dev/null +++ b/allennlp/fairness/bias_mitigator_wrappers.py @@ -0,0 +1,266 @@ +import torch +from typing import Union, Optional +from os import PathLike + +from allennlp.fairness.bias_mitigators import ( + HardBiasMitigator, + LinearBiasMitigator, + INLPBiasMitigator, + OSCaRBiasMitigator, +) +from allennlp.fairness.bias_direction_wrappers import BiasDirectionWrapper +from allennlp.fairness.bias_utils import load_word_pairs + +from allennlp.common import Registrable +from allennlp.data.tokenizers.tokenizer import Tokenizer +from allennlp.data import Vocabulary + + +class BiasMitigatorWrapper(Registrable): + """ + Parent class for bias mitigator wrappers. + """ + + def train(self, mode: bool = True): + """ + + # Parameters + + mode : `bool`, optional (default=`True`) + Sets `requires_grad` to value of `mode` for bias mitigator + and associated bias direction. + """ + raise NotImplementedError + + +# TODO: remove equalize words from evaluation words +@BiasMitigatorWrapper.register("hard") +class HardBiasMitigatorWrapper(BiasMitigatorWrapper): + """ + + # Parameters + + bias_direction : `BiasDirectionWrapper` + Bias direction used by mitigator. + embedding_layer : `torch.nn.Embedding` + Embedding layer of base model. + equalize_word_pairs_file : `Union[PathLike, str]` + Path of file containing equalize word pairs. + tokenizer : `Tokenizer` + Tokenizer used to tokenize equalize words. + mitigator_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of mitigator_vocab to use when tokenizing. + Disregarded when mitigator_vocab is `None`. + requires_grad : `bool`, optional (default=`True`) + Option to enable gradient calculation for bias mitigator. + """ + + def __init__( + self, + bias_direction: BiasDirectionWrapper, + embedding_layer: torch.nn.Embedding, + equalize_word_pairs_file: Union[PathLike, str], + tokenizer: Tokenizer, + mitigator_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + requires_grad: bool = True, + ): + # use predetermined bias direction + self.bias_direction = bias_direction + self.predetermined_bias_direction = self.bias_direction(embedding_layer) + self.ids1, self.ids2 = load_word_pairs( + equalize_word_pairs_file, tokenizer, mitigator_vocab, namespace + ) + self.mitigator = HardBiasMitigator(requires_grad=requires_grad) + + def __call__(self, module, module_in, module_out): + """ + Called as forward hook. 
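+        `module` is the embedding layer this hook is registered on, `module_in`
+        its inputs, and `module_out` the embeddings it produced; the tensor
+        returned here replaces `module_out` downstream, so the rest of the
+        model runs on bias-mitigated embeddings.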
+ """ + # embed subword token IDs and mean pool to get + # embedding of original word + ids1_embeddings = [] + for i in self.ids1: + i = i.to(module.weight.device) + ids1_embeddings.append( + torch.mean(module.forward(i), dim=0, keepdim=True) + ) # forward() does not trigger hooks, thereby avoiding infinite recursion + ids2_embeddings = [] + for i in self.ids2: + i = i.to(module.weight.device) + ids2_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids1_embeddings = torch.cat(ids1_embeddings) + ids2_embeddings = torch.cat(ids2_embeddings) + + module_out_size = module_out.size() + # flatten tensor except for last dimension + module_out = module_out.flatten(end_dim=-2) + # only return bias-mitigated evaluation embeddings + module_out = self.mitigator( + module_out, + self.predetermined_bias_direction.to(module_out.device), + ids1_embeddings.to(module_out.device), + ids2_embeddings.to(module_out.device), + )[: module_out.size(0)] + return module_out.reshape(module_out_size) + + def train(self, mode: bool = True): + self.mitigator.requires_grad = mode + self.bias_direction.train(mode) + + +@BiasMitigatorWrapper.register("linear") +class LinearBiasMitigatorWrapper(BiasMitigatorWrapper): + """ + + # Parameters + + bias_direction : `BiasDirectionWrapper` + Bias direction used by mitigator. + embedding_layer : `torch.nn.Embedding` + Embedding layer of base model. + requires_grad : `bool`, optional (default=`True`) + Option to enable gradient calculation for bias mitigator. + """ + + def __init__( + self, + bias_direction: BiasDirectionWrapper, + embedding_layer: torch.nn.Embedding, + requires_grad: bool = True, + ): + # use predetermined bias direction + self.bias_direction = bias_direction + self.predetermined_bias_direction = self.bias_direction(embedding_layer) + self.mitigator = LinearBiasMitigator(requires_grad=requires_grad) + + def __call__(self, module, module_in, module_out): + """ + Called as forward hook. + """ + module_out_size = module_out.size() + # flatten tensor except for last dimension + module_out = module_out.flatten(end_dim=-2) + module_out = self.mitigator( + module_out, self.predetermined_bias_direction.to(module_out.device) + ) + return module_out.reshape(module_out_size) + + def train(self, mode: bool = True): + self.mitigator.requires_grad = mode + self.bias_direction.train(mode) + + +@BiasMitigatorWrapper.register("inlp") +class INLPBiasMitigatorWrapper(BiasMitigatorWrapper): + """ + + # Parameters + + embedding_layer : `torch.nn.Embedding` + Embedding layer of base model. + seed_word_pairs_file : `Union[PathLike, str]` + Path of file containing seed word pairs. + tokenizer : `Tokenizer` + Tokenizer used to tokenize seed words. + mitigator_vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str`, optional (default=`"tokens"`) + Namespace of mitigator_vocab to use when tokenizing. + Disregarded when mitigator_vocab is `None`. + """ + + def __init__( + self, + embedding_layer: torch.nn.Embedding, + seed_word_pairs_file: Union[PathLike, str], + tokenizer: Tokenizer, + mitigator_vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + ): + self.ids1, self.ids2 = load_word_pairs( + seed_word_pairs_file, tokenizer, mitigator_vocab, namespace + ) + self.mitigator = INLPBiasMitigator() + + def __call__(self, module, module_in, module_out): + """ + Called as forward hook. 
+ """ + # embed subword token IDs and mean pool to get + # embedding of original word + ids1_embeddings = [] + for i in self.ids1: + i = i.to(module.weight.device) + ids1_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids2_embeddings = [] + for i in self.ids2: + i = i.to(module.weight.device) + ids2_embeddings.append(torch.mean(module.forward(i), dim=0, keepdim=True)) + ids1_embeddings = torch.cat(ids1_embeddings) + ids2_embeddings = torch.cat(ids2_embeddings) + + module_out_size = module_out.size() + # flatten tensor except for last dimension + module_out = module_out.flatten(end_dim=-2) + module_out = self.mitigator( + module_out, ids1_embeddings.to(module_out.device), ids2_embeddings.to(module_out.device) + ) + return module_out.reshape(module_out_size) + + def train(self, mode: bool = True): + pass + + +@BiasMitigatorWrapper.register("oscar") +class OSCaRBiasMitigatorWrapper(BiasMitigatorWrapper): + """ + + # Parameters + + bias_direction1 : `BiasDirectionWrapper` + Bias direction of first concept subspace used by mitigator. + bias_direction2 : `BiasDirectionWrapper` + Bias direction of second concept subspace used by mitigator. + embedding_layer : `torch.nn.Embedding` + Embedding layer of base model. + requires_grad : `bool`, optional (default=`True`) + Option to enable gradient calculation for bias mitigator. + """ + + def __init__( + self, + bias_direction1: BiasDirectionWrapper, + bias_direction2: BiasDirectionWrapper, + embedding_layer: torch.nn.Embedding, + requires_grad: bool = True, + ): + # use predetermined bias directions + self.bias_direction1 = bias_direction1 + self.predetermined_bias_direction1 = self.bias_direction1(embedding_layer) + self.bias_direction2 = bias_direction2(embedding_layer) + self.predetermined_bias_direction2 = self.bias_direction2(embedding_layer) + self.mitigator = OSCaRBiasMitigator(requires_grad=requires_grad) + + def __call__(self, module, module_in, module_out): + """ + Called as forward hook. 
+ """ + module_out_size = module_out.size() + # flatten tensor except for last dimension + module_out = module_out.flatten(end_dim=-2) + module_out = self.mitigator( + module_out, + self.predetermined_bias_direction1.to(module_out.device), + self.predetermined_bias_direction2.to(module_out.device), + ) + return module_out.reshape(module_out_size) + + def train(self, mode: bool = True): + self.mitigator.requires_grad = mode + self.bias_direction1.train(mode) + self.bias_direction2.train(mode) diff --git a/allennlp/fairness/bias_mitigators.py b/allennlp/fairness/bias_mitigators.py index 113a6472b9b..d3c0f089733 100644 --- a/allennlp/fairness/bias_mitigators.py +++ b/allennlp/fairness/bias_mitigators.py @@ -7,6 +7,7 @@ import numpy as np import scipy import sklearn + from allennlp.common.checks import ConfigurationError diff --git a/allennlp/fairness/bias_utils.py b/allennlp/fairness/bias_utils.py new file mode 100644 index 00000000000..c4bbb33479e --- /dev/null +++ b/allennlp/fairness/bias_utils.py @@ -0,0 +1,111 @@ +import torch +import json +from os import PathLike +from typing import List, Tuple, Union, Optional + +from allennlp.common.file_utils import cached_path +from allennlp.data import Vocabulary +from allennlp.data.tokenizers.tokenizer import Tokenizer + + +def _convert_word_to_ids_tensor(word, tokenizer, vocab, namespace, all_cases): + # function does NOT strip special tokens if tokenizer adds them + if all_cases: + words_list = [word.lower(), word.title(), word.upper()] + else: + words_list = [word] + ids = [] + for w in words_list: + # if vocab is None, use tokenizer vocab (only works for Huggingface PreTrainedTokenizer) + if vocab: + tokens = tokenizer.tokenize(w) + ids.append(torch.tensor([vocab.get_token_index(t.text, namespace) for t in tokens])) + else: + ids.append(torch.tensor(tokenizer.tokenizer(w)["input_ids"])) + return ids + + +def load_words( + fname: Union[str, PathLike], + tokenizer: Tokenizer, + vocab: Optional[Vocabulary] = None, + namespace: str = "tokens", + all_cases: bool = True, +) -> List[torch.Tensor]: + """ + This function loads a list of words from a file, + tokenizes each word into subword tokens, and converts the + tokens into IDs. + + # Parameters + + fname : `Union[str, PathLike]` + Name of file containing list of words to load. + tokenizer : `Tokenizer` + Tokenizer to tokenize words in file. + vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str` + Namespace of vocab to use when tokenizing. + all_cases : `bool`, optional (default=`True`) + Whether to tokenize lower, title, and upper cases of each word. + + # Returns + + word_ids : `List[torch.Tensor]` + List of tensors containing the IDs of subword tokens for + each word in the file. + """ + word_ids = [] + with open(cached_path(fname)) as f: + words = json.load(f) + for w in words: + word_ids.extend(_convert_word_to_ids_tensor(w, tokenizer, vocab, namespace, all_cases)) + return word_ids + + +def load_word_pairs( + fname: Union[str, PathLike], + tokenizer: Tokenizer, + vocab: Optional[Vocabulary] = None, + namespace: str = "token", + all_cases: bool = True, +) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + """ + This function loads a list of pairs of words from a file, + tokenizes each word into subword tokens, and converts the + tokens into IDs. 
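+    The file is expected to contain a JSON list of two-element lists of words,
+    e.g. `[["woman", "man"], ["girl", "boy"]]` (see
+    `test_fixtures/fairness/definitional_pairs.json`).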
+ + # Parameters + + fname : `Union[str, PathLike]` + Name of file containing list of pairs of words to load. + tokenizer : `Tokenizer` + Tokenizer to tokenize words in file. + vocab : `Vocabulary`, optional (default=`None`) + Vocabulary of tokenizer. If `None`, assumes tokenizer is of + type `PreTrainedTokenizer` and uses tokenizer's `vocab` attribute. + namespace : `str` + Namespace of vocab to use when tokenizing. + all_cases : `bool`, optional (default=`True`) + Whether to tokenize lower, title, and upper cases of each word. + + # Returns + + word_ids : `Tuple[List[torch.Tensor], List[torch.Tensor]]` + Pair of lists of tensors containing the IDs of subword tokens for + words in the file. + """ + word_ids1 = [] + word_ids2 = [] + with open(cached_path(fname)) as f: + words = json.load(f) + for w1, w2 in words: + word_ids1.extend( + _convert_word_to_ids_tensor(w1, tokenizer, vocab, namespace, all_cases) + ) + word_ids2.extend( + _convert_word_to_ids_tensor(w2, tokenizer, vocab, namespace, all_cases) + ) + return word_ids1, word_ids2 diff --git a/test_fixtures/fairness/definitional_pairs.json b/test_fixtures/fairness/definitional_pairs.json new file mode 100644 index 00000000000..37ae95e9876 --- /dev/null +++ b/test_fixtures/fairness/definitional_pairs.json @@ -0,0 +1,42 @@ +[ + [ + "woman", + "man" + ], + [ + "girl", + "boy" + ], + [ + "she", + "he" + ], + [ + "mother", + "father" + ], + [ + "daughter", + "son" + ], + [ + "gal", + "guy" + ], + [ + "female", + "male" + ], + [ + "her", + "his" + ], + [ + "herself", + "himself" + ], + [ + "Mary", + "John" + ] +] \ No newline at end of file diff --git a/test_fixtures/fairness/equalize_pairs.json b/test_fixtures/fairness/equalize_pairs.json new file mode 100644 index 00000000000..6fbdacefaed --- /dev/null +++ b/test_fixtures/fairness/equalize_pairs.json @@ -0,0 +1,210 @@ +[ + [ + "monastery", + "convent" + ], + [ + "spokesman", + "spokeswoman" + ], + [ + "Catholic_priest", + "nun" + ], + [ + "Dad", + "Mom" + ], + [ + "Men", + "Women" + ], + [ + "councilman", + "councilwoman" + ], + [ + "grandpa", + "grandma" + ], + [ + "grandsons", + "granddaughters" + ], + [ + "prostate_cancer", + "ovarian_cancer" + ], + [ + "testosterone", + "estrogen" + ], + [ + "uncle", + "aunt" + ], + [ + "wives", + "husbands" + ], + [ + "Father", + "Mother" + ], + [ + "Grandpa", + "Grandma" + ], + [ + "He", + "She" + ], + [ + "boy", + "girl" + ], + [ + "boys", + "girls" + ], + [ + "brother", + "sister" + ], + [ + "brothers", + "sisters" + ], + [ + "businessman", + "businesswoman" + ], + [ + "chairman", + "chairwoman" + ], + [ + "colt", + "filly" + ], + [ + "congressman", + "congresswoman" + ], + [ + "dad", + "mom" + ], + [ + "dads", + "moms" + ], + [ + "dudes", + "gals" + ], + [ + "ex_girlfriend", + "ex_boyfriend" + ], + [ + "father", + "mother" + ], + [ + "fatherhood", + "motherhood" + ], + [ + "fathers", + "mothers" + ], + [ + "fella", + "granny" + ], + [ + "fraternity", + "sorority" + ], + [ + "gelding", + "mare" + ], + [ + "gentleman", + "lady" + ], + [ + "gentlemen", + "ladies" + ], + [ + "grandfather", + "grandmother" + ], + [ + "grandson", + "granddaughter" + ], + [ + "he", + "she" + ], + [ + "himself", + "herself" + ], + [ + "his", + "her" + ], + [ + "king", + "queen" + ], + [ + "kings", + "queens" + ], + [ + "male", + "female" + ], + [ + "males", + "females" + ], + [ + "man", + "woman" + ], + [ + "men", + "women" + ], + [ + "nephew", + "niece" + ], + [ + "prince", + "princess" + ], + [ + "schoolboy", + "schoolgirl" + ], + [ + "son", + "daughter" + ], + [ + 
"sons", + "daughters" + ], + [ + "twin_brother", + "twin_sister" + ] +] \ No newline at end of file diff --git a/test_fixtures/fairness/gender_specific_full.json b/test_fixtures/fairness/gender_specific_full.json new file mode 100644 index 00000000000..a7f0c73ce68 --- /dev/null +++ b/test_fixtures/fairness/gender_specific_full.json @@ -0,0 +1,1443 @@ +[ + "he", + "his", + "He", + "her", + "she", + "him", + "She", + "man", + "women", + "men", + "His", + "woman", + "spokesman", + "wife", + "himself", + "son", + "mother", + "father", + "chairman", + "daughter", + "husband", + "guy", + "girls", + "girl", + "Her", + "boy", + "King", + "boys", + "brother", + "Chairman", + "spokeswoman", + "female", + "sister", + "Women", + "Man", + "male", + "herself", + "Lions", + "Lady", + "brothers", + "dad", + "actress", + "mom", + "sons", + "girlfriend", + "Kings", + "Men", + "daughters", + "Prince", + "Queen", + "teenager", + "lady", + "Bulls", + "boyfriend", + "sisters", + "Colts", + "mothers", + "Sir", + "king", + "businessman", + "Boys", + "grandmother", + "grandfather", + "deer", + "cousin", + "Woman", + "ladies", + "Girls", + "Father", + "uncle", + "PA", + "Boy", + "Councilman", + "mum", + "Brothers", + "MA", + "males", + "Girl", + "Mom", + "Guy", + "Queens", + "congressman", + "Dad", + "Mother", + "grandson", + "twins", + "bull", + "queen", + "businessmen", + "wives", + "widow", + "nephew", + "bride", + "females", + "aunt", + "Congressman", + "prostate_cancer", + "lesbian", + "chairwoman", + "fathers", + "Son", + "moms", + "Ladies", + "maiden", + "granddaughter", + "younger_brother", + "Princess", + "Guys", + "lads", + "Ma", + "Sons", + "lion", + "Bachelor", + "gentleman", + "fraternity", + "bachelor", + "niece", + "Lion", + "Sister", + "bulls", + "husbands", + "prince", + "colt", + "salesman", + "Bull", + "Sisters", + "hers", + "dude", + "Spokesman", + "beard", + "filly", + "Actress", + "Him", + "princess", + "Brother", + "lesbians", + "councilman", + "actresses", + "Viagra", + "gentlemen", + "stepfather", + "Deer", + "monks", + "Beard", + "Uncle", + "ex_girlfriend", + "lad", + "sperm", + "Daddy", + "testosterone", + "MAN", + "Female", + "nephews", + "maid", + "daddy", + "mare", + "fiance", + "Wife", + "fiancee", + "kings", + "dads", + "waitress", + "Male", + "maternal", + "heroine", + "feminist", + "Mama", + "nieces", + "girlfriends", + "Councilwoman", + "sir", + "stud", + "Mothers", + "mistress", + "lions", + "estranged_wife", + "womb", + "Brotherhood", + "Statesman", + "grandma", + "maternity", + "estrogen", + "ex_boyfriend", + "widows", + "gelding", + "diva", + "teenage_girls", + "nuns", + "Daughter", + "czar", + "ovarian_cancer", + "HE", + "Monk", + "countrymen", + "Grandma", + "teenage_girl", + "penis", + "bloke", + "nun", + "Husband", + "brides", + "housewife", + "spokesmen", + "suitors", + "menopause", + "monastery", + "patriarch", + "Beau", + "motherhood", + "brethren", + "stepmother", + "Dude", + "prostate", + "Moms", + "hostess", + "twin_brother", + "Colt", + "schoolboy", + "eldest", + "brotherhood", + "Godfather", + "fillies", + "stepson", + "congresswoman", + "Chairwoman", + "Daughters", + "uncles", + "witch", + "Mommy", + "monk", + "viagra", + "paternity", + "suitor", + "chick", + "Pa", + "fianc\u00e9", + "sorority", + "macho", + "Spokeswoman", + "businesswoman", + "eldest_son", + "gal", + "statesman", + "schoolgirl", + "fathered", + "goddess", + "hubby", + "mares", + "stepdaughter", + "blokes", + "dudes", + "socialite", + "strongman", + "Witch", + "fianc\u00e9e", + "uterus", + 
"grandsons", + "Bride", + "studs", + "mama", + "Aunt", + "godfather", + "hens", + "hen", + "mommy", + "Babe", + "estranged_husband", + "Fathers", + "elder_brother", + "boyhood", + "baritone", + "Diva", + "Lesbian", + "grandmothers", + "grandpa", + "boyfriends", + "feminism", + "countryman", + "stallion", + "heiress", + "queens", + "Grandpa", + "witches", + "aunts", + "semen", + "fella", + "granddaughters", + "chap", + "knight", + "widower", + "Maiden", + "salesmen", + "convent", + "KING", + "vagina", + "beau", + "babe", + "HIS", + "beards", + "handyman", + "twin_sister", + "maids", + "gals", + "housewives", + "Gentlemen", + "horsemen", + "Businessman", + "obstetrics", + "fatherhood", + "beauty_queen", + "councilwoman", + "princes", + "matriarch", + "colts", + "manly", + "ma", + "fraternities", + "Spokesmen", + "pa", + "fellas", + "Gentleman", + "councilmen", + "dowry", + "barbershop", + "Monks", + "WOMAN", + "fraternal", + "ballerina", + "manhood", + "Dads", + "heroines", + "granny", + "gynecologist", + "princesses", + "Goddess", + "yo", + "Granny", + "knights", + "eldest_daughter", + "HER", + "underage_girls", + "masculinity", + "Girlfriend", + "bro", + "Grandmother", + "grandfathers", + "crown_prince", + "Restless", + "paternal", + "Queen_Mother", + "Boyfriend", + "womens", + "Males", + "SHE", + "Countess", + "stepchildren", + "Belles", + "bachelors", + "matron", + "momma", + "Legs", + "maidens", + "goddesses", + "landlady", + "sisterhood", + "Grandfather", + "Fraternity", + "Majesty", + "Babes", + "lass", + "maternal_grandmother", + "blondes", + "ma'am", + "Womens", + "divorcee", + "Momma", + "fathering", + "Effie", + "Lad", + "womanhood", + "missus", + "Sisterhood", + "granddad", + "Mens", + "papa", + "gf", + "sis", + "Husbands", + "Hen", + "womanizer", + "gynecological", + "stepsister", + "Handsome", + "Prince_Charming", + "BOY", + "stepdad", + "teen_ager", + "GIRL", + "dame", + "Sorority", + "beauty_pageants", + "raspy", + "harem", + "maternal_grandfather", + "Hes", + "deliveryman", + "septuagenarian", + "damsel", + "paternal_grandmother", + "paramour", + "paternal_grandparents", + "Nun", + "DAD", + "mothering", + "shes", + "HE_'S", + "Nuns", + "teenage_daughters", + "auntie", + "widowed_mother", + "Girlfriends", + "FATHER", + "virile", + "COUPLE", + "grandmas", + "Hubby", + "nan", + "vixen", + "Joan_Crawford", + "stepdaughters", + "endometrial_cancer", + "stepsons", + "loins", + "Grandson", + "Mitchells", + "erections", + "Matron", + "Fella", + "daddies", + "ter", + "Sweetie", + "Dudes", + "Princesses", + "Lads", + "lioness", + "Mamma", + "virility", + "bros", + "womenfolk", + "Heir", + "BROTHERS", + "manliness", + "patriarchs", + "earl", + "sisterly", + "Whore", + "Gynaecology", + "countess", + "convents", + "Oratory", + "witch_doctor", + "mamas", + "yah", + "aunty", + "aunties", + "Heiress", + "lasses", + "Breasts", + "fairer_sex", + "sorority_sisters", + "WIFE", + "Laurels", + "penile", + "nuh", + "mah", + "toms", + "mam", + "Granddad", + "premenopausal_women", + "Granddaddy", + "nana", + "coeds", + "dames", + "herdsman", + "Mammy", + "Fellas", + "Niece", + "menfolk", + "Grandad", + "bloods", + "Gramps", + "damsels", + "Granddaughter", + "mamma", + "concubine", + "Oros", + "Blarney", + "filial", + "broads", + "Ethel_Kennedy", + "ACTRESS", + "Tit", + "fianc", + "Hunk", + "Night_Shift", + "wifey", + "Lothario", + "Holy_Roman_Emperor", + "horse_breeder", + "grandnephew", + "Lewises", + "Muscular", + "feminist_movement", + "Sanan", + "women\u00e2_\u20ac_\u2122", + "Fiancee", + 
"dowries", + "Carmelite", + "rah", + "n_roller", + "bay_filly", + "belles", + "Uncles", + "PRINCESS", + "womans", + "Homeboy", + "Blokes", + "Charmer", + "codger", + "Delta_Zeta", + "courtesans", + "grandaughter", + "SISTER", + "Highness", + "grandbabies", + "crone", + "Skip_Away", + "noblewoman", + "bf", + "jane", + "philandering_husband", + "Sisqo", + "mammy", + "daugher", + "director_Skip_Bertman", + "DAUGHTER", + "Royal_Highness", + "mannish", + "spinsters", + "Missus", + "madame", + "Godfathers", + "saleswomen", + "beaus", + "Risha", + "luh", + "sah", + "negligee", + "Women\u00e2_\u20ac_\u2122", + "Hos", + "salesgirl", + "grandmom", + "Grandmas", + "Lawsons", + "countrywomen", + "Booby", + "darlin", + "Sheiks", + "boyz", + "wifes", + "Bayi", + "Il_Duce", + "\u00e2_\u20ac_\u0153My", + "fem", + "daugther", + "Potti", + "hussy", + "tch", + "Gelding", + "stemmed_roses", + "Damson", + "puh", + "Tylers", + "neice", + "Mutha", + "GRANDMOTHER", + "youse", + "spurned_lover", + "mae", + "Britt_Ekland", + "clotheshorse", + "Carlita_Kilpatrick", + "Cambest", + "Pretty_Polly", + "banshees", + "male_chauvinist", + "Arliss", + "mommas", + "maidservant", + "Gale_Harold", + "Little_Bo_Peep", + "Cleavers", + "hags", + "blowsy", + "Queen_Elizabeth_I.", + "lassies", + "papas", + "BABE", + "ugly_ducklings", + "Jims", + "hellion", + "Beautician", + "coalminer", + "relaxin", + "El_Mahroug", + "Victoria_Secret_Angel", + "shepherdess", + "Mosco", + "Slacks", + "nanna", + "wifely", + "tomboys", + "LAH", + "hast", + "apo", + "Kaplans", + "milkmaid", + "Robin_Munis", + "John_Barleycorn", + "royal_highness", + "Meanie", + "NAH", + "trollop", + "roh", + "Jewess", + "Sheik_Hamad", + "mumsy", + "Big_Pussy", + "chil_dren", + "Aunt_Bea", + "basso", + "sista", + "girlies", + "nun_Sister", + "chica", + "Bubbas", + "massa", + "Southern_belles", + "Nephews", + "castrations", + "Mister_Ed", + "Grandsons", + "Calaf", + "Malachy_McCourt", + "Shamash", + "hey_hey", + "Harmen", + "sonofabitch", + "Donovans", + "Grannie", + "Kalinka", + "hisself", + "Devean", + "goatherd", + "hinds", + "El_Corredor", + "Kens", + "notorious_womanizer", + "goh", + "Mommas", + "washerwoman", + "Samaira", + "Coo_Coo", + "Governess", + "grandsire", + "PRINCE_WILLIAM", + "gramma", + "him.He", + "Coptic_priest", + "Corbie", + "Kennys", + "thathe", + "Pa_Pa", + "Bristols", + "Hotep", + "snowy_haired", + "El_Prado_Ire", + "Girl_hitmaker", + "Hurleys", + "St._Meinrad", + "sexually_perverted", + "authoress", + "Prudie", + "raven_haired_beauty", + "Bonos", + "domestic_shorthair", + "brothas", + "nymphet", + "Neelma", + "Seita", + "stud_muffin", + "St._Judes", + "yenta", + "bare_shouldered", + "Pinkney_Sr.", + "PRINCE_CHARLES", + "Bisutti", + "sistas", + "Blanche_Devereaux", + "Momoa", + "Quiff", + "Scotswoman", + "balaclava_clad_men", + "Louis_Leakey", + "dearie", + "vacuum_cleaner_salesman", + "grandads", + "postulant", + "SARAH_JESSICA_PARKER", + "AUNT", + "Prince_Dauntless", + "Dalys", + "Darkie", + "Czar_Nicholas", + "Lion_Hearted", + "Boy_recliner", + "baby_mamas", + "giantess", + "Lawd", + "GRANNY", + "fianc_e", + "Bilqis", + "WCTU", + "famly", + "Ellas", + "feminazis", + "Pentheus", + "MAMAS", + "Town_Criers", + "Saggy", + "youngman", + "grandam", + "divorc\u00e9", + "bosomed", + "roon", + "Simmentals", + "eponymous_heroine", + "LEYLAND", + "REE'", + "cain't", + "Evelynn", + "WAH'", + "sistah", + "Horners", + "Elsie_Poncher", + "Coochie", + "rat_terriers", + "Limousins", + "Buchinski", + "Schicchi", + "Carpitcher", + "Khwezi", + "HAH'", + "Shazza", 
+ "Mackeson", + "ROH'", + "kuya", + "novice_nun", + "Shei", + "Elmasri", + "ladykiller", + "6yo", + "Yenta", + "SHEL", + "pater", + "Souse", + "Tahirah", + "comedian_Rodney_Dangerfield", + "Shottle", + "carryin", + "Sath", + "fa'afafine", + "royal_consort", + "hus_band", + "maternal_uncles", + "dressing_provocatively", + "dreamgirl", + "millionaire_industrialist", + "Georgie_Girl", + "Must_Be_Obeyed", + "joh", + "Arabian_stallion", + "ahr", + "mso_para_margin_0in", + "SOO'", + "Biddles", + "Chincoteague_Volunteer_Fire", + "Lisa_Miceli", + "gorgeous_brunette", + "fianc\u017d", + "Moved_fluently", + "Afternoon_Deelites", + "biker_dude", + "Vito_Spatafore", + "MICK_JAGGER", + "Adesida", + "Reineman", + "witz", + "Djamila", + "Glenroe", + "daddys", + "Romanzi", + "gentlewomen", + "Dandie_Dinmont_terrier", + "Excess_Ire", + "By_SYVJ_Staff", + "zan", + "CONFESSIONS", + "Magees", + "wimmin", + "tash", + "Theatrical_Ire", + "Prince_Charmings", + "chocolate_eclair", + "bron", + "daughers", + "Felly", + "fiftyish", + "Spritely", + "GRANDPA", + "distaffer", + "Norbertines", + "DAH'", + "leader_Muammar_Gadaffi", + "swains", + "Prince_Tomohito", + "Honneur", + "Soeur", + "jouster", + "Pharaoh_Amenhotep_III", + "QUEEN_ELIZABETH_II", + "Ne'er", + "Galileo_Ire", + "Fools_Crow", + "Lannisters", + "Devines", + "gonzales", + "columnist_Ann_Landers", + "Moseleys", + "hiz", + "busch", + "roastee", + "toyboys", + "Sheffields", + "grandaunt", + "Galvins", + "Giongo", + "geh", + "flame_haired_actress", + "Grammarian", + "Greg_Evigan", + "frontierswoman", + "Debele", + "rabs", + "nymphets", + "aai", + "BREE", + "Shaqs", + "ZAY", + "pappa", + "Housa", + "refrigerator_repairman", + "artificial_inseminations", + "chickie", + "Rippa", + "teenager_Tracy_Turnblad", + "homebred_colt", + "Abigaille", + "hen_pecked_husband", + "businesman", + "her.She", + "Kaikeyi", + "Stittsworth", + "self_proclaimed_redneck", + "Khella", + "NeW", + "Evers_Swindell", + "Asmerom_Gebreselassie", + "Boy_recliners", + "Cliff_Claven", + "Legge_Bourke", + "Costos", + "d'_honneur", + "sistahs", + "Cabble", + "sahn", + "CROW_AGENCY_Mont", + "jezebel", + "Harrolds", + "ROSARIO_DAWSON", + "INXS_frontman_Michael_Hutchence", + "Gursikh", + "Dadas", + "VIAGA", + "keen_horsewoman", + "Theodoric", + "Eldery", + "lihn", + "Alice_Kramden", + "Santarina", + "radical_cleric_al_Sadr", + "Curleys", + "SY'", + "Fidaa", + "Saptapadi", + "Actor_Sean_Astin", + "Kellita_Smith", + "Doly", + "Libertina", + "Money_McBags", + "Chief_Bearhart", + "choirgirl", + "chestnut_stallion", + "VIGRA", + "BY_JIM_McCONNELL", + "Sal_Vitale", + "Trivia_buffs", + "kumaris", + "fraternal_lodge", + "galpals", + "Borino_Quinn", + "lina", + "LATEST_Rapper", + "Bezar", + "Manro", + "bakla", + "Grisetti", + "blond_bimbo", + "spinster_aunt", + "gurls", + "hiswife", + "paleface", + "Charlye", + "hippie_chicks", + "Khalifas", + "Picture_JUSTIN_SANSON", + "Hepburns", + "yez", + "ALDER", + "Sanussi", + "Lil_Sis", + "McLoughlins", + "Barbra_Jean", + "Lulua", + "thatshe", + "actress_Shohreh_Aghdashloo", + "SIR_ANTHONY_HOPKINS", + "Gloddy", + "ZAH'", + "ORANGE_'S", + "Danielle_Bimber", + "grandmum", + "Kulkis", + "Brazington", + "Marisa_Lenhard_CFA", + "SIR_JOHN", + "Clareman", + "Aqila", + "Heavily_tattooed", + "Libbys", + "thim", + "elocutionist", + "submissives", + "Inja", + "rahm", + "Agnes_Gooch", + "fake_tits", + "nancy_boys", + "Swaidan", + "SHAH'", + "ain'ta_bed", + "Shumail_Raj", + "Duchesse", + "diethylstilbestrol_DES", + "colt_foal", + "unfaithful_lover", + "Maseri", + "nevah", + 
"SAHN", + "Barths", + "Toughkenamon", + "GUEST_STARS", + "him.But", + "Donna_Claspell", + "gingham_dresses", + "Massage_Parlour", + "wae", + "Wasacz", + "Magistra", + "vihl", + "Smriti_Iraani", + "boyish_haircut", + "workingwoman", + "borthers", + "Capuchin_friars", + "Nejma", + "yes_sirs", + "bivocational_pastor", + "Grafters", + "HOPWOOD", + "Nicknamed_Godzilla", + "yos", + "Berkenfield", + "Missis", + "sitcom_Designing_Women", + "Kafoa", + "trainer_Emma_Lavelle", + "sadomasochistic_dungeon", + "iht", + "desperates", + "predessor", + "wolf_cub", + "indigenous_Peruvians", + "Livia_Soprano", + "troh", + "colt_sired", + "BOND_HILL", + "ihl", + "Drydens", + "rahs", + "Piserchia", + "Sonny_Corinthos", + "bankrobber", + "Fwank", + "feisty_redhead", + "booze_guzzling", + "COOPERS", + "actress_Q'orianka_Kilcher", + "Cortezar", + "twe", + "Jacoub", + "Cindy_Iannarelli", + "Hell_Raiser", + "Fondly_referred", + "Bridal_Shoppe", + "Noleta", + "Christinas", + "IAGRA", + "LaTanya_Richardson", + "Sang_Bender", + "Assasins", + "sorrel_gelding", + "septugenarian", + "Hissy", + "Muqtada_al_Sadr_mook", + "Pfeni", + "MADRID_AFX_Banco_Santander", + "tuchis", + "LeVaughn", + "Gadzicki", + "transvestite_hooker", + "Fame_jockey_Laffit", + "nun_Sister_Mary", + "SAMSONOV", + "Mayflower_Madam", + "Shaque", + "well.He", + "Trainer_Julio_Canani", + "sorrel_mare", + "minivehicle_joint_venture", + "wife_Dwina", + "Aasiya_AH'_see", + "Baratheon", + "Rick_O'Shay", + "Mammies", + "goatie", + "Nell_Gwynne", + "charmingly_awkward", + "Slamma", + "DEHL", + "Lorenzo_Borghese", + "ALMA_Wis.", + "Anne_Scurria", + "father_Peruvians_alternately", + "JULIE_ANDREWS", + "Slim_Pickins", + "Victoria_Secret_stunner", + "BY'", + "Sanam_Devdas", + "pronounced_luh", + "Pasha_Selim", + "\u4e2d\u534e", + "rson", + "maternal_grandmothers", + "IOWA_CITY_Ia", + "Madame_de_Tourvel", + "JAY'", + "Sheika_Mozah_bint_Nasser", + "Hotsy_Totsy", + "D'_Ginto", + "singer_Johnny_Paycheck", + "uterine_prolapse_surgery", + "SCOTTDALE_Pa.", + "AdelaideNow_reports", + "Marcus_Schenkenberg", + "Clyse", + "Obiter_Dicta", + "comic_Sam_Kinison", + "bitties", + "ROCKVILLE_Ind.", + "swimsuit_calendars", + "Decicio_Smith", + "Ma_ma", + "Rie_Miyazawa", + "celibate_chastity", + "gwah", + "ZAY'", + "HER_Majesty", + "Defrere", + "Las_Madrinas", + "\u7c3f_\u8042_\u7ffb", + "Bea_Hamill", + "ARCADIA_Calif._Trainer", + "Bold_Badgett", + "stakes_victress", + "Hoppin_Frog", + "Narumiya", + "Flayfil", + "hardman_Vinnie_Jones", + "Marilyn_Monroe_lookalike", + "Kivanc_Tatlitug", + "Persis_Khambatta", + "SINKING_SPRING_Pa.", + "len_3rd", + "DEAR_TRYING", + "Farndon_Cheshire", + "Krishna_Madiga", + "daughter_Princess_Chulabhorn", + "Marshall_Rooster_Cogburn", + "Kitty_Kiernan", + "Yokich", + "Jarou", + "Serdaris", + "ee_ay", + "Montifiore", + "Chuderewicz", + "Samuel_Le_Bihan", + "filly_Proud_Spell", + "Umm_Hiba", + "pronounced_koo", + "Sandy_Fonzo", + "KOR'", + "Fielder_Civil_kisses", + "Federalsburg_Maryland", + "Nikah_ceremony", + "Brinke_Stevens", + "Yakama_Tribal_Council", + "Capuchin_Father", + "wife_Callista_Bisek", + "Beau_Dare", + "Bedoni", + "Arjun_Punj", + "JOHNNY_KNOXVILLE", + "cap_tain", + "Alderwood_Boys", + "Chi_Eta_Phi", + "ringleader_Charles_Graner", + "Savoies", + "Lalla_Salma", + "Mrs._Potiphar", + "fahn", + "name_Taylor_Sumers", + "Vernita_Green", + "Bollywood_baddie", + "BENBROOK_Texas", + "Assemblyman_Lou_Papan", + "virgin_brides", + "Cho_Eun", + "CATHY_Freeman", + "Uncle_Saul", + "Lao_Brewery", + "Ibo_tribe", + "ruf", + "rival_Edurne_Pasaban", + 
"Hei_Shangri_La", + "Mommy_dearest", + "interest_Angola_Sonogal", + "Ger_Monsun", + "PUSSYCAT_DOLL", + "Crown_Jewels_Condoms", + "Lord_Marke", + "Patootie", + "Nora_Bey", + "huntin_shootin", + "Minister_Raymond_Tshibanda", + "La_Nina_la_NEEN", + "signature_Whoppers", + "estranged_hubby_Kevin_Federline", + "UR'", + "pill_poppin", + "GEHR'", + "purebred_Arabians", + "husbandly_duties", + "VIAGRA_TIMING", + "Hereford_heifer", + "hushed_monotone_voice", + "Pola_Uddin", + "Wee_Jimmy_Krankie", + "Kwakwanso", + "Our_Galvinator", + "shoh", + "Codependency_Anonymous_Group", + "LA'", + "Taufa'ahau", + "Invincible_Spirit_colt", + "SAH'_dur", + "MOUNT_CARMEL_Pa.", + "watches_attentively", + "SNL_spinoffs", + "Seth_Nitschke", + "Duns_Berwickshire", + "defendant_Colleen_LaRose", + "Silky_O'Sullivan", + "Highcliff_Farm", + "REN'", + "Comestar", + "Satisfied_Frog", + "Jai_Maharashtra", + "ATTICA_Ind.", + "lover_Larry_Birkhead", + "Tami_Megal", + "chauvinist_pigs", + "Phi_sorority", + "Micronesian_immigrant", + "Lia_Boldt", + "Sugar_Tits", + "actress_Kathy_Najimy", + "zhoo", + "Colombo_underboss", + "Katsav_accusers", + "Bess_Houdini", + "rap_mogul_Diddy", + "companions_Khin_Khin", + "Van_Het", + "Mastoi_tribe", + "VITALY", + "ROLLING_STONES_rocker", + "womanizing_cad", + "LILY_COLE", + "paternal_grandfathers", + "Lt._Col._Kurt_Kosmatka", + "Kasseem_Jr.", + "Ji_Ji", + "Wilburforce", + "VIAGRA_DOSE", + "English_Sheepdogs", + "pronounced_Kah", + "Htet_Htet_Oo", + "Brisk_Breeze", + "Eau_du", + "BY_MELANIE_EVANS", + "Neovasc_Medical", + "British_funnyman_RICKY", + "4YO_mare", + "Hemaida", + "MONKTON", + "Mrs_Mujuru", + "BaGhana_BaGhana", + "Shaaban_Abdel_Rahim", + "Edward_Jazlowiecki_lawyer", + "Ajman_Stud", + "manly_pharaoh_even", + "Serra_Madeira_Islands", + "FRAY'", + "panto_dames", + "Khin_Myo", + "dancer_Karima_El_Mahroug", + "CROWN_Princess", + "Baseball_HOFer", + "Hasta_la_Pasta", + "GIRLS_NEXT_DOOR", + "Benedict_Groeschel", + "Bousamra", + "Ruby_Rubacuori_Ruby", + "Monde_Bleu", + "Un_homme_qui", + "Taylor_Sumers", + "Rapper_EMINEM", + "Joe_Menchetti", + "VAY'", + "supermodel_NAOMI_CAMPBELL", + "Supermodel_GISELE_BUNDCHEN", + "Au_Lait", + "Radar_Installed", + "THOMAS_TOWNSHIP_Mich.", + "Rafinesque", + "Herman_Weinrich", + "Abraxas_Antelope", + "raspy_voiced_rocker", + "Manurewa_Cosmopolitan_Club", + "Paraone", + "THE_LEOPARD", + "Boy_Incorporated_LZB", + "Dansili_filly", + "Lumpy_Rutherford", + "unwedded_bliss", + "Bhavna_Sharma", + "Scarvagh", + "en_flagrante", + "Mottu_Maid", + "Dowager_Queen", + "NEEN", + "model_Monika_Zsibrita", + "ROSIE_PEREZ", + "Mattock_Ranger", + "Valorous", + "Surpreme", + "Marwari_businessmen", + "Grandparents_aunts", + "Kimberley_Vlaeminck", + "Lyn_Treece_Boys", + "PDX_Update", + "Virsa_Punjab", + "eyelash_fluttering", + "Pi_fraternity", + "HUNTLEIGH_Mo.", + "novelist_Jilly_Cooper", + "Naha_Shuri_temple", + "Yasmine_Al_Massri", + "Mu_Gamma_Xi", + "Mica_Ertegun", + "Ocleppo", + "VIAGRA_CONTRAINDICATIONS", + "daughter_PEACHES", + "trainer_Geoff_Wragg", + "OVERNIGHT_DELIVERY", + "Fitts_retiree", + "de_Tourvel", + "Lil_Lad", + "north_easterner", + "Aol_Weird_News", + "Somewhat_improbably", + "Sikh_panth", + "Worcester_2m_7f", + "Zainab_Jah", + "OLYMPIC_medalist", + "Enoch_Petrucelly", + "collie_Lassie", + "LOW'", + "clumsiness_Holloway", + "ayr", + "OHR'", + "ROLLING_STONES_guitarist", + "LAH'_nee", + "Ian_Beefy_Botham", + "Awapuni_trainer", + "Glamorous_Granny", + "Chiang_Ching", + "MidAtlantic_Cardiovascular_Associates", + "Yeke", + "Seaforth_Huron_Expositor", + 
"Westley_Cary_Elwes", + "Cate_Blanchett_Veronica_Guerin", + "Bellas_Gate", + "witch_Glinda", + "wives_mistresses", + "Woodsville_Walmart", + "2YO_colt", + "Manav_Sushant_Singh", + "Pupi_Avati_Il", + "Sigma_Beta_Rho", + "Bishop_Christopher_Senyonjo", + "Vodou_priest", + "Rubel_Chowdhury", + "Claddagh_Ring", + "TAH'_duh_al", + "al_Sadr_mook_TAH'", + "ROBIN_GIBB", + "GAHN'", + "BY_THOMAS_RANSON", + "sister_Carine_Jena", + "Lyphard_mare", + "summa_cum", + "Semenya_grandmother_Maputhi", + "Clare_Nuns", + "Talac", + "sex_hormones_androgens", + "majeste", + "Saint_Ballado_mare", + "Carrie_Huchel", + "Mae_Dok", + "wife_Dieula", + "Earnest_Sirls", + "spoof_bar_mitzvah", + "von_Boetticher", + "Audwin_Mosby", + "Case_presentationWe", + "Vincent_Papandrea", + "KRAY'", + "Sergi_Benavent", + "Le_Poisson", + "Von_Cramm", + "Patti_Mell", + "Raymi_Coya", + "Benjamin_BeBe_Winans", + "Nana_Akosua", + "Auld_Acquaintance", + "Desire_Burunga", + "Company_Wrangler_Nestea", + "ask_Krisy_Plourde", + "JUANITA_BYNUM", + "livia", + "GAMB", + "Gail_Rosario_Dawson", + "Ramgarhia_Sikh", + "Catholic_nun_Sister", + "FOUR_WEDDINGS_AND", + "Robyn_Scherer", + "brother_King_Athelstan", + "Santo_Loquasto_Fences", + "Wee_Frees", + "MARISOL", + "Soliloquy_Stakes", + "Whatever_Spoetzl", + "Marc'Aurelio", + "mon_petit", + "Sabbar_al_Mashhadani", + "KAY'_lee", + "m_zah_MAH'", + "BY_TAMI_ALTHOFF", + "hobbit_Samwise_Gamgee", + "Bahiya_Hariri_sister", + "daddy_Larry_Birkhead", + "Sow_Tracey_Ullman", + "coach_Viljo_Nousiainen", + "Carmen_Lebbos", + "conjoined_twins_Zainab", + "Rob_Komosa", + "ample_bosomed", + "Ageing_rocker", + "psychic_Oda" +] \ No newline at end of file diff --git a/tests/fairness/bias_utils_test.py b/tests/fairness/bias_utils_test.py new file mode 100644 index 00000000000..17accc58a78 --- /dev/null +++ b/tests/fairness/bias_utils_test.py @@ -0,0 +1,79 @@ +import json +import torch + +from allennlp.fairness.bias_utils import load_words, load_word_pairs + +from allennlp.common.file_utils import cached_path +from allennlp.common.testing.test_case import AllenNlpTestCase +from allennlp.data import Instance, Token +from allennlp.data.batch import Batch +from allennlp.data import Vocabulary +from allennlp.data.tokenizers.whitespace_tokenizer import WhitespaceTokenizer +from allennlp.data.token_indexers import SingleIdTokenIndexer +from allennlp.data.fields import TextField + + +class BiasUtilsTest(AllenNlpTestCase): + def setup_method(self): + token_indexer = SingleIdTokenIndexer("tokens") + + self.pairs_fname = ( + "https://raw.githubusercontent.com/tolga-b/debiaswe/" + "4c3fa843ffff45115c43fe112d4283c91d225c09/data/definitional_pairs.json" + ) + with open(cached_path(self.pairs_fname)) as f: + pairs_list = [] + [ + pairs_list.extend( + [w1.lower(), w2.lower(), w1.title(), w2.title(), w1.upper(), w2.upper()] + ) + for w1, w2 in json.load(f) + ] + + text_field = TextField( + [Token(t) for t in pairs_list], + {"tokens": token_indexer}, + ) + instance = Instance({"text": text_field}) + dataset = Batch([instance]) + self.pairs_vocab = Vocabulary.from_instances(dataset) + self.num_pairs = len(set(pairs_list)) + + self.singles_fname = ( + "https://raw.githubusercontent.com/tolga-b/debiaswe/" + "4c3fa843ffff45115c43fe112d4283c91d225c09/data/gender_specific_full.json" + ) + with open(cached_path(self.singles_fname)) as f: + singles_list = json.load(f) + + text_field = TextField( + [Token(t) for t in singles_list], + {"tokens": token_indexer}, + ) + instance = Instance({"text": text_field}) + dataset = Batch([instance]) + 
self.singles_vocab = Vocabulary.from_instances(dataset) + self.num_singles = len(set(singles_list)) + + super().setup_method() + + def test_load_word_pairs(self): + ids1, ids2 = load_word_pairs( + self.pairs_fname, WhitespaceTokenizer(), self.pairs_vocab, "tokens" + ) + # first two token IDs reserved for [CLS] and [SEP] + assert torch.equal( + torch.tensor([i.item() for i in ids1]), torch.arange(2, self.num_pairs + 2, step=2) + ) + assert torch.equal( + torch.tensor([i.item() for i in ids2]), torch.arange(3, self.num_pairs + 3, step=2) + ) + + def test_load_words(self): + ids = load_words( + self.singles_fname, WhitespaceTokenizer(), self.singles_vocab, "tokens", all_cases=False + ) + # first two token IDs reserved for [CLS] and [SEP] + assert torch.equal( + torch.tensor([i.item() for i in ids]), torch.arange(2, self.num_singles + 2) + )
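
For orientation, here is a minimal, hypothetical sketch of how the components introduced in this patch could be wired together in an AllenNLP training config, using the registered names from the diff (`bias_mitigator_applicator`, `hard`, `two_means`) plus the existing `pretrained_transformer` tokenizer. The base model, tokenizer choice, and word-pair file paths are placeholders, not anything prescribed by the patch; `embedding_layer` is deliberately absent because `BiasMitigatorApplicator` supplies it when it constructs the lazy `bias_mitigator`.

```jsonnet
{
    // Hypothetical config fragment; "base_model" stands in for any registered
    // Model (for example one loaded from an archive) and the file paths are
    // placeholders for seed/equalize word-pair JSON files.
    "model": {
        "type": "bias_mitigator_applicator",
        "base_model": { "type": "..." },
        "bias_mitigator": {
            "type": "hard",
            "bias_direction": {
                "type": "two_means",
                "seed_word_pairs_file": "definitional_pairs.json",
                "tokenizer": {
                    "type": "pretrained_transformer",
                    "model_name": "bert-base-uncased"
                }
            },
            "equalize_word_pairs_file": "equalize_pairs.json",
            "tokenizer": {
                "type": "pretrained_transformer",
                "model_name": "bert-base-uncased"
            }
        }
    }
}
```

Because the mitigator is registered as a forward hook on the base model's embedding layer, it remains active at evaluation time as well as during finetuning.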