From 6acbe7d0ed7c82329e00dc460a12313abbab7470 Mon Sep 17 00:00:00 2001
From: Samuel Hoffman <hoffman.sc@gmail.com>
Date: Tue, 20 Feb 2024 14:26:01 -0500
Subject: [PATCH] change source for lawschool gpa dataset

* remove tempeh requirement

Signed-off-by: Samuel Hoffman <hoffman.sc@gmail.com>
---
 aif360/datasets/law_school_gpa_dataset.py     |  32 +-
 aif360/sklearn/datasets/__init__.py           |   2 +-
 aif360/sklearn/datasets/lawschool_dataset.py  |  89 ++++
 aif360/sklearn/datasets/tempeh_datasets.py    |  59 ---
 ..._search_reduction_regression_sklearn.ipynb | 425 +++++-------------
 requirements.txt                              |   1 -
 setup.py                                      |   1 -
 tests/sklearn/test_datasets.py                |   4 +-
 8 files changed, 215 insertions(+), 398 deletions(-)
 create mode 100644 aif360/sklearn/datasets/lawschool_dataset.py
 delete mode 100644 aif360/sklearn/datasets/tempeh_datasets.py

diff --git a/aif360/datasets/law_school_gpa_dataset.py b/aif360/datasets/law_school_gpa_dataset.py
index eac3656c..0d5c992a 100644
--- a/aif360/datasets/law_school_gpa_dataset.py
+++ b/aif360/datasets/law_school_gpa_dataset.py
@@ -1,36 +1,24 @@
-import os
 import pandas as pd
+from sklearn.model_selection import train_test_split
 from aif360.datasets import RegressionDataset
-try:
-    import tempeh.configurations as tc
-except ImportError as error:
-    from logging import warning
-    warning("{}: LawSchoolGPADataset will be unavailable. To install, run:\n"
-            "pip install 'aif360[LawSchoolGPA]'".format(error))
+from aif360.sklearn.datasets.lawschool_dataset import LSAC_URL
 
 class LawSchoolGPADataset(RegressionDataset):
-    """Law School GPA dataset.
-
-    See https://github.com/microsoft/tempeh for details.
-    """
+    """Law School GPA dataset."""
 
     def __init__(self, dep_var_name='zfygpa',
-                 protected_attribute_names=['race'],
-                 privileged_classes=[['white']],
+                 protected_attribute_names=['race', 'gender'],
+                 privileged_classes=[['white'], ['male']],
                  instance_weights_name=None,
                  categorical_features=[],
                  na_values=[], custom_preprocessing=None,
                  metadata=None):
         """See :obj:`RegressionDataset` for a description of the arguments."""
-        dataset = tc.datasets["lawschool_gpa"]()
-        X_train,X_test = dataset.get_X(format=pd.DataFrame)
-        y_train, y_test = dataset.get_y(format=pd.Series)
-        A_train, A_test = dataset.get_sensitive_features(name='race',
-                                                         format=pd.Series)
-        all_train = pd.concat([X_train, y_train, A_train], axis=1)
-        all_test = pd.concat([X_test, y_test, A_test], axis=1)
-
-        df = pd.concat([all_train, all_test], axis=0)
+        df = pd.read_sas(LSAC_URL, encoding="utf-8")
+        df.race = df.race1.where(df.race1.isin(['black', 'white']))
+        df.gender = df.gender.fillna("female")
+        df = df[["race", "gender", "lsat", "ugpa", "zfygpa"]].dropna()
+        df = pd.concat(train_test_split(df, test_size=0.33, random_state=123))
 
         super(LawSchoolGPADataset, self).__init__(df=df,
             dep_var_name=dep_var_name,
diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py
index 525b1431..8c0f1049 100644
--- a/aif360/sklearn/datasets/__init__.py
+++ b/aif360/sklearn/datasets/__init__.py
@@ -12,4 +12,4 @@
 from aif360.sklearn.datasets.openml_datasets import fetch_adult, fetch_german, fetch_bank
 from aif360.sklearn.datasets.compas_dataset import fetch_compas
 from aif360.sklearn.datasets.meps_datasets import fetch_meps
-from aif360.sklearn.datasets.tempeh_datasets import fetch_lawschool_gpa
+from aif360.sklearn.datasets.lawschool_dataset import fetch_lawschool_gpa
diff --git a/aif360/sklearn/datasets/lawschool_dataset.py b/aif360/sklearn/datasets/lawschool_dataset.py
new file mode 100644
index 00000000..66a11bfa
--- /dev/null
+++ b/aif360/sklearn/datasets/lawschool_dataset.py
@@ -0,0 +1,89 @@
+from io import BytesIO
+import os
+import urllib.request
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+from aif360.sklearn.datasets.utils import standardize_dataset, Dataset
+
+
+# cache location
+DATA_HOME_DEFAULT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                 '..', 'data', 'raw')
+LSAC_URL = "https://github.com/jkomiyama/fairregresion/raw/master/dataset/law/lsac.sas7bdat"
+
+def fetch_lawschool_gpa(subset="all", *, data_home=None, cache=True,
+                        binary_race=True, fillna_gender="female",
+                        usecols=["race", "gender", "lsat", "ugpa"],
+                        dropcols=None, numeric_only=False, dropna=True):
+    """Load the Law School GPA dataset.
+
+    Optionally binarizes 'race' to 'white' (privileged) or 'black' (unprivileged).
+    The other protected attribute is gender ('male' is privileged and 'female'
+    is unprivileged). The outcome variable is standardized first year GPA
+    ('zfygpa'). Note: this is a continuous variable, i.e., a regression task.
+
+    Args:
+        subset ({'train', 'test', or 'all'}, optional): Select the dataset to
+            load: 'train' for the training set, 'test' for the test set, 'all'
+            for both.
+        data_home (string, optional): Specify another download and cache folder
+            for the datasets. By default all AIF360 datasets are stored in
+            'aif360/sklearn/data/raw' subfolders.
+        cache (bool): Whether to cache downloaded datasets.
+        binary_race (bool, optional): Filter only white and black students.
+        fillna_gender (str or None, optional): Fill NA values for gender with
+            this value. If `None`, leave as NA. Note: this is used for backward-
+            compatibility with tempeh and may be dropped in later versions.
+        usecols (single label or list-like, optional): Feature column(s) to
+            keep. All others are dropped.
+        dropcols (single label or list-like, optional): Feature column(s) to
+            drop.
+        numeric_only (bool): Drop all non-numeric feature columns.
+        dropna (bool): Drop rows with NAs.
+
+    Returns:
+        namedtuple: Tuple containing X, y, and sample_weights for the Law School
+        GPA dataset accessible by index or name.
+    """
+    if subset not in {'train', 'test', 'all'}:
+        raise ValueError("subset must be either 'train', 'test', or 'all'; "
+                         "cannot be {}".format(subset))
+
+    cache_path = os.path.join(data_home or DATA_HOME_DEFAULT,
+                              os.path.basename(LSAC_URL))
+    if cache and os.path.isfile(cache_path):
+        df = pd.read_sas(cache_path, encoding="utf-8")
+    else:
+        with urllib.request.urlopen(LSAC_URL) as response:
+            data = response.read()
+        if cache:
+            os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+            with open(cache_path, 'wb') as f:
+                f.write(data)
+        df = pd.read_sas(BytesIO(data), format="sas7bdat", encoding="utf-8")
+
+    race = df['race1'].astype('category')
+    if binary_race:
+        # values outside these two categories become NaN
+        race = race.cat.set_categories(['black', 'white'], ordered=True)
+    df['race'] = race
+
+    # for backwards-compatibility with tempeh
+    if fillna_gender is not None:
+        df['gender'] = df['gender'].fillna(fillna_gender)
+    df['gender'] = df['gender'].astype('category').cat.set_categories(
+        ['female', 'male'], ordered=True)
+
+    ds = standardize_dataset(df, prot_attr=['race', 'gender'], target='zfygpa',
+                             usecols=usecols, dropcols=dropcols,
+                             numeric_only=numeric_only, dropna=dropna)
+
+    # for backwards-compatibility with tempeh
+    train_X, test_X, train_y, test_y = train_test_split(*ds, test_size=0.33, random_state=123)
+    if subset == "train":
+        return Dataset(train_X, train_y)
+    if subset == "test":
+        return Dataset(test_X, test_y)
+    return Dataset(pd.concat([train_X, test_X]), pd.concat([train_y, test_y]))
diff --git a/aif360/sklearn/datasets/tempeh_datasets.py b/aif360/sklearn/datasets/tempeh_datasets.py
deleted file mode 100644
index 7adae048..00000000
--- a/aif360/sklearn/datasets/tempeh_datasets.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import pandas as pd
-try:
-    import tempeh.configurations as tc
-except ImportError as error:
-    from logging import warning
-    warning("{}: fetch_lawschool_gpa will be unavailable. To install, run:\n"
-            "pip install 'aif360[LawSchoolGPA]'".format(error))
-
-from aif360.sklearn.datasets.utils import standardize_dataset
-
-
-def fetch_lawschool_gpa(subset="all", *, usecols=None, dropcols=None,
-                        numeric_only=False, dropna=True):
-    """Load the Law School GPA dataset
-
-    Note:
-        By default, the data is downloaded from tempeh. See
-        https://github.com/microsoft/tempeh for details.
-
-    Args:
-        subset ({'train', 'test', or 'all'}, optional): Select the dataset to
-            load: 'train' for the training set, 'test' for the test set, 'all'
-            for both.
-        usecols (single label or list-like, optional): Feature column(s) to
-            keep. All others are dropped.
-        dropcols (single label or list-like, optional): Feature column(s) to
-            drop.
-        numeric_only (bool): Drop all non-numeric feature columns.
-        dropna (bool): Drop rows with NAs. FIXME: NAs already dropped by tempeh
-
-    Returns:
-        namedtuple: Tuple containing X, y, and sample_weights for the Law School
-        GPA dataset accessible by index or name.
-    """
-    if subset not in {'train', 'test', 'all'}:
-        raise ValueError("subset must be either 'train', 'test', or 'all'; "
-                         "cannot be {}".format(subset))
-
-    dataset = tc.datasets["lawschool_gpa"]()
-    X_train, X_test = dataset.get_X(format=pd.DataFrame)
-    y_train, y_test = dataset.get_y(format=pd.Series)
-    A_train, A_test = dataset.get_sensitive_features(name='race',
-                                                     format=pd.Series)
-    all_train = pd.concat([X_train, y_train, A_train], axis=1)
-    all_test = pd.concat([X_test, y_test, A_test], axis=1)
-
-    if subset == "train":
-        df = all_train
-    elif subset == "test":
-        df = all_test
-    else:
-        df = pd.concat([all_train, all_test], axis=0)
-
-    df.race = df.race.astype('category').cat.set_categories(
-        ['black', 'white'], ordered=True)
-
-    return standardize_dataset(df, prot_attr='race', target='zfygpa',
-                               usecols=usecols, dropcols=dropcols,
-                               numeric_only=numeric_only, dropna=dropna)
diff --git a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb
index d9fec458..ea47f6d5 100644
--- a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb
+++ b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb
@@ -2,12 +2,12 @@
   "cells": [
     {
       "cell_type": "markdown",
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)"
-      ],
       "metadata": {
         "id": "fgfyb_2c9WL4"
-      }
+      },
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)"
+      ]
     },
     {
       "cell_type": "markdown",
@@ -23,73 +23,24 @@
     },
     {
       "cell_type": "code",
-      "source": [
-        "#Install aif360\n",
-        "#Install Reductions from Fairlearn\n",
-        "#Install Low School GPA dataset\n",
-        "!pip install aif360[Reductions,LawSchoolGPA]"
-      ],
+      "execution_count": null,
       "metadata": {
-        "id": "4Ad7nC129i8f",
-        "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "4Ad7nC129i8f",
+        "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19"
       },
-      "execution_count": 10,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
-            "Requirement already satisfied: aif360[LawSchoolGPA,Reductions] in /usr/local/lib/python3.7/dist-packages (0.5.0)\n",
-            "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.7.3)\n",
-            "Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.0.2)\n",
-            "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.21.6)\n",
-            "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.3.5)\n",
-            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (3.2.2)\n",
-            "Requirement already satisfied: fairlearn~=0.7 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.7.0)\n",
-            "Requirement already satisfied: tempeh in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.1.12)\n",
-            "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2022.2.1)\n",
-            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2.8.2)\n",
-            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (1.15.0)\n",
-            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (1.1.0)\n",
-            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (3.1.0)\n",
-            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (1.4.4)\n",
-            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (3.0.9)\n",
-            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (0.11.0)\n",
-            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->aif360[LawSchoolGPA,Reductions]) (4.1.1)\n",
-            "Requirement already satisfied: pytest in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (3.6.4)\n",
-            "Requirement already satisfied: memory-profiler in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.60.0)\n",
-            "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (2.23.0)\n",
-            "Requirement already satisfied: shap in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.41.0)\n",
-            "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from memory-profiler->tempeh->aif360[LawSchoolGPA,Reductions]) (5.4.8)\n",
-            "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.11.0)\n",
-            "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (8.14.0)\n",
-            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (57.4.0)\n",
-            "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (22.1.0)\n",
-            "Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (0.7.1)\n",
-            "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.4.1)\n",
-            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (1.24.3)\n",
-            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (3.0.4)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2022.6.15)\n",
-            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2.10)\n",
-            "Requirement already satisfied: numba in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.56.2)\n",
-            "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (1.5.0)\n",
-            "Requirement already satisfied: slicer==0.0.7 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.0.7)\n",
-            "Requirement already satisfied: tqdm>4.25.0 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.64.1)\n",
-            "Requirement already satisfied: packaging>20.9 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (21.3)\n",
-            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.39.1)\n",
-            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.12.0)\n",
-            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (3.8.1)\n"
-          ]
-        }
+      "outputs": [],
+      "source": [
+        "#Install aif360\n",
+        "#Install Reductions from Fairlearn\n",
+        "!pip install aif360[Reductions]"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "id": "sivw3vma71DE"
       },
@@ -130,33 +81,20 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": 2,
       "metadata": {
-        "id": "7Xqg6Yn771DG",
-        "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 238
-        }
+        },
+        "id": "7Xqg6Yn771DG",
+        "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "      lsat  ugpa  race\n",
-              "race                  \n",
-              "0     38.0   3.3     0\n",
-              "1     34.0   4.0     1\n",
-              "1     34.0   3.9     1\n",
-              "1     45.0   3.3     1\n",
-              "1     39.0   2.5     1"
-            ],
             "text/html": [
-              "\n",
-              "  <div id=\"df-5dc8ae11-154f-4781-a43e-7b27b819c810\">\n",
-              "    <div class=\"colab-df-container\">\n",
-              "      <div>\n",
+              "<div>\n",
               "<style scoped>\n",
               "    .dataframe tbody tr th:only-of-type {\n",
               "        vertical-align: middle;\n",
@@ -174,12 +112,14 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>race</th>\n",
               "      <th>lsat</th>\n",
               "      <th>ugpa</th>\n",
-              "      <th>race</th>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>race</th>\n",
+              "      <th>gender</th>\n",
               "      <th></th>\n",
               "      <th></th>\n",
               "      <th></th>\n",
@@ -187,123 +127,59 @@
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
-              "      <th>0</th>\n",
+              "      <th>0.0</th>\n",
+              "      <th>1</th>\n",
+              "      <td>0.0</td>\n",
               "      <td>38.0</td>\n",
               "      <td>3.3</td>\n",
-              "      <td>0</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th rowspan=\"4\" valign=\"top\">1.0</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>34.0</td>\n",
               "      <td>4.0</td>\n",
-              "      <td>1</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>34.0</td>\n",
               "      <td>3.9</td>\n",
-              "      <td>1</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>45.0</td>\n",
               "      <td>3.3</td>\n",
-              "      <td>1</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>39.0</td>\n",
               "      <td>2.5</td>\n",
-              "      <td>1</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
-              "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5dc8ae11-154f-4781-a43e-7b27b819c810')\"\n",
-              "              title=\"Convert this dataframe to an interactive table.\"\n",
-              "              style=\"display:none;\">\n",
-              "        \n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
-              "       width=\"24px\">\n",
-              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
-              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
-              "  </svg>\n",
-              "      </button>\n",
-              "      \n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      flex-wrap:wrap;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
-              "\n",
-              "      <script>\n",
-              "        const buttonEl =\n",
-              "          document.querySelector('#df-5dc8ae11-154f-4781-a43e-7b27b819c810 button.colab-df-convert');\n",
-              "        buttonEl.style.display =\n",
-              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
-              "\n",
-              "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-5dc8ae11-154f-4781-a43e-7b27b819c810');\n",
-              "          const dataTable =\n",
-              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                     [key], {});\n",
-              "          if (!dataTable) return;\n",
-              "\n",
-              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "            + ' to learn more about interactive tables.';\n",
-              "          element.innerHTML = '';\n",
-              "          dataTable['output_type'] = 'display_data';\n",
-              "          await google.colab.output.renderOutput(dataTable, element);\n",
-              "          const docLink = document.createElement('div');\n",
-              "          docLink.innerHTML = docLinkHtml;\n",
-              "          element.appendChild(docLink);\n",
-              "        }\n",
-              "      </script>\n",
-              "    </div>\n",
-              "  </div>\n",
-              "  "
+              "</div>"
+            ],
+            "text/plain": [
+              "             race  lsat  ugpa\n",
+              "race gender                  \n",
+              "0.0  1        0.0  38.0   3.3\n",
+              "1.0  0        1.0  34.0   4.0\n",
+              "     0        1.0  34.0   3.9\n",
+              "     0        1.0  45.0   3.3\n",
+              "     1        1.0  39.0   2.5"
             ]
           },
+          "execution_count": 2,
           "metadata": {},
-          "execution_count": 3
+          "output_type": "execute_result"
         }
       ],
       "source": [
-        "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True)\n",
-        "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True)\n",
+        "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True, dropcols=\"gender\")\n",
+        "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True, dropcols=\"gender\")\n",
         "X_train.head()"
       ]
     },
@@ -318,33 +194,20 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": 3,
       "metadata": {
-        "id": "7CxY3K8i71DI",
-        "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0",
         "colab": {
           "base_uri": "https://localhost:8080/",
           "height": 238
-        }
+        },
+        "id": "7CxY3K8i71DI",
+        "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
-            "text/plain": [
-              "          lsat   ugpa  race\n",
-              "race                       \n",
-              "0     0.729730  0.825   0.0\n",
-              "1     0.621622  1.000   1.0\n",
-              "1     0.621622  0.975   1.0\n",
-              "1     0.918919  0.825   1.0\n",
-              "1     0.756757  0.625   1.0"
-            ],
             "text/html": [
-              "\n",
-              "  <div id=\"df-242eaf4b-880d-4104-86f9-c9eaa52f4b73\">\n",
-              "    <div class=\"colab-df-container\">\n",
-              "      <div>\n",
+              "<div>\n",
               "<style scoped>\n",
               "    .dataframe tbody tr th:only-of-type {\n",
               "        vertical-align: middle;\n",
@@ -362,12 +225,14 @@
               "  <thead>\n",
               "    <tr style=\"text-align: right;\">\n",
               "      <th></th>\n",
+              "      <th></th>\n",
+              "      <th>race</th>\n",
               "      <th>lsat</th>\n",
               "      <th>ugpa</th>\n",
-              "      <th>race</th>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>race</th>\n",
+              "      <th>gender</th>\n",
               "      <th></th>\n",
               "      <th></th>\n",
               "      <th></th>\n",
@@ -375,118 +240,54 @@
               "  </thead>\n",
               "  <tbody>\n",
               "    <tr>\n",
-              "      <th>0</th>\n",
+              "      <th>0.0</th>\n",
+              "      <th>1</th>\n",
+              "      <td>0.0</td>\n",
               "      <td>0.729730</td>\n",
               "      <td>0.825</td>\n",
-              "      <td>0.0</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th rowspan=\"4\" valign=\"top\">1.0</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>0.621622</td>\n",
               "      <td>1.000</td>\n",
-              "      <td>1.0</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>0.621622</td>\n",
               "      <td>0.975</td>\n",
-              "      <td>1.0</td>\n",
               "    </tr>\n",
               "    <tr>\n",
-              "      <th>1</th>\n",
+              "      <th>0</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>0.918919</td>\n",
               "      <td>0.825</td>\n",
-              "      <td>1.0</td>\n",
               "    </tr>\n",
               "    <tr>\n",
               "      <th>1</th>\n",
+              "      <td>1.0</td>\n",
               "      <td>0.756757</td>\n",
               "      <td>0.625</td>\n",
-              "      <td>1.0</td>\n",
               "    </tr>\n",
               "  </tbody>\n",
               "</table>\n",
-              "</div>\n",
-              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-242eaf4b-880d-4104-86f9-c9eaa52f4b73')\"\n",
-              "              title=\"Convert this dataframe to an interactive table.\"\n",
-              "              style=\"display:none;\">\n",
-              "        \n",
-              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
-              "       width=\"24px\">\n",
-              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
-              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
-              "  </svg>\n",
-              "      </button>\n",
-              "      \n",
-              "  <style>\n",
-              "    .colab-df-container {\n",
-              "      display:flex;\n",
-              "      flex-wrap:wrap;\n",
-              "      gap: 12px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert {\n",
-              "      background-color: #E8F0FE;\n",
-              "      border: none;\n",
-              "      border-radius: 50%;\n",
-              "      cursor: pointer;\n",
-              "      display: none;\n",
-              "      fill: #1967D2;\n",
-              "      height: 32px;\n",
-              "      padding: 0 0 0 0;\n",
-              "      width: 32px;\n",
-              "    }\n",
-              "\n",
-              "    .colab-df-convert:hover {\n",
-              "      background-color: #E2EBFA;\n",
-              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
-              "      fill: #174EA6;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert {\n",
-              "      background-color: #3B4455;\n",
-              "      fill: #D2E3FC;\n",
-              "    }\n",
-              "\n",
-              "    [theme=dark] .colab-df-convert:hover {\n",
-              "      background-color: #434B5C;\n",
-              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
-              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
-              "      fill: #FFFFFF;\n",
-              "    }\n",
-              "  </style>\n",
-              "\n",
-              "      <script>\n",
-              "        const buttonEl =\n",
-              "          document.querySelector('#df-242eaf4b-880d-4104-86f9-c9eaa52f4b73 button.colab-df-convert');\n",
-              "        buttonEl.style.display =\n",
-              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
-              "\n",
-              "        async function convertToInteractive(key) {\n",
-              "          const element = document.querySelector('#df-242eaf4b-880d-4104-86f9-c9eaa52f4b73');\n",
-              "          const dataTable =\n",
-              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
-              "                                                     [key], {});\n",
-              "          if (!dataTable) return;\n",
-              "\n",
-              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
-              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
-              "            + ' to learn more about interactive tables.';\n",
-              "          element.innerHTML = '';\n",
-              "          dataTable['output_type'] = 'display_data';\n",
-              "          await google.colab.output.renderOutput(dataTable, element);\n",
-              "          const docLink = document.createElement('div');\n",
-              "          docLink.innerHTML = docLinkHtml;\n",
-              "          element.appendChild(docLink);\n",
-              "        }\n",
-              "      </script>\n",
-              "    </div>\n",
-              "  </div>\n",
-              "  "
+              "</div>"
+            ],
+            "text/plain": [
+              "             race      lsat   ugpa\n",
+              "race gender                       \n",
+              "0.0  1        0.0  0.729730  0.825\n",
+              "1.0  0        1.0  0.621622  1.000\n",
+              "     0        1.0  0.621622  0.975\n",
+              "     0        1.0  0.918919  0.825\n",
+              "     1        1.0  0.756757  0.625"
             ]
           },
+          "execution_count": 3,
           "metadata": {},
-          "execution_count": 4
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -518,24 +319,24 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": 4,
       "metadata": {
-        "id": "y3mrAFw471DL",
-        "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "y3mrAFw471DL",
+        "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
               "0.7400826321650612"
             ]
           },
+          "execution_count": 4,
           "metadata": {},
-          "execution_count": 5
+          "output_type": "execute_result"
         }
       ],
       "source": [
@@ -557,28 +358,28 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": 5,
       "metadata": {
-        "id": "F97LRCdf71DM",
-        "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "F97LRCdf71DM",
+        "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983"
       },
       "outputs": [
         {
-          "output_type": "execute_result",
           "data": {
             "text/plain": [
-              "0.2039259052574467"
+              "0.20392590525744636"
             ]
           },
+          "execution_count": 5,
           "metadata": {},
-          "execution_count": 6
+          "output_type": "execute_result"
         }
       ],
       "source": [
-        "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred)\n",
+        "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred, prot_attr=\"race\")\n",
         "lr_mae_diff"
       ]
     },
@@ -602,7 +403,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": 6,
       "metadata": {
         "id": "TxejOHFK71DN"
       },
@@ -622,27 +423,27 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": 7,
       "metadata": {
-        "id": "B63yaCCf71DN",
-        "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "B63yaCCf71DN",
+        "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "0.7622719376746615\n"
+            "0.7622719376746614\n"
           ]
         }
       ],
       "source": [
         "np.random.seed(0) #need for reproducibility\n",
-        "grid_search_red = GridSearchReduction(prot_attr=\"race\", \n",
-        "                                      estimator=estimator, \n",
+        "grid_search_red = GridSearchReduction(prot_attr=\"race\",\n",
+        "                                      estimator=estimator,\n",
         "                                      constraints=\"BoundedGroupLoss\",\n",
         "                                      loss=\"Absolute\",\n",
         "                                      min_val=y_train.min(),\n",
@@ -660,25 +461,25 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 9,
+      "execution_count": 8,
       "metadata": {
-        "id": "WPd4i-Zj71DO",
-        "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1",
         "colab": {
           "base_uri": "https://localhost:8080/"
-        }
+        },
+        "id": "WPd4i-Zj71DO",
+        "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1"
       },
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
-            "0.06122151904963524\n"
+            "0.06122151904963535\n"
           ]
         }
       ],
       "source": [
-        "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred)\n",
+        "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred, prot_attr=\"race\")\n",
         "print(gs_mae_diff)\n",
         "\n",
         "#Check if difference decreased\n",
@@ -687,6 +488,9 @@
     }
   ],
   "metadata": {
+    "colab": {
+      "provenance": []
+    },
     "kernelspec": {
       "display_name": "Python 3.9.7 ('aif360')",
       "language": "python",
@@ -702,17 +506,14 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.9.7"
+      "version": "3.11.4"
     },
     "vscode": {
       "interpreter": {
         "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd"
       }
-    },
-    "colab": {
-      "provenance": []
     }
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git a/requirements.txt b/requirements.txt
index 25213c0a..d15a8e2f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,6 @@ tensorflow>=1.13.1
 torch
 cvxpy>=1.0
 fairlearn>=0.7.0
-tempeh
 rpy2==3.4.5
 skorch==0.11.0
 inFairness>=0.2.2
diff --git a/setup.py b/setup.py
index ba05dfd8..216cf553 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,6 @@
     'FairAdapt': ['rpy2'],
     'inFairness': ['skorch', 'inFairness>=0.2.2'],
     'notebooks': ['jupyter', 'tqdm', 'igraph[plotting]', 'lightgbm', 'seaborn', 'ipympl'],
-    'LawSchoolGPA': ['tempeh'],
     'OptimalTransport': ['pot'],
 }
 extras['tests'] = reduce(lambda l1, l2: l1+l2, extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1'])
diff --git a/tests/sklearn/test_datasets.py b/tests/sklearn/test_datasets.py
index 0fefc835..e7752b24 100644
--- a/tests/sklearn/test_datasets.py
+++ b/tests/sklearn/test_datasets.py
@@ -214,9 +214,9 @@ def test_fetch_lawschool_gpa():
     """Tests Law School GPA dataset shapes with various options."""
     gpa = fetch_lawschool_gpa()
     assert len(gpa) == 2
-    assert gpa.X.shape == (22342, 3)
+    assert gpa.X.shape == (22342, 4)
     assert gpa.y.nunique() > 2  # regression
-    assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (22342, 3)
+    assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (27478, 4)
 
 def test_lawschool_matches_old():
     """Tests Law School GPA dataset matches original version."""