From 6acbe7d0ed7c82329e00dc460a12313abbab7470 Mon Sep 17 00:00:00 2001 From: Samuel Hoffman Date: Tue, 20 Feb 2024 14:26:01 -0500 Subject: [PATCH] change source for lawschool gpa dataset * remove tempeh requirement Signed-off-by: Samuel Hoffman --- aif360/datasets/law_school_gpa_dataset.py | 32 +- aif360/sklearn/datasets/__init__.py | 2 +- aif360/sklearn/datasets/lawschool_dataset.py | 89 ++++ aif360/sklearn/datasets/tempeh_datasets.py | 59 --- ..._search_reduction_regression_sklearn.ipynb | 425 +++++------------- requirements.txt | 1 - setup.py | 1 - tests/sklearn/test_datasets.py | 4 +- 8 files changed, 215 insertions(+), 398 deletions(-) create mode 100644 aif360/sklearn/datasets/lawschool_dataset.py delete mode 100644 aif360/sklearn/datasets/tempeh_datasets.py diff --git a/aif360/datasets/law_school_gpa_dataset.py b/aif360/datasets/law_school_gpa_dataset.py index eac3656c..0d5c992a 100644 --- a/aif360/datasets/law_school_gpa_dataset.py +++ b/aif360/datasets/law_school_gpa_dataset.py @@ -1,36 +1,24 @@ -import os import pandas as pd +from sklearn.model_selection import train_test_split from aif360.datasets import RegressionDataset -try: - import tempeh.configurations as tc -except ImportError as error: - from logging import warning - warning("{}: LawSchoolGPADataset will be unavailable. To install, run:\n" - "pip install 'aif360[LawSchoolGPA]'".format(error)) +from aif360.sklearn.datasets.lawschool_dataset import LSAC_URL class LawSchoolGPADataset(RegressionDataset): - """Law School GPA dataset. - - See https://github.com/microsoft/tempeh for details. - """ + """Law School GPA dataset.""" def __init__(self, dep_var_name='zfygpa', - protected_attribute_names=['race'], - privileged_classes=[['white']], + protected_attribute_names=['race', 'gender'], + privileged_classes=[['white'], ['male']], instance_weights_name=None, categorical_features=[], na_values=[], custom_preprocessing=None, metadata=None): """See :obj:`RegressionDataset` for a description of the arguments.""" - dataset = tc.datasets["lawschool_gpa"]() - X_train,X_test = dataset.get_X(format=pd.DataFrame) - y_train, y_test = dataset.get_y(format=pd.Series) - A_train, A_test = dataset.get_sensitive_features(name='race', - format=pd.Series) - all_train = pd.concat([X_train, y_train, A_train], axis=1) - all_test = pd.concat([X_test, y_test, A_test], axis=1) - - df = pd.concat([all_train, all_test], axis=0) + df = pd.read_sas(LSAC_URL, encoding="utf-8") + df.race = df.race1.where(df.race1.isin(['black', 'white'])) + df.gender = df.gender.fillna("female") + df = df[["race", "gender", "lsat", "ugpa", "zfygpa"]].dropna() + df = pd.concat(train_test_split(df, test_size=0.33, random_state=123)) super(LawSchoolGPADataset, self).__init__(df=df, dep_var_name=dep_var_name, diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py index 525b1431..8c0f1049 100644 --- a/aif360/sklearn/datasets/__init__.py +++ b/aif360/sklearn/datasets/__init__.py @@ -12,4 +12,4 @@ from aif360.sklearn.datasets.openml_datasets import fetch_adult, fetch_german, fetch_bank from aif360.sklearn.datasets.compas_dataset import fetch_compas from aif360.sklearn.datasets.meps_datasets import fetch_meps -from aif360.sklearn.datasets.tempeh_datasets import fetch_lawschool_gpa +from aif360.sklearn.datasets.lawschool_dataset import fetch_lawschool_gpa diff --git a/aif360/sklearn/datasets/lawschool_dataset.py b/aif360/sklearn/datasets/lawschool_dataset.py new file mode 100644 index 00000000..66a11bfa --- /dev/null +++ b/aif360/sklearn/datasets/lawschool_dataset.py @@ -0,0 +1,89 @@ +from io import BytesIO +import os +import urllib + +import pandas as pd +from sklearn.model_selection import train_test_split + +from aif360.sklearn.datasets.utils import standardize_dataset, Dataset + + +# cache location +DATA_HOME_DEFAULT = os.path.join(os.path.dirname(os.path.abspath(__file__)), + '..', 'data', 'raw') +LSAC_URL = "https://github.com/jkomiyama/fairregresion/raw/master/dataset/law/lsac.sas7bdat" + +def fetch_lawschool_gpa(subset="all", *, data_home=None, cache=True, + binary_race=True, fillna_gender="female", + usecols=["race", "gender", "lsat", "ugpa"], + dropcols=None, numeric_only=False, dropna=True): + """Load the Law School GPA dataset. + + Optionally binarizes 'race' to 'white' (privileged) or 'black' (unprivileged). + The other protected attribute is gender ('male' is privileged and 'female' + is unprivileged). The outcome variable is standardized first year GPA + ('zfygpa'). Note: this is a continuous variable, i.e., a regression task. + + Args: + subset ({'train', 'test', or 'all'}, optional): Select the dataset to + load: 'train' for the training set, 'test' for the test set, 'all' + for both. + data_home (string, optional): Specify another download and cache folder + for the datasets. By default all AIF360 datasets are stored in + 'aif360/sklearn/data/raw' subfolders. + cache (bool): Whether to cache downloaded datasets. + binary_race (bool, optional): Filter only white and black students. + fillna_gender (str or None, optional): Fill NA values for gender with + this value. If `None`, leave as NA. Note: this is used for backward- + compatibility with tempeh and may be dropped in later versions. + usecols (single label or list-like, optional): Feature column(s) to + keep. All others are dropped. + dropcols (single label or list-like, optional): Feature column(s) to + drop. + numeric_only (bool): Drop all non-numeric feature columns. + dropna (bool): Drop rows with NAs. + + Returns: + namedtuple: Tuple containing X, y, and sample_weights for the Law School + GPA dataset accessible by index or name. + """ + if subset not in {'train', 'test', 'all'}: + raise ValueError("subset must be either 'train', 'test', or 'all'; " + "cannot be {}".format(subset)) + + cache_path = os.path.join(data_home or DATA_HOME_DEFAULT, + os.path.basename(LSAC_URL)) + if cache and os.path.isfile(cache_path): + df = pd.read_sas(cache_path, encoding="utf-8") + else: + data = urllib.request.urlopen(LSAC_URL).read() + if cache: + os.makedirs(os.path.dirname (cache_path), exist_ok=True) + with open(cache_path, 'wb') as f: + f.write(data) + df = pd.read_sas(BytesIO(data), format="sas7bdat", encoding="utf-8") + + df.race = df.race1.astype('category') + if binary_race: + df.race = df.race.cat.set_categories(['black', 'white'], ordered=True) + + # for backwards-compatibility with tempeh + if fillna_gender is not None: + df.gender = df.gender.fillna(fillna_gender) + df.gender = df.gender.astype('category').cat.set_categories( + ['female', 'male'], ordered=True) + + ds = standardize_dataset(df, prot_attr=['race', 'gender'], target='zfygpa', + usecols=usecols, dropcols=dropcols, + numeric_only=numeric_only, dropna=dropna) + + # for backwards-compatibility with tempeh + train_X, test_X, train_y, test_y = train_test_split(*ds, test_size=0.33, random_state=123) + if subset == "train": + return Dataset(train_X, train_y) + elif subset == "test": + return Dataset(test_X, test_y) + else: + X = pd.concat([train_X, test_X], axis=0) + y = pd.concat([train_y, test_y], axis=0) + return Dataset(X, y) diff --git a/aif360/sklearn/datasets/tempeh_datasets.py b/aif360/sklearn/datasets/tempeh_datasets.py deleted file mode 100644 index 7adae048..00000000 --- a/aif360/sklearn/datasets/tempeh_datasets.py +++ /dev/null @@ -1,59 +0,0 @@ -import pandas as pd -try: - import tempeh.configurations as tc -except ImportError as error: - from logging import warning - warning("{}: fetch_lawschool_gpa will be unavailable. To install, run:\n" - "pip install 'aif360[LawSchoolGPA]'".format(error)) - -from aif360.sklearn.datasets.utils import standardize_dataset - - -def fetch_lawschool_gpa(subset="all", *, usecols=None, dropcols=None, - numeric_only=False, dropna=True): - """Load the Law School GPA dataset - - Note: - By default, the data is downloaded from tempeh. See - https://github.com/microsoft/tempeh for details. - - Args: - subset ({'train', 'test', or 'all'}, optional): Select the dataset to - load: 'train' for the training set, 'test' for the test set, 'all' - for both. - usecols (single label or list-like, optional): Feature column(s) to - keep. All others are dropped. - dropcols (single label or list-like, optional): Feature column(s) to - drop. - numeric_only (bool): Drop all non-numeric feature columns. - dropna (bool): Drop rows with NAs. FIXME: NAs already dropped by tempeh - - Returns: - namedtuple: Tuple containing X, y, and sample_weights for the Law School - GPA dataset accessible by index or name. - """ - if subset not in {'train', 'test', 'all'}: - raise ValueError("subset must be either 'train', 'test', or 'all'; " - "cannot be {}".format(subset)) - - dataset = tc.datasets["lawschool_gpa"]() - X_train, X_test = dataset.get_X(format=pd.DataFrame) - y_train, y_test = dataset.get_y(format=pd.Series) - A_train, A_test = dataset.get_sensitive_features(name='race', - format=pd.Series) - all_train = pd.concat([X_train, y_train, A_train], axis=1) - all_test = pd.concat([X_test, y_test, A_test], axis=1) - - if subset == "train": - df = all_train - elif subset == "test": - df = all_test - else: - df = pd.concat([all_train, all_test], axis=0) - - df.race = df.race.astype('category').cat.set_categories( - ['black', 'white'], ordered=True) - - return standardize_dataset(df, prot_attr='race', target='zfygpa', - usecols=usecols, dropcols=dropcols, - numeric_only=numeric_only, dropna=dropna) diff --git a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb index d9fec458..ea47f6d5 100644 --- a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb +++ b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)" - ], "metadata": { "id": "fgfyb_2c9WL4" - } + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)" + ] }, { "cell_type": "markdown", @@ -23,73 +23,24 @@ }, { "cell_type": "code", - "source": [ - "#Install aif360\n", - "#Install Reductions from Fairlearn\n", - "#Install Low School GPA dataset\n", - "!pip install aif360[Reductions,LawSchoolGPA]" - ], + "execution_count": null, "metadata": { - "id": "4Ad7nC129i8f", - "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "4Ad7nC129i8f", + "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19" }, - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: aif360[LawSchoolGPA,Reductions] in /usr/local/lib/python3.7/dist-packages (0.5.0)\n", - "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.7.3)\n", - "Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.0.2)\n", - "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.21.6)\n", - "Requirement already satisfied: pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.3.5)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (3.2.2)\n", - "Requirement already satisfied: fairlearn~=0.7 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.7.0)\n", - "Requirement already satisfied: tempeh in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.1.12)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2022.2.1)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (1.15.0)\n", - "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (1.1.0)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (3.1.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (1.4.4)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (3.0.9)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (0.11.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->aif360[LawSchoolGPA,Reductions]) (4.1.1)\n", - "Requirement already satisfied: pytest in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (3.6.4)\n", - "Requirement already satisfied: memory-profiler in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.60.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (2.23.0)\n", - "Requirement already satisfied: shap in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.41.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from memory-profiler->tempeh->aif360[LawSchoolGPA,Reductions]) (5.4.8)\n", - "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.11.0)\n", - "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (8.14.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (57.4.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (22.1.0)\n", - "Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (0.7.1)\n", - "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.4.1)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (1.24.3)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2022.6.15)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2.10)\n", - "Requirement already satisfied: numba in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.56.2)\n", - "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (1.5.0)\n", - "Requirement already satisfied: slicer==0.0.7 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.0.7)\n", - "Requirement already satisfied: tqdm>4.25.0 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.64.1)\n", - "Requirement already satisfied: packaging>20.9 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (21.3)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.12.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (3.8.1)\n" - ] - } + "outputs": [], + "source": [ + "#Install aif360\n", + "#Install Reductions from Fairlearn\n", + "!pip install aif360[Reductions]" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "sivw3vma71DE" }, @@ -130,33 +81,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { - "id": "7Xqg6Yn771DG", - "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161", "colab": { "base_uri": "https://localhost:8080/", "height": 238 - } + }, + "id": "7Xqg6Yn771DG", + "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161" }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " lsat ugpa race\n", - "race \n", - "0 38.0 3.3 0\n", - "1 34.0 4.0 1\n", - "1 34.0 3.9 1\n", - "1 45.0 3.3 1\n", - "1 39.0 2.5 1" - ], "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "
\n", - " " + "
" + ], + "text/plain": [ + " race lsat ugpa\n", + "race gender \n", + "0.0 1 0.0 38.0 3.3\n", + "1.0 0 1.0 34.0 4.0\n", + " 0 1.0 34.0 3.9\n", + " 0 1.0 45.0 3.3\n", + " 1 1.0 39.0 2.5" ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ - "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True)\n", - "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True)\n", + "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True, dropcols=\"gender\")\n", + "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True, dropcols=\"gender\")\n", "X_train.head()" ] }, @@ -318,33 +194,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { - "id": "7CxY3K8i71DI", - "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0", "colab": { "base_uri": "https://localhost:8080/", "height": 238 - } + }, + "id": "7CxY3K8i71DI", + "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0" }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " lsat ugpa race\n", - "race \n", - "0 0.729730 0.825 0.0\n", - "1 0.621622 1.000 1.0\n", - "1 0.621622 0.975 1.0\n", - "1 0.918919 0.825 1.0\n", - "1 0.756757 0.625 1.0" - ], "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "
\n", - " " + "
" + ], + "text/plain": [ + " race lsat ugpa\n", + "race gender \n", + "0.0 1 0.0 0.729730 0.825\n", + "1.0 0 1.0 0.621622 1.000\n", + " 0 1.0 0.621622 0.975\n", + " 0 1.0 0.918919 0.825\n", + " 1 1.0 0.756757 0.625" ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -518,24 +319,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { - "id": "y3mrAFw471DL", - "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "y3mrAFw471DL", + "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739" }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.7400826321650612" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -557,28 +358,28 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { - "id": "F97LRCdf71DM", - "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "F97LRCdf71DM", + "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983" }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ - "0.2039259052574467" + "0.20392590525744636" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ - "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred)\n", + "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred, prot_attr=\"race\")\n", "lr_mae_diff" ] }, @@ -602,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "id": "TxejOHFK71DN" }, @@ -622,27 +423,27 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { - "id": "B63yaCCf71DN", - "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "B63yaCCf71DN", + "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "0.7622719376746615\n" + "0.7622719376746614\n" ] } ], "source": [ "np.random.seed(0) #need for reproducibility\n", - "grid_search_red = GridSearchReduction(prot_attr=\"race\", \n", - " estimator=estimator, \n", + "grid_search_red = GridSearchReduction(prot_attr=\"race\",\n", + " estimator=estimator,\n", " constraints=\"BoundedGroupLoss\",\n", " loss=\"Absolute\",\n", " min_val=y_train.min(),\n", @@ -660,25 +461,25 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { - "id": "WPd4i-Zj71DO", - "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "WPd4i-Zj71DO", + "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "0.06122151904963524\n" + "0.06122151904963535\n" ] } ], "source": [ - "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred)\n", + "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred, prot_attr=\"race\")\n", "print(gs_mae_diff)\n", "\n", "#Check if difference decreased\n", @@ -687,6 +488,9 @@ } ], "metadata": { + "colab": { + "provenance": [] + }, "kernelspec": { "display_name": "Python 3.9.7 ('aif360')", "language": "python", @@ -702,17 +506,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.11.4" }, "vscode": { "interpreter": { "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" } - }, - "colab": { - "provenance": [] } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/requirements.txt b/requirements.txt index 25213c0a..d15a8e2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ tensorflow>=1.13.1 torch cvxpy>=1.0 fairlearn>=0.7.0 -tempeh rpy2==3.4.5 skorch==0.11.0 inFairness>=0.2.2 diff --git a/setup.py b/setup.py index ba05dfd8..216cf553 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ 'FairAdapt': ['rpy2'], 'inFairness': ['skorch', 'inFairness>=0.2.2'], 'notebooks': ['jupyter', 'tqdm', 'igraph[plotting]', 'lightgbm', 'seaborn', 'ipympl'], - 'LawSchoolGPA': ['tempeh'], 'OptimalTransport': ['pot'], } extras['tests'] = reduce(lambda l1, l2: l1+l2, extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1']) diff --git a/tests/sklearn/test_datasets.py b/tests/sklearn/test_datasets.py index 0fefc835..e7752b24 100644 --- a/tests/sklearn/test_datasets.py +++ b/tests/sklearn/test_datasets.py @@ -214,9 +214,9 @@ def test_fetch_lawschool_gpa(): """Tests Law School GPA dataset shapes with various options.""" gpa = fetch_lawschool_gpa() assert len(gpa) == 2 - assert gpa.X.shape == (22342, 3) + assert gpa.X.shape == (22342, 4) assert gpa.y.nunique() > 2 # regression - assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (22342, 3) + assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (27478, 4) def test_lawschool_matches_old(): """Tests Law School GPA dataset matches original version."""