update sphinx requirement (#512)
* add missing try-except imports
* clean up docstrings
* remove explicit __all__ from metrics.py
* drop mdss_bias_scan
* fix parameters missing trailing underscore bug
hoffmansc authored Feb 21, 2024
1 parent 3974e29 commit d2ba8c4
Showing 12 changed files with 59 additions and 66 deletions.
25 changes: 15 additions & 10 deletions aif360/metrics/ot_metric.py
@@ -1,7 +1,12 @@
 from typing import Union
 import pandas as pd
 import numpy as np
-import ot
+try:
+    import ot
+except ImportError as error:
+    from logging import warning
+    warning("{}: ot_distance will be unavailable. To install, run:\n"
+            "pip install 'aif360[OptimalTransport]'".format(error))
 from sklearn.preprocessing import LabelEncoder
 
 def _normalize(distribution1, distribution2):
@@ -17,7 +22,7 @@ def _normalize(distribution1, distribution2):
     extra = -np.minimum(np.min(distribution1), np.min(distribution2))
     distribution1 += extra
     distribution2 += extra
-
+
     total_of_distribution1 = np.sum(distribution1)
     if total_of_distribution1 != 0:
         distribution1 /= total_of_distribution1
@@ -75,10 +80,10 @@ def _evaluate(
     if prot_attr is None:
         initial_distribution, required_distribution, matrix_distance = _transform(ground_truth, classifier, cost_matrix)
         return ot.emd2(a=initial_distribution, b=required_distribution, M=matrix_distance, numItermax=num_iters)
-
+
     if not ground_truth.nunique() == 2:
         raise ValueError(f"Expected to have exactly 2 target values, got {ground_truth.nunique()}.")
-
+
     # Calculate EMD between ground truth distribution and distribution of each group
     emds = {}
     for sa_val in sorted(prot_attr.unique()):
@@ -137,7 +142,7 @@ def ot_distance(
     # Assert correct mode passed
     if mode not in ['binary', 'continuous', 'nominal', 'ordinal']:
         raise ValueError(f"Expected one of {['binary', 'continuous', 'nominal', 'ordinal']}, got {mode}.")
-
+
     # Assert correct types passed to ground_truth, classifier and prot_attr
     if not isinstance(ground_truth, (pd.Series, str)):
         raise TypeError(f"ground_truth: expected pd.Series or str, got {type(ground_truth)}")
@@ -148,17 +153,17 @@ def ot_distance(
         raise TypeError(f"classifier: expected pd.DataFrame for {mode} mode, got {type(classifier)}")
     if prot_attr is not None and not isinstance(prot_attr, (pd.Series, str)):
         raise TypeError(f"prot_attr: expected pd.Series or str, got {type(prot_attr)}")
-
+
     # Assert correct type passed to cost_matrix
     if cost_matrix is not None and not isinstance(cost_matrix, np.ndarray):
         raise TypeError(f"cost_matrix: expected numpy.ndarray, got {type(cost_matrix)}")
-
+
     # Assert scoring is "Wasserstein1"
     if not scoring == "Wasserstein1":
         raise ValueError(f"Scoring mode can only be \"Wasserstein1\", got {scoring}")
-
+
     grt = ground_truth.copy()
-
+
     if classifier is not None:
         cls = classifier.copy()
     if prot_attr is not None:
@@ -171,7 +176,7 @@ def ot_distance(
         sat.index = grt.index
     else:
         sat = None
-
+
     uniques = list(grt.unique())
     if mode == "binary":
         if len(uniques) > 2:
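Note on the hunk above: wrapping the import in try/except lets aif360.metrics import cleanly when the POT extra is absent, deferring the failure to first use. A minimal sketch of the same idea follows; the `ot = None` sentinel and the wrapper function are illustrative additions, not part of the patch, which simply leaves the name unbound on failure (so first use raises NameError):

import logging

try:
    import ot  # POT (Python Optimal Transport), an optional extra
except ImportError as error:
    ot = None  # illustrative sentinel; the patch itself leaves the name unbound
    logging.warning("%s: ot_distance will be unavailable. To install, run:\n"
                    "pip install 'aif360[OptimalTransport]'", error)

def earth_movers_distance(a, b, M, num_iters=100000):
    # Illustrative wrapper: the module imports fine without POT;
    # only calling this function requires it.
    if ot is None:
        raise ImportError("POT is required: pip install 'aif360[OptimalTransport]'")
    return ot.emd2(a=a, b=b, M=M, numItermax=num_iters)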
25 changes: 13 additions & 12 deletions aif360/sklearn/datasets/openml_datasets.py
@@ -168,19 +168,20 @@ def fetch_german(*, data_home=None, cache=True, binary_age=True, usecols=None,
                                dropcols=dropcols, numeric_only=numeric_only,
                                dropna=dropna)
 
-def fetch_bank(*, data_home=None, cache=True, binary_age=True, percent10=False, 
+def fetch_bank(*, data_home=None, cache=True, binary_age=True, percent10=False,
                usecols=None, dropcols=['duration'], numeric_only=False, dropna=False):
     """Load the Bank Marketing Dataset.
 
-    The protected attribute is 'age' (binarized by default as suggested by [#lequy22]:
-    age >= 25 and age <60 is considered privileged and age< 25 or age >= 60 unprivileged;
-    see the binary_age flag to keep this continuous). The outcome variable is 'deposit':
+    The protected attribute is 'age' (binarized by default as suggested by [#lequy22]_:
+    age >= 25 and age <60 is considered privileged and age< 25 or age >= 60 unprivileged;
+    see the binary_age flag to keep this continuous). The outcome variable is 'deposit':
     'yes' or 'no'.
 
-    References:
-        .. [#lequy22] Le Quy, Tai, et al. "A survey on datasets for fairness‐aware machine
-            learning." Wiley Interdisciplinary Reviews: Data Mining and Knowledge
-            Discovery 12.3 (2022): e1452.
+    References:
+        .. [#lequy22] `Le Quy, Tai, et al. "A survey on datasets for fairness-
+            aware machine learning." Wiley Interdisciplinary Reviews: Data Mining
+            and Knowledge Discovery 12.3 (2022): e1452.
+            <https://wires.onlinelibrary.wiley.com/doi/pdf/10.1002/widm.1452>`_
 
     Note:
         By default, the data is downloaded from OpenML. See the `bank-marketing
@@ -235,15 +236,15 @@ def fetch_bank(*, data_home=None, cache=True, binary_age=True, percent10=False,
             df[col] = df[col].cat.remove_categories('unknown')
     df.education = df.education.astype('category').cat.reorder_categories(
         ['primary', 'secondary', 'tertiary'], ordered=True)
-
+
     # binarize protected attribute (but not corresponding feature)
     age = (pd.cut(df.age, [0, 24, 60, 100], ordered=False,
-                  labels=[0, 1, 0] if numeric_only 
+                  labels=[0, 1, 0] if numeric_only
                   else ['<25 or >=60', '25-60', '<25 or >=60'])
            if binary_age else 'age')
-    age = age.cat.reorder_categories([0, 1] if numeric_only 
+    age = age.cat.reorder_categories([0, 1] if numeric_only
                                      else ['<25 or >=60', '25-60'])
-
+
     return standardize_dataset(df, prot_attr=[age], target='deposit',
                                usecols=usecols, dropcols=dropcols,
                                numeric_only=numeric_only, dropna=dropna)
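The pd.cut call in this hunk is what implements the binarization described in the docstring. Because the outer bins share one label, ordered=False is required by pandas; a small self-contained check, using toy ages assumed purely for illustration:

import pandas as pd

ages = pd.Series([19, 30, 45, 64])

# Bins are (0, 24], (24, 60], (60, 100]; the first and last bins share a
# label, which pandas only accepts with ordered=False.
age_group = pd.cut(ages, [0, 24, 60, 100], ordered=False,
                   labels=['<25 or >=60', '25-60', '<25 or >=60'])

print(age_group.tolist())
# ['<25 or >=60', '25-60', '25-60', '<25 or >=60']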
14 changes: 10 additions & 4 deletions aif360/sklearn/inprocessing/infairness.py
@@ -1,10 +1,16 @@
-from inFairness import fairalgo
+try:
+    from inFairness import fairalgo
+    from skorch import NeuralNet
+    from skorch.dataset import unpack_data, Dataset as Dataset_
+    from skorch.utils import is_pandas_ndframe
+except ImportError as error:
+    from logging import warning
+    warning("{}: SenSeI and SenSR will be unavailable. To install, run:\n"
+            "pip install 'aif360[inFairness]'".format(error))
+    Dataset_ = NeuralNet = object
 from sklearn.preprocessing import LabelBinarizer
 from sklearn.utils.multiclass import type_of_target
 from sklearn.exceptions import NotFittedError
-from skorch import NeuralNet
-from skorch.dataset import unpack_data, Dataset as Dataset_
-from skorch.utils import is_pandas_ndframe
 
 
 class Dataset(Dataset_):
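The `Dataset_ = NeuralNet = object` fallback is what keeps the rest of this module importable: the class statements further down (`class Dataset(Dataset_):` and the skorch-based estimators) still execute against a plain `object` base. A sketch of the trick, with hypothetical names standing in for the real dependency:

try:
    from optional_lib import Base  # hypothetical optional dependency
except ImportError:
    Base = object  # placeholder base so the class statements below still run

class Wrapper(Base):
    # Importable with or without the extra installed; without it, instances
    # simply lack the inherited behavior and fail at use time instead.
    pass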
24 changes: 0 additions & 24 deletions aif360/sklearn/metrics/metrics.py
@@ -17,30 +17,6 @@
 from aif360.detectors.mdss.MDSS import MDSS
 
 
-__all__ = [
-    # meta-metrics
-    'difference', 'ratio', 'intersection', 'one_vs_rest',
-    # scorer factory
-    'make_scorer',
-    # helpers
-    'num_samples', 'num_pos_neg',
-    'specificity_score', 'base_rate', 'selection_rate', 'smoothed_base_rate',
-    'smoothed_selection_rate', 'generalized_fpr', 'generalized_fnr',
-    # group fairness
-    'ot_distance', 'statistical_parity_difference', 'disparate_impact_ratio',
-    'equal_opportunity_difference', 'average_odds_difference', 'average_predictive_value_difference',
-    'average_odds_error', 'class_imbalance', 'kl_divergence',
-    'conditional_demographic_disparity', 'smoothed_edf',
-    'df_bias_amplification', 'mdss_bias_score',
-    # individual fairness
-    'generalized_entropy_index', 'generalized_entropy_error',
-    'between_group_generalized_entropy_error', 'theil_index',
-    'coefficient_of_variation', 'consistency_score',
-    # aliases
-    'sensitivity_score', 'mean_difference', 'false_negative_rate_error',
-    'false_positive_rate_error'
-]
-
 # ============================= META-METRICS ===================================
 def difference(func, y_true, y_pred=None, prot_attr=None, priv_group=1,
                sample_weight=None, **kwargs):
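Dropping the explicit `__all__` means `from aif360.sklearn.metrics import *` now falls back to Python's default: every module-level name that does not start with an underscore. The behavior, shown on a toy module with illustrative names:

# toy_metrics.py
def difference(a, b):        # public: exported by star-import
    return a - b

def _normalize(x):           # leading underscore: skipped by star-import
    return [v / sum(x) for v in x]

# client code:
#   from toy_metrics import *
#   difference(3, 1)    # available
#   _normalize([1, 1])  # NameError: not exported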
2 changes: 0 additions & 2 deletions aif360/sklearn/postprocessing/reject_option_classification.py
@@ -85,8 +85,6 @@ def __init__(self, prot_attr=None, threshold=0.5, margin=0.1):
             margin (scalar): Half width of the critical region. Estimates within
                 the critical region are "rejected" and assigned according to
                 their group. Must be between 0 and min(threshold, 1-threshold).
-            metric ('statistical_parity', 'average_odds', 'equal_opportunity',
-                    or callable):
         """
         self.prot_attr = prot_attr
         self.threshold = threshold
2 changes: 1 addition & 1 deletion docs/Makefile
@@ -3,7 +3,7 @@
 
 # You can set these variables from the command line.
 SPHINXOPTS    =
-SPHINXBUILD   = python -msphinx
+SPHINXBUILD   ?= sphinx-build
 SPHINXPROJ    = aif360
 SOURCEDIR     = source
 BUILDDIR      = build
4 changes: 2 additions & 2 deletions docs/requirements.txt
@@ -5,6 +5,6 @@ fairlearn>=0.7.0
 pytest>=3.5.0
 
 # docs
-sphinx==1.8.6
+sphinx==7.2.6
 jinja2==3.0.3
-sphinx_rtd_theme==0.4.3
+sphinx_rtd_theme==2.0.0
15 changes: 11 additions & 4 deletions docs/source/conf.py
@@ -41,12 +41,16 @@
     'sphinx.ext.intersphinx',
     'sphinx.ext.mathjax']
 
+flv = tuple(map(int, fairlearn.__version__.split('.')))
+if flv > (0, 7, 0) and flv[-1] == 0:
+    flv = flv[:-1]
+flv = '.'.join(map(str, flv))
 intersphinx_mapping = {
-    'numpy': ('https://docs.scipy.org/doc/numpy/', None),
-    'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
+    'numpy': ('https://numpy.org/doc/stable/', None),
+    'scipy': ('https://docs.scipy.org/doc/scipy/', None),
     'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
     'sklearn': ('https://scikit-learn.org/stable/', None),
-    'fairlearn': (f'https://fairlearn.github.io/v{fairlearn.__version__}/', None),
+    'fairlearn': (f'https://fairlearn.org/v{flv}/', None),
     'python': ('https://docs.python.org/{}.{}'.format(*sys.version_info), None),
     'inFairness': ('https://ibm.github.io/inFairness/', None),
     'skorch': ('https://skorch.readthedocs.io/en/stable/', None),
@@ -97,7 +101,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
@@ -117,6 +121,9 @@
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = False
 
+# If true, parameters with trailing underscores will be properly escaped.
+strip_signature_backslash = True
+
 
 # -- Options for HTML output ----------------------------------------------
 
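The new `flv` block normalizes fairlearn's version string for the intersphinx URL, presumably because fairlearn hosts docs at paths like fairlearn.org/v0.10/ rather than v0.10.0 (that URL scheme is an inference here, not stated in the diff). The `strip_signature_backslash` setting ties in with the commit's "trailing underscore" fix: per the new comment, parameters with trailing underscores will be properly escaped. The version logic, restated as a standalone function for illustration:

def fairlearn_doc_version(version: str) -> str:
    # Mirrors the conf.py logic: drop a trailing ".0" patch component
    # for releases newer than 0.7.0.
    flv = tuple(map(int, version.split('.')))
    if flv > (0, 7, 0) and flv[-1] == 0:
        flv = flv[:-1]
    return '.'.join(map(str, flv))

assert fairlearn_doc_version('0.7.0') == '0.7.0'   # not > (0, 7, 0): kept as-is
assert fairlearn_doc_version('0.10.0') == '0.10'   # trailing zero trimmed
assert fairlearn_doc_version('0.9.1') == '0.9.1'   # nonzero patch kept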
1 change: 0 additions & 1 deletion docs/source/modules/sklearn.rst
@@ -119,7 +119,6 @@ Group fairness metrics
    metrics.smoothed_edf
    metrics.df_bias_amplification
    metrics.between_group_generalized_entropy_error
-   metrics.mdss_bias_scan
    metrics.mdss_bias_score
 
 Individual fairness metrics
1 change: 1 addition & 0 deletions docs/source/templates/class.rst
@@ -4,6 +4,7 @@
 .. currentmodule:: {{ module }}
 
 .. autoclass:: {{ objname }}
+   :members:
 
 {% block methods %}
 {% if methods %}
4 changes: 2 additions & 2 deletions requirements.txt
@@ -24,6 +24,6 @@ pytest>=3.5.0
 pytest-cov>=2.8.1
 
 # docs
-sphinx==1.8.6
+sphinx==7.2.6
 jinja2==3.0.3
-sphinx_rtd_theme==0.4.3
+sphinx_rtd_theme==2.0.0
8 changes: 4 additions & 4 deletions setup.py
@@ -1,5 +1,5 @@
 from setuptools import setup, find_packages
-from functools import reduce
+from itertools import chain
 
 long_description = """The AI Fairness 360 toolkit is an open-source library to help detect and mitigate bias in machine
 learning models. The AI Fairness 360 Python package includes a comprehensive set of metrics for datasets and models to
@@ -25,9 +25,9 @@
     'notebooks': ['jupyter', 'tqdm', 'igraph[plotting]', 'lightgbm', 'seaborn', 'ipympl'],
     'OptimalTransport': ['pot'],
 }
-extras['tests'] = reduce(lambda l1, l2: l1+l2, extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1'])
-extras['docs'] = ['sphinx<2', 'jinja2<3.1.0', 'sphinx_rtd_theme']
-extras['all'] = list(reduce(lambda s, l: s.union(l), extras.values(), set()))
+extras['tests'] = list(chain(*extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1']))
+extras['docs'] = ['sphinx', 'jinja2<3.1.0', 'sphinx_rtd_theme']
+extras['all'] = list(set(chain(*extras.values())))
 
 setup(name='aif360',
       version=version,
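The setup.py change swaps a `reduce` fold for `itertools.chain`, which flattens the extras lists in a single pass; the two approaches agree up to ordering. A quick equivalence check with toy extras (names illustrative, not the real dependency lists):

from functools import reduce
from itertools import chain

extras = {'a': ['x', 'y'], 'b': ['y', 'z']}  # toy stand-in for the real extras

# Old approach: fold the lists into a set one at a time
old_all = list(reduce(lambda s, l: s.union(l), extras.values(), set()))

# New approach: flatten lazily, then dedupe once
new_all = list(set(chain(*extras.values())))

assert sorted(old_all) == sorted(new_all) == ['x', 'y', 'z']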

