Skip to content

Commit

Permalink
added small explainer to demo_deterministic_reranking
Browse files Browse the repository at this point in the history
Signed-off-by: Andrii Kliachkin <andrew.klyachkin@gmail.com>
  • Loading branch information
andrewklayk committed Sep 17, 2023
1 parent ef13174 commit 9bda6e7
Showing 1 changed file with 23 additions and 91 deletions.
114 changes: 23 additions & 91 deletions examples/demo_deterministic_reranking.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -167,7 +167,7 @@
"5 b 60"
]
},
"execution_count": 2,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -196,85 +196,17 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:\n",
"`load_boston` has been removed from scikit-learn since version 1.2.\n",
"\n",
"The Boston housing prices dataset has an ethical problem: as\n",
"investigated in [1], the authors of this dataset engineered a\n",
"non-invertible variable \"B\" assuming that racial self-segregation had a\n",
"positive impact on house prices [2]. Furthermore the goal of the\n",
"research that led to the creation of this dataset was to study the\n",
"impact of air quality but it did not give adequate demonstration of the\n",
"validity of this assumption.\n",
"\n",
"The scikit-learn maintainers therefore strongly discourage the use of\n",
"this dataset unless the purpose of the code is to study and educate\n",
"about ethical issues in data science and machine learning.\n",
"\n",
"In this special case, you can fetch the dataset from the original\n",
"source::\n",
"\n",
" import pandas as pd\n",
" import numpy as np\n",
"\n",
" data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
" raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
" data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
" target = raw_df.values[1::2, 2]\n",
"\n",
"Alternative datasets include the California housing dataset and the\n",
"Ames housing dataset. You can load the datasets as follows::\n",
"\n",
" from sklearn.datasets import fetch_california_housing\n",
" housing = fetch_california_housing()\n",
"\n",
"for the California housing dataset and::\n",
"\n",
" from sklearn.datasets import fetch_openml\n",
" housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
"\n",
"for the Ames housing dataset.\n",
"\n",
"[1] M Carlisle.\n",
"\"Racist data destruction?\"\n",
"<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>\n",
"\n",
"[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n",
"\"Hedonic housing prices and the demand for clean air.\"\n",
"Journal of environmental economics and management 5.1 (1978): 81-102.\n",
"<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>\n",
": LawSchoolGPADataset will be unavailable. To install, run:\n",
"pip install 'aif360[LawSchoolGPA]'\n",
"WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n",
"pip install 'aif360[AdversarialDebiasing]'\n",
"WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n",
"pip install 'aif360[AdversarialDebiasing]'\n",
"WARNING:root:No module named 'fairlearn': ExponentiatedGradientReduction will be unavailable. To install, run:\n",
"pip install 'aif360[Reductions]'\n",
"WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n",
"pip install 'aif360[Reductions]'\n",
"c:\\Users\\andre\\miniconda3\\envs\\aif\\lib\\site-packages\\torch\\_functorch\\deprecated.py:58: UserWarning: We've integrated functorch into PyTorch. As the final step of the integration, functorch.vmap is deprecated as of PyTorch 2.0 and will be deleted in a future version of PyTorch >= 2.3. Please use torch.vmap instead; see the PyTorch 2.0 release notes and/or the torch.func migration guide for more details https://pytorch.org/docs/master/func.migrating.html\n",
" warn_deprecated('vmap', 'torch.vmap')\n",
"WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n",
"pip install 'aif360[Reductions]'\n"
]
}
],
"outputs": [],
"source": [
"from aif360.datasets import RegressionDataset\n",
"from aif360.algorithms.postprocessing.deterministic_reranking import DeterministicReranking"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -295,7 +227,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -318,7 +250,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 21,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -391,7 +323,7 @@
"6 0.0 50.0"
]
},
"execution_count": 6,
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -429,7 +361,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -454,7 +386,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 23,
"metadata": {},
"outputs": [
{
Expand All @@ -480,7 +412,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -500,7 +432,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 25,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -720,7 +652,7 @@
"10563 1.0 1.0 1.000000 0.900 0.909091"
]
},
"execution_count": 10,
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -740,7 +672,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 26,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -960,7 +892,7 @@
"5951 0.0 0.0 0.482759 0.210526 0.848485"
]
},
"execution_count": 11,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -995,7 +927,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -1004,7 +936,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 28,
"metadata": {},
"outputs": [
{
Expand All @@ -1013,7 +945,7 @@
"(18, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])"
]
},
"execution_count": 13,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1025,7 +957,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 29,
"metadata": {},
"outputs": [
{
Expand All @@ -1034,7 +966,7 @@
"(9, [1, 3, 5, 7, 9, 11, 13, 15, 17])"
]
},
"execution_count": 14,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1056,12 +988,12 @@
"\n",
"where $score(j)$ denotes the score of the item at position $j$.\n",
"\n",
"Setting `normalized` to `True` normalizes the metric against the DCG of top `r` elements of the full dataset by score, allowing us to compare the fair ranking to a purely score-based one."
"Setting `normalized` to `True` normalizes the metric against the DCG of the top `r` elements of the full dataset by score, allowing us to compare the fair ranking to a purely score-based one. Normalized DCG takes values from 0 to 1."
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 30,
"metadata": {},
"outputs": [
{
Expand Down

0 comments on commit 9bda6e7

Please sign in to comment.