added small explainer to demo_deterministic_reranking

Signed-off-by: Andrii Kliachkin <andrew.klyachkin@gmail.com>
Trusted-AI · Sep 17, 2023 · 9bda6e7 · 9bda6e7
1 parent ef13174
commit 9bda6e7
Showing 1 changed file with 23 additions and 91 deletions.
diff --git a/examples/demo_deterministic_reranking.ipynb b/examples/demo_deterministic_reranking.ipynb
@@ -59,7 +59,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -94,7 +94,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -167,7 +167,7 @@
        "5     b     60"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -196,85 +196,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:\n",
-      "`load_boston` has been removed from scikit-learn since version 1.2.\n",
-      "\n",
-      "The Boston housing prices dataset has an ethical problem: as\n",
-      "investigated in [1], the authors of this dataset engineered a\n",
-      "non-invertible variable \"B\" assuming that racial self-segregation had a\n",
-      "positive impact on house prices [2]. Furthermore the goal of the\n",
-      "research that led to the creation of this dataset was to study the\n",
-      "impact of air quality but it did not give adequate demonstration of the\n",
-      "validity of this assumption.\n",
-      "\n",
-      "The scikit-learn maintainers therefore strongly discourage the use of\n",
-      "this dataset unless the purpose of the code is to study and educate\n",
-      "about ethical issues in data science and machine learning.\n",
-      "\n",
-      "In this special case, you can fetch the dataset from the original\n",
-      "source::\n",
-      "\n",
-      "    import pandas as pd\n",
-      "    import numpy as np\n",
-      "\n",
-      "    data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
-      "    raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
-      "    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
-      "    target = raw_df.values[1::2, 2]\n",
-      "\n",
-      "Alternative datasets include the California housing dataset and the\n",
-      "Ames housing dataset. You can load the datasets as follows::\n",
-      "\n",
-      "    from sklearn.datasets import fetch_california_housing\n",
-      "    housing = fetch_california_housing()\n",
-      "\n",
-      "for the California housing dataset and::\n",
-      "\n",
-      "    from sklearn.datasets import fetch_openml\n",
-      "    housing = fetch_openml(name=\"house_prices\", as_frame=True)\n",
-      "\n",
-      "for the Ames housing dataset.\n",
-      "\n",
-      "[1] M Carlisle.\n",
-      "\"Racist data destruction?\"\n",
-      "<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>\n",
-      "\n",
-      "[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n",
-      "\"Hedonic housing prices and the demand for clean air.\"\n",
-      "Journal of environmental economics and management 5.1 (1978): 81-102.\n",
-      "<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>\n",
-      ": LawSchoolGPADataset will be unavailable. To install, run:\n",
-      "pip install 'aif360[LawSchoolGPA]'\n",
-      "WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n",
-      "pip install 'aif360[AdversarialDebiasing]'\n",
-      "WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n",
-      "pip install 'aif360[AdversarialDebiasing]'\n",
-      "WARNING:root:No module named 'fairlearn': ExponentiatedGradientReduction will be unavailable. To install, run:\n",
-      "pip install 'aif360[Reductions]'\n",
-      "WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n",
-      "pip install 'aif360[Reductions]'\n",
-      "c:\\Users\\andre\\miniconda3\\envs\\aif\\lib\\site-packages\\torch\\_functorch\\deprecated.py:58: UserWarning: We've integrated functorch into PyTorch. As the final step of the integration, functorch.vmap is deprecated as of PyTorch 2.0 and will be deleted in a future version of PyTorch >= 2.3. Please use torch.vmap instead; see the PyTorch 2.0 release notes and/or the torch.func migration guide for more details https://pytorch.org/docs/master/func.migrating.html\n",
-      "  warn_deprecated('vmap', 'torch.vmap')\n",
-      "WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n",
-      "pip install 'aif360[Reductions]'\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from aif360.datasets import RegressionDataset\n",
     "from aif360.algorithms.postprocessing.deterministic_reranking import DeterministicReranking"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -295,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -318,7 +250,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -391,7 +323,7 @@
        "6    0.0   50.0"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -429,7 +361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -454,7 +386,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -480,7 +412,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -500,7 +432,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -720,7 +652,7 @@
        "10563   1.0     1.0  1.000000  0.900  0.909091"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -740,7 +672,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -960,7 +892,7 @@
        "5951    0.0     0.0  0.482759  0.210526  0.848485"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -995,7 +927,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1004,7 +936,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -1013,7 +945,7 @@
        "(18, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1025,7 +957,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -1034,7 +966,7 @@
        "(9, [1, 3, 5, 7, 9, 11, 13, 15, 17])"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1056,12 +988,12 @@
     "\n",
     "where $score(j)$ denotes the score of the item at position $j$.\n",
     "\n",
-    "Setting `normalized` to `True` normalizes the metric against the DCG of top `r` elements of the full dataset by score, allowing us to compare the fair ranking to a purely score-based one."
+    "Setting `normalized` to `True` normalizes the metric against the DCG of the top `r` elements of the full dataset by score, allowing us to compare the fair ranking to a purely score-based one. Normalized DCG takes values from 0 to 1, where 1 means the fair ranking attains the same DCG as the purely score-based top-`r` ranking."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {