
Commit

Deploying to gh-pages from @ 8befa74 🚀
EtienneCmb committed Sep 13, 2024
1 parent dacf1f5 commit e6039df
Showing 79 changed files with 335 additions and 312 deletions.
Binary file modified _downloads/071683bb9f5d321841ee9dd62eee27a4/plot_sim_red_syn.zip
Binary file modified _downloads/1e58f0eaeee8456990c2959bfc1babe9/plot_oinfo.zip
Binary file modified _downloads/31d2b25c7ef60a5789246c25d83787c6/plot_ml_vs_it.zip
5 changes: 3 additions & 2 deletions _downloads/4481fca80fcd36eb6fde9d9cb4e3134c/plot_entropies.py
@@ -29,13 +29,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
# (Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based
# estimator.
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
# estimator and the histogram estimator.

# list of estimators to compare
metrics = {
"GC": get_entropy("gc"),
"Gaussian": get_entropy(method="gauss"),
"Histogram": get_entropy(method="histogram"),
"KNN-3": get_entropy("knn", k=3),
"Kernel": get_entropy("kernel"),
}
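
As a minimal, editorial usage sketch (not part of this commit), and assuming the callables returned by get_entropy accept an array of shape (n_features, n_samples) as in the gallery code, one of the estimators above can be checked against the closed-form entropy of a standard normal:

import numpy as np
from hoi.core import get_entropy

# one feature, 1000 samples drawn from N(0, 1)
x = np.random.normal(size=(1, 1000))

# Gaussian Copula estimate, in bits
h_gc = get_entropy("gc")(x)

# closed-form differential entropy of N(0, 1) in bits: 0.5 * log2(2 * pi * e)
h_theory = 0.5 * np.log2(2 * np.pi * np.e)

print(float(h_gc), h_theory)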
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based\nand the Gaussian estimators.\n\n"
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the\nGaussian estimators and the histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(\"gauss\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 5"
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(\"gauss\"),\n \"Histogram\": get_entropy(\"histogram\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 5"
]
},
{
Binary file modified _downloads/4edd620b8310fd78193d26c454d1ca13/plot_rsi.zip
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor) and a Gaussian kernel-based estimator.\n\n"
"## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram\nestimator.\n\n"
]
},
{
@@ -33,14 +33,14 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"KNN-10\": get_entropy(\"knn\", k=10),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(100, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(ent.keys()):\n # get the entropies\n x = ent[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(\n ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Entropy [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"KNN-10\": get_entropy(\"knn\", k=10),\n \"Kernel\": get_entropy(\"kernel\"),\n \"Histogram\": get_entropy(\"histogram\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(ent.keys()):\n # get the entropies\n x = ent[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(\n ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Entropy [bits]\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Entropy of data sampled from multinormal distribution\n\nLet variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n$\\mathcal{N}(\\vec{\\mu}, \\Sigma)$ the theoretical entropy in bits is\ndefined by :\n\n\\begin{align}H(X) = \\frac{1}{2} \\times log_{2}({|\\Sigma|}(2\\pi e)^{n})\\end{align}\n\n\n"
"##############################################################################\n Entropy of data sampled from multinormal distribution\n -------------------------------------------\n\n Let variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n $\\mathcal{N}(\\vec{\\mu}, \\Sigma)$ the theoretical entropy in bits is\n defined by :\n\n .. math::\n H(X) = \\frac{1}{2} \\times log_{2}({|\\Sigma|}(2\\pi e)^{n})\n\n\n"
]
},
{
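
As an editorial, hedged illustration of the formula quoted in the cell above, the theoretical entropy of a multivariate normal can be evaluated directly from its covariance matrix (the covariance values below are an assumption for illustration only):

import numpy as np

# example 2-dimensional covariance matrix (hypothetical values)
cov = np.array([[1.0, 0.5],
                [0.5, 1.0]])
n = cov.shape[0]

# H(X) = 0.5 * log2(|Sigma| * (2 * pi * e) ** n), in bits
h_theory = 0.5 * np.log2(np.linalg.det(cov) * (2 * np.pi * np.e) ** n)
print(h_theory)  # about 3.89 bits for this covariance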
Binary file modified _downloads/58edfc8265b6af8f03b242ea4656b10f/plot_syn_phiID.zip
Binary file modified _downloads/675ecb045af78b81dbd01cbea1d59fa1/plot_entropies_mvar.zip
Binary file modified _downloads/69a8436c098192f96580d8e9cd2f5d4d/plot_entropies.zip
Binary file modified _downloads/6df867479300c4bb75728e18f2fcc139/plot_infotopo.zip
10 changes: 9 additions & 1 deletion _downloads/7aa0a86e987814b09694d9eac79452ec/plot_mi.py
@@ -31,12 +31,19 @@
#
# Let us define several estimators of MI. We are going to use the GC MI
# (Gaussian Copula Mutual Information), the KNN (k Nearest Neighbor) and the
# kernel-based estimator and using the a binning approach.
# kernel-based estimator, a binning approach and the histogram estimator.
# Please note that the histogram estimator is equivalent to the binning
# approach, up to a correction that relates to the difference between the
# Shannon entropy of discrete variables and the differential entropy of
# continuous variables. In the case of mutual information (MI) this
# correction is not needed, because it cancels out when the entropies are
# combined to compute the MI.


# create a special function for the binning approach as it requires discretized data
mi_binning_fcn = get_mi("binning", base=2)


def mi_binning(x, y, **kwargs):
x = digitize(x.T, **kwargs).T
y = digitize(y.T, **kwargs).T
@@ -51,6 +58,7 @@ def mi_binning(x, y, **kwargs):
"KNN-10": get_mi("knn", k=10),
"Kernel": get_mi("kernel"),
"Binning": partial(mi_binning, n_bins=4),
"Histogram": get_mi("histogram", n_bins=4),
}
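
A hedged, pure-NumPy sketch of the point made in the comments above (an editorial addition, not the library's implementation): when MI is computed as H(X) + H(Y) - H(X, Y) on binned data, the bin-width terms that separate discrete Shannon entropy from differential entropy appear with opposite signs and cancel, so no correction is needed.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=2000)
y = x + rng.normal(size=2000)  # correlated pair
n_bins = 4

def discrete_entropy(labels):
    """Plug-in Shannon entropy (bits) of integer-labelled samples."""
    _, counts = np.unique(labels, return_counts=True, axis=0)
    p = counts / counts.sum()
    return -(p * np.log2(p)).sum()

# discretize each variable into equally spaced bins
xd = np.digitize(x, np.histogram_bin_edges(x, bins=n_bins)[1:-1])
yd = np.digitize(y, np.histogram_bin_edges(y, bins=n_bins)[1:-1])

# MI from plug-in discrete entropies; the per-variable bin-width corrections cancel
mi = discrete_entropy(xd) + discrete_entropy(yd) - discrete_entropy(np.c_[xd, yd])
print(mi)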

# number of samples to simulate data
@@ -24,13 +24,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
# and the Gaussian estimators.
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the
# Gaussian and the histogram estimators.

# list of estimators to compare
metrics = {
"GC": get_entropy("gc"),
"Gaussian": get_entropy("gauss"),
"Histogram": get_entropy("histogram"),
"KNN-3": get_entropy("knn", k=3),
"Kernel": get_entropy("kernel"),
}
Binary file modified _downloads/9b8182d6a0423d2c5a54190326bb52f0/plot_jax.zip
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based\nestimator.\n\n"
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based\nestimator and the histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(method=\"gauss\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(h_x, h_theoric):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(h_x.keys()):\n # get the entropies\n x = h_x[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the entropy as a function of the number of samples and interval\n plt.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n plt.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n plt.axhline(\n h_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n plt.legend()\n plt.xlabel(\"Number of samples\")\n plt.ylabel(\"Entropy [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(method=\"gauss\"),\n \"Histogram\": get_entropy(method=\"histogram\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(h_x, h_theoric):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(h_x.keys()):\n # get the entropies\n x = h_x[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the entropy as a function of the number of samples and interval\n plt.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n plt.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n plt.axhline(\n h_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n plt.legend()\n plt.xlabel(\"Number of samples\")\n plt.ylabel(\"Entropy [bits]\")"
]
},
{
@@ -12,6 +12,7 @@
"""

# %%
import numpy as np

from hoi.core import get_entropy
@@ -25,18 +26,20 @@
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
# (k Nearest Neighbor) and a Gaussian kernel-based estimator.
# (k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram
# estimator.

# list of estimators to compare
metrics = {
"GCMI": get_entropy("gc", biascorrect=False),
"KNN-3": get_entropy("knn", k=3),
"KNN-10": get_entropy("knn", k=10),
"Kernel": get_entropy("kernel"),
"Histogram": get_entropy("histogram"),
}

# number of samples to simulate data
n_samples = np.geomspace(100, 10000, 10).astype(int)
n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -68,6 +71,7 @@ def plot(ent, ent_theoric, ax):
ax.set_ylabel("Entropy [bits]")


# %%
###############################################################################
# Entropy of data sampled from multinormal distribution
# -------------------------------------------
@@ -16,6 +16,8 @@
Czyz et al., NeurIPS 2023 :cite:`czyz2024beyond`.
"""

# %%

import numpy as np

from hoi.core import get_mi
@@ -28,18 +30,19 @@
# Definition of MI estimators
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information) and KNN
# (k Nearest Neighbor)
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
# (k Nearest Neighbor) and the histogram estimator.

# list of estimators to compare
metrics = {
"GCMI": get_mi("gc", biascorrect=False),
"KNN-3": get_mi("knn", k=3),
"KNN-10": get_mi("knn", k=10),
"Histogram": get_mi("histogram", n_bins=3),
}

# number of samples to simulate data
n_samples = np.geomspace(1000, 10000, 10).astype(int)
n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
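
As an editorial, hedged check of the estimators defined above (assuming, as in the gallery code, that the functions returned by get_mi take the two variables as arrays of shape (n_features, n_samples)), the estimates can be compared against the closed-form MI of a bivariate normal, -0.5 * log2(1 - rho**2) bits:

import numpy as np
from hoi.core import get_mi

rho = 0.8
cov = np.array([[1.0, rho], [rho, 1.0]])

# simulate correlated pairs and split them into x and y of shape (1, n_samples)
xy = np.random.multivariate_normal([0.0, 0.0], cov, size=5000).T
x, y = xy[[0]], xy[[1]]

mi_est = get_mi("gc", biascorrect=False)(x, y)
mi_theory = -0.5 * np.log2(1 - rho ** 2)
print(float(mi_est), mi_theory)  # both should be close to ~0.74 bits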
@@ -69,6 +72,8 @@ def plot(mi, mi_theoric, ax):
ax.set_ylabel("Mutual-information [bits]")


# %%

###############################################################################
# MI of data sampled from splitted multinormal distribution
# -------------------------------------------
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of MI estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information) and KNN\n(k Nearest Neighbor)\n\n"
"## Definition of MI estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor) and histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_mi(\"gc\", biascorrect=False),\n \"KNN-3\": get_mi(\"knn\", k=3),\n \"KNN-10\": get_mi(\"knn\", k=10),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(1000, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(mi, mi_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(mi.keys()):\n # get the entropies\n x = mi[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(mi_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical MI\")\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Mutual-information [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_mi(\"gc\", biascorrect=False),\n \"KNN-3\": get_mi(\"knn\", k=3),\n \"KNN-10\": get_mi(\"knn\", k=10),\n \"Histogram\": get_mi(\"histogram\", n_bins=3),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(mi, mi_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(mi.keys()):\n # get the entropies\n x = mi[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(mi_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical MI\")\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Mutual-information [bits]\")"
]
},
{
Binary file modified _downloads/cb54325e1a49091788b29a83388e5628/plot_mi.zip
