Commit

Deploying to gh-pages from @ 4a4aab3 🚀
EtienneCmb committed Aug 30, 2024
1 parent 92cccbd commit 6a6ca76
Showing 156 changed files with 3,044 additions and 736 deletions.
2 changes: 1 addition & 1 deletion .buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 592af5c3e32f0ec823a47e0abd75e30d
+config: 999930e67a31d0b6695cfec21bc21d1a
 tags: 645f666f9bcd5a90fca523b33c5a78b7
@@ -0,0 +1,79 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Comparison of entropy estimators with high-dimensional data\n\nIn this example, we are going to compare estimators of entropy with\nhigh-dimensional data.\n\n1. Simulate data sampled from a multivariate normal distribution.\n2. Define estimators of entropy.\n3. Compute the entropy for a varying number of samples.\n4. See if the estimated entropy converge towards the theoretical value.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n\nfrom hoi.core import get_entropy\n\nimport matplotlib.pyplot as plt\n\nplt.style.use(\"ggplot\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor) and a Gaussian kernel-based estimator.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"KNN-10\": get_entropy(\"knn\", k=10),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(100, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(ent.keys()):\n # get the entropies\n x = ent[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(\n ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Entropy [bits]\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Entropy of data sampled from multinormal distribution\n\nLet variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n$\\mathcal{N}(\\vec{\\mu}, \\Sigma)$ the theoretical entropy in bits is\ndefined by :\n\n\\begin{align}H(X) = \\frac{1}{2} \\times log_{2}({|\\Sigma|}(2\\pi e)^{n})\\end{align}\n\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# function for creating the covariance matrix with differnt modes\ndef create_cov_matrix(n_dims, cov, mode=\"dense\", k=None):\n \"\"\"Create a covariance matrix.\"\"\"\n # variance 1 for each dim\n cov_matrix = np.eye(n_dims)\n if mode == \"dense\":\n # all dimensions, but diagonal, with covariance cov\n cov_matrix += cov\n cov_matrix[np.diag_indices(n_dims)] = 1\n elif mode == \"sparse\":\n # only pairs x_i, x_(i+1) with i < k have covariance cov\n k = k if k is not None else n_dims\n for i in range(n_dims - 1):\n if i < k:\n cov_matrix[i, i + 1] = cov\n cov_matrix[i + 1, i] = cov\n\n return cov_matrix\n\n\ndef compute_true_entropy(cov_matrix):\n \"\"\"Compute the true entropy (bits).\"\"\"\n n_dims = cov_matrix.shape[0]\n det_cov = np.linalg.det(cov_matrix)\n return 0.5 * np.log2(det_cov * (2 * np.pi * np.e) ** n_dims)\n\n\n# number of dimensions per variable\nn_dims = 4\n# mean\nmu = [0.0] * n_dims\n# covariance\ncovariance = 0.6\n\n# modes for the covariance matrix:\n# - dense: off diagonal elements have specified covariance\n# - sparse: only pairs xi, x_(i+1) with i < k have specified covariance\nmodes = [\"dense\", \"sparse\"]\n# number of pairs with specified covariance\nk = n_dims\n\nfig = plt.figure(figsize=(10, 5))\n# compute entropy using various metrics\nentropy = {k: np.zeros((n_repeat, len(n_samples))) for k in metrics.keys()}\nfor i, mode in enumerate(modes):\n cov_matrix = create_cov_matrix(n_dims, covariance, mode=mode)\n # define the theoretic entropy\n ent_theoric = compute_true_entropy(cov_matrix)\n ax = fig.add_subplot(1, 2, i + 1)\n\n for n_s, s in enumerate(n_samples):\n for n_r in range(n_repeat):\n # generate samples from joint gaussian distribution\n fx = np.random.multivariate_normal(mu, cov_matrix, s)\n for metric, fcn in metrics.items():\n # extract x and y\n x = fx[:, :n_dims].T\n y = fx[:, n_dims:].T\n # compute entropy\n entropy[metric][n_r, n_s] = fcn(x)\n\n # plot the results\n plot(entropy, ent_theoric, ax)\n ax.title.set_text(f\"Mode: {mode}\")\n\nfig.suptitle(\n \"Comparison of entropy estimators when\\nthe data is high-dimensional\",\n fontweight=\"bold\",\n)\nfig.tight_layout()\nplt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
16 changes: 8 additions & 8 deletions _downloads/7aa0a86e987814b09694d9eac79452ec/plot_mi.py
@@ -45,7 +45,7 @@ def mi_binning(x, y, **kwargs):
 # list of estimators to compare
 metrics = {
     "GC": get_mi("gc", biascorrect=False),
-    "KNN-3": get_mi("knn", k=1),
+    "KNN-3": get_mi("knn", k=3),
     "KNN-10": get_mi("knn", k=10),
     "Kernel": get_mi("kernel"),
     "Binning": partial(mi_binning, n_bins=4),
@@ -59,7 +59,7 @@ def mi_binning(x, y, **kwargs):


 # plotting function
-def plot(mi, h_theoric):
+def plot(mi, mi_theoric):
     """Plotting function."""
     for n_m, metric_name in enumerate(mi.keys()):
         # get the entropies
@@ -76,7 +76,7 @@ def plot(mi, h_theoric):
         plt.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)

     # plot the theoretical value
-    plt.axhline(h_theoric, linestyle="--", color="k", label="Theoretical MI")
+    plt.axhline(mi_theoric, linestyle="--", color="k", label="Theoretical MI")
     plt.legend()
     plt.xlabel("Number of samples")
     plt.ylabel("Mutual-information [bits]")
@@ -101,7 +101,7 @@ def plot(mi, h_theoric):
 sigma_y = 1.0

 # covariance between x and y
-covariance = 0.3
+covariance = 0.5

 # covariance matrix
 cov_matrix = [[sigma_x**2, covariance], [covariance, sigma_y**2]]
@@ -111,12 +111,12 @@ def plot(mi, h_theoric):
     sigma_x**2 * sigma_y**2 / (sigma_x**2 * sigma_y**2 - covariance**2)
 )

-# compute entropies using various metrics
+# compute mi using various metrics
 mi = {k: np.zeros((n_repeat, len(n_samples))) for k in metrics.keys()}

-for metric, fcn in metrics.items():
-    for n_s, s in enumerate(n_samples):
-        for n_r in range(n_repeat):
+for n_s, s in enumerate(n_samples):
+    for n_r in range(n_repeat):
+        for metric, fcn in metrics.items():
             # generate samples from joint gaussian distribution
             fx = np.random.multivariate_normal([mu_x, mu_y], cov_matrix, s)

@@ -0,0 +1,154 @@
"""
Comparison of entropy estimators with high-dimensional data
=====================================================================

In this example, we are going to compare estimators of entropy with
high-dimensional data.

1. Simulate data sampled from a multivariate normal distribution.
2. Define estimators of entropy.
3. Compute the entropy for a varying number of samples.
4. See if the estimated entropy converges towards the theoretical value.
"""

import numpy as np

from hoi.core import get_entropy

import matplotlib.pyplot as plt

plt.style.use("ggplot")

###############################################################################
# Definition of entropy estimators
# --------------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), the KNN
# (k-Nearest Neighbors) estimator with k=3 and k=10, and a Gaussian
# kernel-based estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_entropy("gc", biascorrect=False),
    "KNN-3": get_entropy("knn", k=3),
    "KNN-10": get_entropy("knn", k=10),
    "Kernel": get_entropy("kernel"),
}
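
# A minimal usage sketch (illustration only, not part of the original
# example; `x_demo` is assumed toy data): each estimator returned by
# `get_entropy` is a callable that takes an array of shape
# (n_features, n_samples) and returns an entropy estimate in bits.
x_demo = np.random.normal(size=(4, 1000))  # 4 features, 1000 samples
print("GCMI entropy of white-noise data:", metrics["GCMI"](x_demo))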

# number of samples to simulate data
n_samples = np.geomspace(100, 10000, 10).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10


# plotting function
def plot(ent, ent_theoric, ax):
    """Plotting function."""
    for n_m, metric_name in enumerate(ent.keys()):
        # get the entropies
        x = ent[metric_name]

        # get the color
        color = f"C{n_m}"

        # estimate lower and upper bounds of the [5, 95]th percentile interval
        x_low, x_high = np.percentile(x, [5, 95], axis=0)

        # plot the entropy as a function of the number of samples and interval
        ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)
        ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)

    # plot the theoretical value
    ax.axhline(
        ent_theoric, linestyle="--", color="k", label="Theoretical entropy"
    )
    ax.legend()
    ax.set_xlabel("Number of samples")
    ax.set_ylabel("Entropy [bits]")


###############################################################################
# Entropy of data sampled from multinormal distribution
# ------------------------------------------------------
#
# Let the variables :math:`X_1,X_2,...,X_n` follow a multivariate normal
# distribution :math:`\mathcal{N}(\vec{\mu}, \Sigma)`. The theoretical
# entropy in bits is then given by:
#
# .. math::
#     H(X) = \frac{1}{2} \log_{2}\left((2\pi e)^{n} |\Sigma|\right)
#
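# As a quick sanity check of this formula (illustration only, not part of the
# original example): for a single standard normal variable (:math:`n = 1`,
# :math:`|\Sigma| = 1`), the entropy reduces to
# :math:`\frac{1}{2} \log_{2}(2\pi e)`, i.e. about 2.05 bits.

print("H(N(0, 1)) in bits:", 0.5 * np.log2(2 * np.pi * np.e))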


# function for creating the covariance matrix with different modes
def create_cov_matrix(n_dims, cov, mode="dense", k=None):
    """Create a covariance matrix."""
    # variance 1 for each dim
    cov_matrix = np.eye(n_dims)
    if mode == "dense":
        # all off-diagonal elements have covariance cov
        cov_matrix += cov
        cov_matrix[np.diag_indices(n_dims)] = 1
    elif mode == "sparse":
        # only pairs x_i, x_(i+1) with i < k have covariance cov
        k = k if k is not None else n_dims
        for i in range(n_dims - 1):
            if i < k:
                cov_matrix[i, i + 1] = cov
                cov_matrix[i + 1, i] = cov

    return cov_matrix


def compute_true_entropy(cov_matrix):
    """Compute the true entropy (bits)."""
    n_dims = cov_matrix.shape[0]
    det_cov = np.linalg.det(cov_matrix)
    return 0.5 * np.log2(det_cov * (2 * np.pi * np.e) ** n_dims)
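

# A small illustration (demo values assumed, not part of the original
# example): a 3-dimensional "sparse" covariance matrix and its theoretical
# entropy.
demo_cov = create_cov_matrix(3, 0.6, mode="sparse")
print("demo covariance matrix:\n", demo_cov)
print("theoretical entropy (bits):", compute_true_entropy(demo_cov))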


# number of dimensions of the simulated data
n_dims = 4
# mean
mu = [0.0] * n_dims
# covariance
covariance = 0.6

# modes for the covariance matrix:
# - dense: off diagonal elements have specified covariance
# - sparse: only pairs x_i, x_(i+1) with i < k have specified covariance
modes = ["dense", "sparse"]
# number of pairs with specified covariance
k = n_dims

fig = plt.figure(figsize=(10, 5))
# compute entropy using various metrics
entropy = {k: np.zeros((n_repeat, len(n_samples))) for k in metrics.keys()}
for i, mode in enumerate(modes):
    cov_matrix = create_cov_matrix(n_dims, covariance, mode=mode)
    # compute the theoretical entropy
    ent_theoric = compute_true_entropy(cov_matrix)
    ax = fig.add_subplot(1, 2, i + 1)

    for n_s, s in enumerate(n_samples):
        for n_r in range(n_repeat):
            # generate samples from the joint gaussian distribution
            fx = np.random.multivariate_normal(mu, cov_matrix, s)
            for metric, fcn in metrics.items():
                # transpose the data to shape (n_features, n_samples)
                x = fx.T
                # compute the entropy
                entropy[metric][n_r, n_s] = fcn(x)

    # plot the results
    plot(entropy, ent_theoric, ax)
    ax.title.set_text(f"Mode: {mode}")

fig.suptitle(
    "Comparison of entropy estimators when\nthe data is high-dimensional",
    fontweight="bold",
)
fig.tight_layout()
plt.show()
