Commit 6a6ca76
Deploying to gh-pages from @ 4a4aab3 🚀
1 parent: 92cccbd
Showing 156 changed files with 3,044 additions and 736 deletions.
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 592af5c3e32f0ec823a47e0abd75e30d
+config: 999930e67a31d0b6695cfec21bc21d1a
 tags: 645f666f9bcd5a90fca523b33c5a78b7
BIN +9.99 KB (110%) _downloads/07fcc19ba03226cd3d83d4e40ec44385/auto_examples_python.zip (binary file modified)
79 changes: 79 additions & 0 deletions
_downloads/5643cba16e58fee7f03fd86cb1cd9854/plot_entropy_high_dimensional.ipynb
@@ -0,0 +1,79 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Comparison of entropy estimators with high-dimensional data\n\nIn this example, we are going to compare estimators of entropy with\nhigh-dimensional data.\n\n1. Simulate data sampled from a multivariate normal distribution.\n2. Define estimators of entropy.\n3. Compute the entropy for a varying number of samples.\n4. See if the estimated entropy converges toward the theoretical value.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import numpy as np\n\nfrom hoi.core import get_entropy\n\nimport matplotlib.pyplot as plt\n\nplt.style.use(\"ggplot\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k-nearest neighbors) and a Gaussian kernel-based estimator.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# list of estimators to compare\nmetrics = {\n    \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n    \"KNN-3\": get_entropy(\"knn\", k=3),\n    \"KNN-10\": get_entropy(\"knn\", k=10),\n    \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(100, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n    \"\"\"Plotting function.\"\"\"\n    for n_m, metric_name in enumerate(ent.keys()):\n        # get the entropies\n        x = ent[metric_name]\n\n        # get the color\n        color = f\"C{n_m}\"\n\n        # estimate the lower and upper bounds of the [5, 95]th percentile\n        # interval\n        x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n        # plot the entropy as a function of the number of samples, together\n        # with the percentile interval\n        ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n        ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n    # plot the theoretical value\n    ax.axhline(\n        ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n    )\n    ax.legend()\n    ax.set_xlabel(\"Number of samples\")\n    ax.set_ylabel(\"Entropy [bits]\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Entropy of data sampled from a multivariate normal distribution\n\nLet the variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n$\\mathcal{N}(\\vec{\\mu}, \\Sigma)$. The theoretical entropy in bits is then\ngiven by:\n\n\\begin{align}H(X) = \\frac{1}{2} \\log_{2}\\left((2\\pi e)^{n}{|\\Sigma|}\\right)\\end{align}\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# function for creating the covariance matrix with different modes\ndef create_cov_matrix(n_dims, cov, mode=\"dense\", k=None):\n    \"\"\"Create a covariance matrix.\"\"\"\n    # variance 1 for each dimension\n    cov_matrix = np.eye(n_dims)\n    if mode == \"dense\":\n        # all off-diagonal elements have covariance cov\n        cov_matrix += cov\n        cov_matrix[np.diag_indices(n_dims)] = 1\n    elif mode == \"sparse\":\n        # only pairs x_i, x_(i+1) with i < k have covariance cov\n        k = k if k is not None else n_dims\n        for i in range(n_dims - 1):\n            if i < k:\n                cov_matrix[i, i + 1] = cov\n                cov_matrix[i + 1, i] = cov\n\n    return cov_matrix\n\n\ndef compute_true_entropy(cov_matrix):\n    \"\"\"Compute the true entropy (bits).\"\"\"\n    n_dims = cov_matrix.shape[0]\n    det_cov = np.linalg.det(cov_matrix)\n    return 0.5 * np.log2(det_cov * (2 * np.pi * np.e) ** n_dims)\n\n\n# number of dimensions per variable\nn_dims = 4\n# mean\nmu = [0.0] * n_dims\n# covariance\ncovariance = 0.6\n\n# modes for the covariance matrix:\n# - dense: all off-diagonal elements have the specified covariance\n# - sparse: only pairs x_i, x_(i+1) with i < k have the specified covariance\nmodes = [\"dense\", \"sparse\"]\n# number of pairs with specified covariance\nk = n_dims\n\nfig = plt.figure(figsize=(10, 5))\n# compute entropy using the various estimators\nentropy = {name: np.zeros((n_repeat, len(n_samples))) for name in metrics}\nfor i, mode in enumerate(modes):\n    cov_matrix = create_cov_matrix(n_dims, covariance, mode=mode)\n    # define the theoretical entropy\n    ent_theoric = compute_true_entropy(cov_matrix)\n    ax = fig.add_subplot(1, 2, i + 1)\n\n    for n_s, s in enumerate(n_samples):\n        for n_r in range(n_repeat):\n            # generate samples from the joint gaussian distribution\n            fx = np.random.multivariate_normal(mu, cov_matrix, s)\n            for metric, fcn in metrics.items():\n                # transpose the data to shape (n_dims, n_samples)\n                x = fx.T\n                # compute entropy\n                entropy[metric][n_r, n_s] = fcn(x)\n\n    # plot the results\n    plot(entropy, ent_theoric, ax)\n    ax.title.set_text(f\"Mode: {mode}\")\n\nfig.suptitle(\n    \"Comparison of entropy estimators when\\nthe data is high-dimensional\",\n    fontweight=\"bold\",\n)\nfig.tight_layout()\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.19"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
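The notebook above builds each estimator with hoi.core.get_entropy, which returns a callable mapping an array of shape (n_dims, n_samples) to an entropy estimate in bits. As a minimal standalone sketch of that usage (assuming only that hoi and NumPy are installed; the estimator settings mirror the notebook, nothing here is part of the commit itself):

import numpy as np
from hoi.core import get_entropy

# build a KNN-based entropy estimator, as in the notebook
h_knn = get_entropy("knn", k=3)

# 4-dimensional standard normal samples, shaped (n_dims, n_samples)
x = np.random.randn(4, 1000)

# entropy estimate in bits
print(h_knn(x))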
BIN +12.4 KB (110%) _downloads/6f1e7a639e0699d6164445b55e6c116d/auto_examples_jupyter.zip (binary file modified)
BIN +11.5 KB _downloads/88e7424e21f0645541ebb2bb32c0cfda/plot_mi_high_dimensional.zip (binary file added)
154 changes: 154 additions & 0 deletions
_downloads/bfb0b680f07615229c434f2a031aa74c/plot_entropy_high_dimensional.py
@@ -0,0 +1,154 @@
""" | ||
Comparison of entropy estimators with high-dimensional data | ||
===================================================================== | ||
In this example, we are going to compare estimators of entropy with | ||
high-dimensional data. | ||
1. Simulate data sampled from a multivariate normal distribution. | ||
2. Define estimators of entropy. | ||
3. Compute the entropy for a varying number of samples. | ||
4. See if the estimated entropy converge towards the theoretical value. | ||
""" | ||
|
||
import numpy as np | ||
|
||
from hoi.core import get_entropy | ||
|
||
import matplotlib.pyplot as plt | ||
|
||
plt.style.use("ggplot") | ||

###############################################################################
# Definition of entropy estimators
# --------------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
# (k-nearest neighbors) and a Gaussian kernel-based estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_entropy("gc", biascorrect=False),
    "KNN-3": get_entropy("knn", k=3),
    "KNN-10": get_entropy("knn", k=10),
    "Kernel": get_entropy("kernel"),
}

# number of samples to simulate data
n_samples = np.geomspace(100, 10000, 10).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
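
# the resulting sample sizes are log-spaced between 100 and 10,000:
# [100, 166, 278, 464, 774, 1291, 2154, 3593, 5994, 10000]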


# plotting function
def plot(ent, ent_theoric, ax):
    """Plotting function."""
    for n_m, metric_name in enumerate(ent.keys()):
        # get the entropies
        x = ent[metric_name]

        # get the color
        color = f"C{n_m}"

        # estimate the lower and upper bounds of the [5, 95]th percentile
        # interval
        x_low, x_high = np.percentile(x, [5, 95], axis=0)

        # plot the entropy as a function of the number of samples, together
        # with the percentile interval
        ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)
        ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)

    # plot the theoretical value
    ax.axhline(
        ent_theoric, linestyle="--", color="k", label="Theoretical entropy"
    )
    ax.legend()
    ax.set_xlabel("Number of samples")
    ax.set_ylabel("Entropy [bits]")


###############################################################################
# Entropy of data sampled from a multivariate normal distribution
# ----------------------------------------------------------------
#
# Let the variables :math:`X_1,X_2,...,X_n` have a multivariate normal
# distribution :math:`\mathcal{N}(\vec{\mu}, \Sigma)`. The theoretical
# entropy in bits is then given by:
#
# .. math::
#     H(X) = \frac{1}{2} \log_{2}\left((2\pi e)^{n}{|\Sigma|}\right)
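#
# Note that the "dense" mode used below yields an equicorrelation matrix:
# writing :math:`c` for the common off-diagonal covariance, its determinant
# has the closed form :math:`|\Sigma| = (1 - c)^{n - 1} (1 + (n - 1) c)`,
# so :math:`n = 4` and :math:`c = 0.6` give :math:`0.4^{3} \times 2.8 = 0.1792`.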


# function for creating the covariance matrix with different modes
def create_cov_matrix(n_dims, cov, mode="dense", k=None):
    """Create a covariance matrix."""
    # variance 1 for each dimension
    cov_matrix = np.eye(n_dims)
    if mode == "dense":
        # all off-diagonal elements have covariance cov
        cov_matrix += cov
        cov_matrix[np.diag_indices(n_dims)] = 1
    elif mode == "sparse":
        # only pairs x_i, x_(i+1) with i < k have covariance cov
        k = k if k is not None else n_dims
        for i in range(n_dims - 1):
            if i < k:
                cov_matrix[i, i + 1] = cov
                cov_matrix[i + 1, i] = cov

    return cov_matrix
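
# for illustration, with n_dims=3 and cov=0.6 the two modes give:
#   dense                  sparse
#   [[1. , 0.6, 0.6],      [[1. , 0.6, 0. ],
#    [0.6, 1. , 0.6],       [0.6, 1. , 0.6],
#    [0.6, 0.6, 1. ]]       [0. , 0.6, 1. ]]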


def compute_true_entropy(cov_matrix):
    """Compute the true entropy (bits)."""
    n_dims = cov_matrix.shape[0]
    det_cov = np.linalg.det(cov_matrix)
    return 0.5 * np.log2(det_cov * (2 * np.pi * np.e) ** n_dims)
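
# a quick numerical sanity check of the closed form above: for a single
# standard normal variable, |Sigma| = 1 and n = 1, so the entropy is
# 0.5 * log2(2 * pi * e), roughly 2.047 bits
assert np.isclose(compute_true_entropy(np.eye(1)), 2.047, atol=1e-3)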


# number of dimensions per variable
n_dims = 4
# mean
mu = [0.0] * n_dims
# covariance
covariance = 0.6

# modes for the covariance matrix:
# - dense: all off-diagonal elements have the specified covariance
# - sparse: only pairs x_i, x_(i+1) with i < k have the specified covariance
modes = ["dense", "sparse"]
# number of pairs with specified covariance
k = n_dims

fig = plt.figure(figsize=(10, 5))
# compute entropy using the various estimators
entropy = {name: np.zeros((n_repeat, len(n_samples))) for name in metrics}
for i, mode in enumerate(modes):
    cov_matrix = create_cov_matrix(n_dims, covariance, mode=mode)
    # define the theoretical entropy
    ent_theoric = compute_true_entropy(cov_matrix)
    ax = fig.add_subplot(1, 2, i + 1)

    for n_s, s in enumerate(n_samples):
        for n_r in range(n_repeat):
            # generate samples from the joint gaussian distribution
            fx = np.random.multivariate_normal(mu, cov_matrix, s)
            for metric, fcn in metrics.items():
                # transpose the data to shape (n_dims, n_samples)
                x = fx.T
                # compute entropy
                entropy[metric][n_r, n_s] = fcn(x)

    # plot the results
    plot(entropy, ent_theoric, ax)
    ax.title.set_text(f"Mode: {mode}")

fig.suptitle(
    "Comparison of entropy estimators when\nthe data is high-dimensional",
    fontweight="bold",
)
fig.tight_layout()
plt.show()