From fcd0ebbe42c859aa21004b9f656e17dbe4199fc2 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Sat, 8 Apr 2017 22:35:38 -0500 Subject: [PATCH 01/13] Add contribution guidelines And update the README to include note on installation issues. --- CONTRIBUTING.md | 21 +++++++++++++++++++++ README.rst | 4 ++++ 2 files changed, 25 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..6ec8d1d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,21 @@ +Installation issues +=================== + +Before opening an issue related to installation, please try to install PyEMD in +a fresh, empty Python 3 virtual environment and check that the problem +persists: + +```shell + pip install virtualenvwrapper + mkvirtualenv -p `which python3` pyemd + # Now we're an empty Python 3 virtual environment + pip install pyemd +``` + +PyEMD is not officially supported for (but may nonetheless work with) the following: + +- Python 2 +- Anaconda distributions +- Windows operating systems + +However, if you need to use it in these cases, pull requests are welcome! diff --git a/README.rst b/README.rst index d53c196..a661500 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,10 @@ To install the latest development version: pip install "git+https://github.com/wmayner/pyemd@develop#egg=pyemd" +Before opening an issue related to installation, please try to install PyEMD in +a fresh, empty Python 3 virtual environment and check that the problem +persists. 
+ Usage ~~~~~ From 042c95e02980855f3e1faec8e68ecb01605e14ed Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 08:49:17 -0500 Subject: [PATCH 02/13] Fix formatting in `CONTRIBUTING` --- CONTRIBUTING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6ec8d1d..ac22c1d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,10 +6,10 @@ a fresh, empty Python 3 virtual environment and check that the problem persists: ```shell - pip install virtualenvwrapper - mkvirtualenv -p `which python3` pyemd - # Now we're an empty Python 3 virtual environment - pip install pyemd +pip install virtualenvwrapper +mkvirtualenv -p `which python3` pyemd +# Now we're an empty Python 3 virtual environment +pip install pyemd ``` PyEMD is not officially supported for (but may nonetheless work with) the following: From ab9637aa20abf03e50ee1f6605f31249e06e78db Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:11:35 -0500 Subject: [PATCH 03/13] Update docstrings and argument names - Replace 'signature' with 'histogram' - Add docstring section describing the EMD in more detail - Add `Raises` section to docstrings describing the errors - Use unicode symbols instead of RST markup in docstrings - Clarify error messages --- pyemd/emd.pyx | 107 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 38 deletions(-) diff --git a/pyemd/emd.pyx b/pyemd/emd.pyx index f7982df..869ecb1 100644 --- a/pyemd/emd.pyx +++ b/pyemd/emd.pyx @@ -36,30 +36,40 @@ cdef extern from "lib/emd_hat.hpp": DEFAULT_EXTRA_MASS_PENALTY = -1.0 -def validate(first_signature, second_signature, distance_matrix): +def validate(first_histogram, second_histogram, distance_matrix): """Validate input.""" - if (first_signature.shape[0] > distance_matrix.shape[0] or - second_signature.shape[0] > distance_matrix.shape[0]): - raise ValueError('Signature dimension cannot be larger than ' - 'dimensions of distance 
matrix') - if (first_signature.shape[0] != second_signature.shape[0]): - raise ValueError('Signature dimensions must be equal') + if (first_histogram.shape[0] > distance_matrix.shape[0] or + second_histogram.shape[0] > distance_matrix.shape[0]): + raise ValueError('Histogram lengths cannot be greater than the ' + 'number of rows or columns of the distance matrix') + if (first_histogram.shape[0] != second_histogram.shape[0]): + raise ValueError('Histogram lengths must be equal') -def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] first_signature, - np.ndarray[np.float64_t, ndim=1, mode="c"] second_signature, +def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, + np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): - """ - Compute the EMD between signatures with the given distance matrix. - - Args: - first_signature (np.ndarray): A 1-dimensional array of type - ``np.double``, of length :math:`N`. - second_signature (np.ndarray): A 1-dimensional array of ``np.double``, - also of length :math:`N`. - distance_matrix (np.ndarray): A 2-dimensional array of ``np.double``, - of size :math:`N \cross N`. + u""" + Return the EMD between two histograms using the given distance matrix. + + The Earth Mover's Distance is the minimal cost of turning one histogram + into another by moving around the “dirt” in the bins, where the cost of + moving one dirt from one bin to another is given by the amount of dirt + times the “ground distance” between the bins. + + Arguments: + first_histogram (np.ndarray): A 1-dimensional array of type np.float64, + of length N. + second_histogram (np.ndarray): A 1-dimensional array of np.float64, + also of length N. + distance_matrix (np.ndarray): A 2-dimensional array of np.float64, of + size at least N × N. 
This defines the underlyin metric, or ground + distance, by giving the pairwise distances between the histogram + bins. It must represent a metric; there is no warning if it + doesn't. + + Keyword Arguments: extra_mass_penalty: The penalty for extra mass. If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two @@ -70,28 +80,43 @@ def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] first_signature, Returns: float: The EMD value. + + Raises: + ValueError: If the length of either histogram is greater than the + number of rows or columns of the distance matrix, or if the histograms + aren't the same length. """ - validate(first_signature, second_signature, distance_matrix) - return emd_hat_gd_metric_double(first_signature, - second_signature, + validate(first_histogram, second_histogram, distance_matrix) + return emd_hat_gd_metric_double(first_histogram, + second_histogram, distance_matrix, extra_mass_penalty) -def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_signature, - np.ndarray[np.float64_t, ndim=1, mode="c"] second_signature, +def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, + np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): - """ - Compute the EMD between signatures with the given distance matrix. - - Args: - first_signature (np.ndarray): A 1-dimensional array of type - ``np.double``, of length :math:`N`. - second_signature (np.ndarray): A 1-dimensional array of ``np.double``, - also of length :math:`N`. - distance_matrix (np.ndarray): A 2-dimensional array of ``np.double``, - of size :math:`N \cross N`. + u""" + Compute the EMD between histograms with the given distance matrix. 
+ + The Earth Mover's Distance is the minimal cost of turning one histogram + into another by moving around the “dirt” in the bins, where the cost of + moving one dirt from one bin to another is given by the amount of dirt + times the “ground distance” between the bins. + + Arguments: + first_histogram (np.ndarray): A 1-dimensional array of type np.float64, + of length N. + second_histogram (np.ndarray): A 1-dimensional array of np.float64, + also of length N. + distance_matrix (np.ndarray): A 2-dimensional array of np.float64, of + size at least N × N. This defines the underlyin metric, or ground + distance, by giving the pairwise distances between the histogram + bins. It must represent a metric; there is no warning if it + doesn't. + + Keyword Arguments: extra_mass_penalty: The penalty for extra mass. If you want the resulting distance to be a metric, it should be at least half the diameter of the space (maximum possible distance between any two @@ -101,10 +126,16 @@ def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_signature, used. Returns: - (float, list(float)): The EMD value and the associated minimum-cost flow. + (float, list(list(float))): The EMD value and the associated + minimum-cost flow. + + Raises: + ValueError: If the length of either histogram is greater than the + number of rows or the number of columns of the distance matrix, or if + the histograms aren't the same length. 
""" - validate(first_signature, second_signature, distance_matrix) - return emd_hat_gd_metric_double_with_flow_wrapper(first_signature, - second_signature, + validate(first_histogram, second_histogram, distance_matrix) + return emd_hat_gd_metric_double_with_flow_wrapper(first_histogram, + second_histogram, distance_matrix, extra_mass_penalty) From f8d5f4e30eeac83284a9b72bb1fe3639f3155ce9 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:40:09 -0500 Subject: [PATCH 04/13] Set up `tox` --- .gitignore | 6 ++++-- .travis.yml | 14 ++++---------- tox.ini | 7 +++++++ 3 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 tox.ini diff --git a/.gitignore b/.gitignore index aecc53c..dd2c287 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ __pycache__ -build +.cache +.tox .env .ropeproject *.so *.pyc -dist MANIFEST *.egg* +build +dist pyemd/emd.cpp diff --git a/.travis.yml b/.travis.yml index fc8f418..15b1425 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,10 @@ sudo: false language: python python: -- '3.3' -- '3.4' -- '3.5' -- '3.6' -install: -- pip install Cython -- make -- pip install -e . 
-- pip install pytest -script: python -m pytest + - '2.7' + - '3.6' +install: pip install tox-travis +script: tox notifications: email: false slack: diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..f403818 --- /dev/null +++ b/tox.ini @@ -0,0 +1,7 @@ +[tox] +envlist = py{27,36} + +[testenv] +deps = pytest +commands = make test +whitelist_externals = make From 345ef9a85f1f00504f8e12beacf65beff94e6ff3 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:40:21 -0500 Subject: [PATCH 05/13] Fix typo in docstring --- pyemd/emd.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pyemd/emd.pyx b/pyemd/emd.pyx index 869ecb1..a1fa037 100644 --- a/pyemd/emd.pyx +++ b/pyemd/emd.pyx @@ -110,11 +110,11 @@ def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, of length N. second_histogram (np.ndarray): A 1-dimensional array of np.float64, also of length N. - distance_matrix (np.ndarray): A 2-dimensional array of np.float64, of - size at least N × N. This defines the underlyin metric, or ground - distance, by giving the pairwise distances between the histogram - bins. It must represent a metric; there is no warning if it - doesn't. + distance_matrix (np.ndarray): A 2-dimensional array of type np.float64, + of size at least N × N. This defines the underlying metric, or + ground distance, by giving the pairwise distances between the + histogram bins. It must represent a metric; there is no warning if + it doesn't. Keyword Arguments: extra_mass_penalty: The penalty for extra mass. 
If you want the From e1ff11b53f282cbd732398e6be55779cd129df09 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:47:48 -0500 Subject: [PATCH 06/13] Update README --- README.rst | 77 ++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/README.rst b/README.rst index a661500..45b52f2 100644 --- a/README.rst +++ b/README.rst @@ -14,10 +14,6 @@ Distance `_ that allows it to be used with NumPy. **If you use this code, please cite the papers listed at the end of this document.** -This wrapper does not expose the full functionality of the underlying -implementation; it can only used be with the ``np.float`` data type, and with a -symmetric distance matrix that represents a true metric. See the documentation -for the original Pele and Werman library for the other options it provides. Installation ~~~~~~~~~~~~ @@ -28,16 +24,11 @@ To install the latest release: pip install pyemd -To install the latest development version: - -.. code:: bash - - pip install "git+https://github.com/wmayner/pyemd@develop#egg=pyemd" - Before opening an issue related to installation, please try to install PyEMD in a fresh, empty Python 3 virtual environment and check that the problem persists. + Usage ~~~~~ @@ -45,10 +36,11 @@ Usage >>> from pyemd import emd >>> import numpy as np - >>> first_signature = np.array([0.0, 1.0]) - >>> second_signature = np.array([5.0, 3.0]) - >>> distance_matrix = np.array([[0.0, 0.5], [0.5, 0.0]]) - >>> emd(first_signature, second_signature, distance_matrix) + >>> first_histogram = np.array([0.0, 1.0]) + >>> second_histogram = np.array([5.0, 3.0]) + >>> distance_matrix = np.array([[0.0, 0.5], + ... [0.5, 0.0]]) + >>> emd(first_histogram, second_histogram, distance_matrix) 3.5 You can also get the associated minimum-cost flow: @@ -56,49 +48,48 @@ You can also get the associated minimum-cost flow: .. 
code:: python >>> from pyemd import emd_with_flow - >>> emd_with_flow(first_signature, second_signature, distance_matrix) + >>> emd_with_flow(first_histogram, second_histogram, distance_matrix) (3.5, [[0.0, 0.0], [0.0, 1.0]]) + API ~~~ .. code:: python - emd(first_signature, second_signature, distance_matrix) + emd(first_histogram, second_histogram, distance_matrix) -- ``first_signature``: A 1-dimensional numpy array of ``np.float``, of size N. -- ``second_signature``: A 1-dimensional numpy array of ``np.float``, of size N. -- ``distance_matrix``: A 2-dimensional array of ``np.float``, of size NxN. Must - be symmetric and represent a metric. +- ``first_histogram``: A 1-dimensional numpy array of type ``np.float64``, of + length :math:`N`. +- ``second_histogram``: A 1-dimensional numpy array of type ``np.float64``, of + length :math:`N`. +- ``distance_matrix``: A 2-dimensional array of type ``np.float64``, of size at + least :math:`N \times N`. This defines the underlying metric, or ground + distance, by giving the pairwise distances between the histogram bins. It + must represent a metric; there is no warning if it doesn't. - -.. code:: python - - emd, flow = emd_with_flow(first_signature, second_signature, distance_matrix) - -- ``first_signature``: A 1-dimensional numpy array of ``np.float``, of size N. -- ``second_signature``: A 1-dimensional numpy array of ``np.float``, of size N. -- ``distance_matrix``: A 2-dimensional array of ``np.float``, of size NxN. Must - be symmetric and represent a metric. +The arguments to ``emd_with_flow`` are the same. Limitations and Caveats ~~~~~~~~~~~~~~~~~~~~~~~ -- ``distance_matrix`` must be symmetric. -- ``distance_matrix`` is assumed to represent a true metric. This must be - enforced by the user. See the documentation in ``pyemd/lib/emd_hat.hpp``. +- ``distance_matrix`` is assumed to represent a metric; there is no check to + ensure that this is true. See the documentation in ``pyemd/lib/emd_hat.hpp`` + for more information. 
- The flow matrix does not contain the flows to/from the extra mass bin. -- The signatures and distance matrix must be numpy arrays of ``np.float``. The - original C++ template function can accept any numerical C++ type, but this - wrapper only instantiates the template with ``double`` (Cython converts - ``np.float`` to ``double``). If there's demand, I can add support for other - types. +- The histograms and distance matrix must be numpy arrays of type + ``np.float64``. The original C++ template function can accept any numerical + C++ type, but this wrapper only instantiates the template with ``double`` + (Cython converts ``np.float64`` to ``double``). If there's demand, I can add + support for other types. + Contributing ~~~~~~~~~~~~ -To help develop PyEMD, fork the project on GitHub and install the requirements with ``pip``. +To help develop PyEMD, fork the project on GitHub and install the requirements +with ``pip``. The ``Makefile`` defines some tasks to help with development: @@ -108,6 +99,8 @@ The ``Makefile`` defines some tasks to help with development: * ``clean``: remove the build directory and the compiled C++ extension * ``test``: run unit tests with ``py.test`` +Tests for different Python environments can be run by installing ``tox`` with +``pip install tox`` and running the ``tox`` command. Credit ~~~~~~ @@ -122,7 +115,9 @@ Credit Please cite these papers if you use this code: `````````````````````````````````````````````` -Ofir Pele and Michael Werman, "A linear time histogram metric for improved SIFT matching," in *Computer Vision - ECCV 2008*, Marseille, France, 2008, pp. 495-508. +Ofir Pele and Michael Werman, "A linear time histogram metric for improved SIFT +matching," in *Computer Vision - ECCV 2008*, Marseille, France, 2008, pp. +495-508. .. 
code-block:: latex @@ -136,7 +131,9 @@ Ofir Pele and Michael Werman, "A linear time histogram metric for improved SIFT publisher={Springer} } -Ofir Pele and Michael Werman, "Fast and robust earth mover's distances," in *Proc. 2009 IEEE 12th Int. Conf. on Computer Vision*, Kyoto, Japan, 2009, pp. 460-467. +Ofir Pele and Michael Werman, "Fast and robust earth mover's distances," in +*Proc. 2009 IEEE 12th Int. Conf. on Computer Vision*, Kyoto, Japan, 2009, pp. +460-467. .. code-block:: latex From 269fafba08026a2dd473029bc39fe44439f27107 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:48:00 -0500 Subject: [PATCH 07/13] Update badges in README --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 45b52f2..f482b7b 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,8 @@ -.. image:: https://travis-ci.org/wmayner/pyemd.svg?branch=develop +.. image:: https://img.shields.io/travis/wmayner/pyemd/develop.svg?style=flat-square&maxAge=3600 :target: https://travis-ci.org/wmayner/pyemd -.. image:: http://img.shields.io/badge/Python%203%20-compatible-brightgreen.svg +.. 
image:: https://img.shields.io/pypi/pyversions/pyemd.svg?style=flat-square&maxAge=86400 :target: https://wiki.python.org/moin/Python2orPython3 - :alt: Python 3 compatible + :alt: Python versions badge ************************** PyEMD: Fast EMD for Python From 3f32e7ea0f103b997803d0c020249b2f00af7bba Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:48:22 -0500 Subject: [PATCH 08/13] Add Python 2 classifier to `setup.py` --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4f7ce79..2a03220 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,7 @@ def no_cythonize(extensions, **_ignore): 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', - 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', From 82b4ad5a21016192052542a6752ea72ba439f014 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 18:59:13 -0500 Subject: [PATCH 09/13] Add Cython install to `.travis.yml` --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 15b1425..a0a7668 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,9 @@ language: python python: - '2.7' - '3.6' -install: pip install tox-travis +install: + - pip install tox-travis + - pip install cython script: tox notifications: email: false From ca97510bf259858e484c6fedafdde2447ddd16b7 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 19:18:09 -0500 Subject: [PATCH 10/13] Remove older Python 3 classifiers from `setup.py` --- setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.py b/setup.py index 2a03220..f2a74a7 100644 --- a/setup.py +++ b/setup.py @@ -99,9 +99,6 @@ def no_cythonize(extensions, **_ignore): 'License :: OSI Approved :: MIT License', 'Programming Language :: 
Python', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6' ], ) From 8e39c14b1fd8faae5b0d304319a9b9b1e7dea1f8 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 19:18:34 -0500 Subject: [PATCH 11/13] Bump version to 0.4.4 --- pyemd/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemd/__about__.py b/pyemd/__about__.py index 1bfce1c..2da8b2f 100644 --- a/pyemd/__about__.py +++ b/pyemd/__about__.py @@ -5,7 +5,7 @@ """PyEMD metadata""" __title__ = 'pyemd' -__version__ = '0.4.3' +__version__ = '0.4.4' __description__ = ("A Python wrapper for Ofir Pele and Michael Werman's " "implementation of the Earth Mover's Distance.") __author__ = 'Will Mayner' From 4490be44b5335e118312f4b467a6875844a7714b Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 19:20:09 -0500 Subject: [PATCH 12/13] Add general Python classifiers to `setup.py` --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index f2a74a7..cf39977 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,9 @@ def no_cythonize(extensions, **_ignore): 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', + 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6' ], ) From d43104e8a48bbab16731debc0a7099fa36fdee13 Mon Sep 17 00:00:00 2001 From: Will Mayner Date: Tue, 18 Jul 2017 23:12:36 -0500 Subject: [PATCH 13/13] Fix typos in docstrings --- pyemd/emd.pyx | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/pyemd/emd.pyx b/pyemd/emd.pyx index a1fa037..6744a56 100644 --- a/pyemd/emd.pyx +++ b/pyemd/emd.pyx @@ -50,13 +50,12 @@ def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] 
first_histogram, np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): - u""" - Return the EMD between two histograms using the given distance matrix. + u"""Return the EMD between two histograms using the given distance matrix. The Earth Mover's Distance is the minimal cost of turning one histogram into another by moving around the “dirt” in the bins, where the cost of - moving one dirt from one bin to another is given by the amount of dirt - times the “ground distance” between the bins. + moving dirt from one bin to another is given by the amount of dirt times + the “ground distance” between the bins. Arguments: first_histogram (np.ndarray): A 1-dimensional array of type np.float64, @@ -64,7 +63,7 @@ def emd(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, second_histogram (np.ndarray): A 1-dimensional array of np.float64, also of length N. distance_matrix (np.ndarray): A 2-dimensional array of np.float64, of - size at least N × N. This defines the underlyin metric, or ground + size at least N × N. This defines the underlying metric, or ground distance, by giving the pairwise distances between the histogram bins. It must represent a metric; there is no warning if it doesn't. @@ -97,24 +96,23 @@ def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, np.ndarray[np.float64_t, ndim=1, mode="c"] second_histogram, np.ndarray[np.float64_t, ndim=2, mode="c"] distance_matrix, extra_mass_penalty=DEFAULT_EXTRA_MASS_PENALTY): - u""" - Compute the EMD between histograms with the given distance matrix. + u"""Return the EMD between two histograms using the given distance matrix. 
The Earth Mover's Distance is the minimal cost of turning one histogram into another by moving around the “dirt” in the bins, where the cost of - moving one dirt from one bin to another is given by the amount of dirt - times the “ground distance” between the bins. + moving dirt from one bin to another is given by the amount of dirt times + the “ground distance” between the bins. Arguments: first_histogram (np.ndarray): A 1-dimensional array of type np.float64, of length N. second_histogram (np.ndarray): A 1-dimensional array of np.float64, also of length N. - distance_matrix (np.ndarray): A 2-dimensional array of type np.float64, - of size at least N × N. This defines the underlying metric, or - ground distance, by giving the pairwise distances between the - histogram bins. It must represent a metric; there is no warning if - it doesn't. + distance_matrix (np.ndarray): A 2-dimensional array of np.float64, of + size at least N × N. This defines the underlying metric, or ground + distance, by giving the pairwise distances between the histogram + bins. It must represent a metric; there is no warning if it + doesn't. Keyword Arguments: extra_mass_penalty: The penalty for extra mass. If you want the @@ -131,8 +129,8 @@ def emd_with_flow(np.ndarray[np.float64_t, ndim=1, mode="c"] first_histogram, Raises: ValueError: If the length of either histogram is greater than the - number of rows or the number of columns of the distance matrix, or if - the histograms aren't the same length. + number of rows or columns of the distance matrix, or if the histograms + aren't the same length. """ validate(first_histogram, second_histogram, distance_matrix) return emd_hat_gd_metric_double_with_flow_wrapper(first_histogram,