From 444cbf685472c830e1150a2d1668698ccc8babf5 Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 11:10:44 -0400
Subject: [PATCH 1/6] make numpy_util.match work for non-integer inputs

---
 esutil/numpy_util.py            | 52 ++++++++++++++++--------------
 esutil/tests/test_numpy_util.py | 56 +++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 24 deletions(-)

diff --git a/esutil/numpy_util.py b/esutil/numpy_util.py
index 608a9cf..8717549 100644
--- a/esutil/numpy_util.py
+++ b/esutil/numpy_util.py
@@ -1509,40 +1509,44 @@ def rem_dup(arr, flag, values=False):
 
 def match(arr1input, arr2input, presorted=False):
     """
-    NAME:
-        match
+    Match two arrays, returning the indicies of matches for each array, or
+    empty arrays if no matches are found.  This means arr1[ind1] == arr2[ind2]
+    is true for all corresponding pairs.
 
-    CALLING SEQUENCE:
-        ind1,ind2 = match(arr1, arr2, presorted=False)
+    arr1 must contain only unique inputs, but arr2 may be non-unique.
 
-    PURPOSE:
-        Match two numpy arrays.  Return the indices of the matches or empty
-        arrays if no matches are found.  This means arr1[ind1] == arr2[ind2] is
-        true for all corresponding pairs.  arr1 must contain only unique
-        inputs, but arr2 may be non-unique.
-        If you know arr1 is sorted, set presorted=True and it will run
-        even faster
+    If you know arr1 is sorted, set presorted=True and it will run even faster
 
-    METHOD:
-        uses searchsorted with some sugar.  Much faster than old version
-        based on IDL code.
 
-    REVISION HISTORY:
-        Created 2015, Eli Rykoff, SLAC.
+    Parameters
+    ----------
+    arr1: array
+        The first array, which must have unique elements.
+    arr2: array
+        The second array.
+    presorted: bool, optional
+        If set to True, the first array is assumed to be sorted.
 
+    Returns
+    -------
+    ind1, ind2: array, array
+        The index arrays of matches for each array
+
+    Revision history
+    -----------------
+    Created 2015, Eli Rykoff, SLAC.
     """
 
     # make sure 1D
     arr1 = np.atleast_1d(arr1input)
     arr2 = np.atleast_1d(arr2input)
 
-    # check for integer data...
-    if not issubclass(arr1.dtype.type, np.integer) or not issubclass(
-        arr2.dtype.type, np.integer
-    ):
-        mess = "Error: only works with integer types, got %s %s"
-        mess = mess % (arr1.dtype.type, arr2.dtype.type)
-        raise ValueError(mess)
+    # Inspect the converted array rather than the raw input: indexing
+    # arr1input directly fails for scalar input, and for empty input it
+    # raises IndexError before the size check below gets a chance to run.
+    is_string = arr1.size > 0 and isinstance(
+        arr1[0], (str, bytes)
+    )
 
     if (arr1.size == 0) or (arr2.size == 0):
         mess = "Error: arr1 and arr2 must each be non-zero length"
@@ -1563,7 +1567,7 @@ def match(arr1input, arr2input, presorted=False):
     sub1 = np.searchsorted(arr1, arr2, sorter=st1)
 
     # check for out-of-bounds at the high end if necessary
-    if arr2.max() > arr1.max():
+    if is_string or arr2.max() > arr1.max():
         (bad,) = np.where(sub1 == arr1.size)
         sub1[bad] = arr1.size - 1
 
diff --git a/esutil/tests/test_numpy_util.py b/esutil/tests/test_numpy_util.py
index 90cebad..ff4571d 100644
--- a/esutil/tests/test_numpy_util.py
+++ b/esutil/tests/test_numpy_util.py
@@ -58,3 +58,59 @@ def test_split_array():
     assert np.all(chunks[6] == [18, 19, 20])
     assert np.all(chunks[7] == [21, 22, 23])
     assert np.all(chunks[8] == [24])
+
+
+@pytest.mark.parametrize('presorted', [True, False])
+def test_match_int(presorted):
+    a1 = np.array([3, 4, 7, 8, 10])
+    a2 = np.array([8, 3])
+    # a1 is sorted so presorted=True is valid; ind shuffles it otherwise
+    if not presorted:
+        ind = np.array([4, 1, 0, 2, 3])
+        m1, m2 = eu.numpy_util.match(a1[ind], a2)
+        assert np.all(m1 == [4, 2])
+    else:
+        m1, m2 = eu.numpy_util.match(a1, a2, presorted=True)
+        assert np.all(m1 == [3, 0])
+
+
+@pytest.mark.parametrize('presorted', [True, False])
+def test_match_float(presorted):
+    a1 = np.array([1.25, 6.61, 8.51, 9.91, 11.25])
+    a2 = np.array([6.61, 9.91])
+    # a1 is sorted so presorted=True is valid; ind shuffles it otherwise
+    if not presorted:
+        ind = np.array([4, 1, 0, 2, 3])
+        m1, m2 = eu.numpy_util.match(a1[ind], a2)
+        assert np.all(m1 == [1, 4])
+    else:
+        m1, m2 = eu.numpy_util.match(a1, a2, presorted=True)
+        assert np.all(m1 == [1, 3])
+
+
+@pytest.mark.parametrize('presorted', [True, False])
+def test_match_str(presorted):
+    a1 = np.array(['blah', 'goodbye', 'hello', 'stuff', 'things'])
+    a2 = np.array(['goodbye', 'things', 'zz'])
+    # a1 is sorted so presorted=True is valid; ind shuffles it otherwise
+    if not presorted:
+        ind = np.array([3, 4, 0, 2, 1])
+        m1, m2 = eu.numpy_util.match(a1[ind], a2)
+        assert np.all(m1 == [4, 1])
+    else:
+        m1, m2 = eu.numpy_util.match(a1, a2, presorted=True)
+        assert np.all(m1 == [1, 4])
+
+
+@pytest.mark.parametrize('presorted', [True, False])
+def test_match_none(presorted):
+    a1 = np.array(['blah', 'goodbye', 'hello', 'stuff', 'things'])
+    a2 = np.array(['zz', 'bb'])
+
+    if not presorted:
+        ind = np.array([3, 4, 0, 2, 1])
+        m1, m2 = eu.numpy_util.match(a1[ind], a2)
+    else:
+        # a1 is already in sorted order, so presorted=True is valid here
+        m1, m2 = eu.numpy_util.match(a1, a2, presorted=True)
+    assert m1.size == 0 and m2.size == 0

From 079c0b0b7d6192d05bbf00a1a1333060f1f606a2 Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 13:03:30 -0400
Subject: [PATCH 2/6] rename test to test_match_nomatch

---
 esutil/tests/test_numpy_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esutil/tests/test_numpy_util.py b/esutil/tests/test_numpy_util.py
index ff4571d..9f2fa60 100644
--- a/esutil/tests/test_numpy_util.py
+++ b/esutil/tests/test_numpy_util.py
@@ -103,7 +103,7 @@ def test_match_str(presorted):
 
 
 @pytest.mark.parametrize('presorted', [True, False])
-def test_match_none(presorted):
+def test_match_nomatch(presorted):
     a1 = np.array(['blah', 'goodbye', 'hello', 'stuff', 'things'])
     a2 = np.array(['zz', 'bb'])
 

From bae187d632393dedb6560fdf938c99ef0494ac3c Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 13:13:43 -0400
Subject: [PATCH 3/6] update doc

---
 esutil/numpy_util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/esutil/numpy_util.py b/esutil/numpy_util.py
index 8717549..3e454ed 100644
--- a/esutil/numpy_util.py
+++ b/esutil/numpy_util.py
@@ -1511,7 +1511,8 @@ def match(arr1input, arr2input, presorted=False):
     """
     Match two arrays, returning the indicies of matches for each array, or
     empty arrays if no matches are found.  This means arr1[ind1] == arr2[ind2]
-    is true for all corresponding pairs.
+    is true for all corresponding pairs.  For floating-point data this implies
+    exact matching with no floating-point tolerance.
 
     arr1 must contain only unique inputs, but arr2 may be non-unique.
 

From 95abee44c106ddca013a3afbcf784dc2f50652c5 Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 13:15:48 -0400
Subject: [PATCH 4/6] update release notes, bump version

---
 RELEASE_NOTES        | 7 +++++++
 esutil/__init__.py   | 2 +-
 esutil/numpy_util.py | 3 ++-
 setup.py             | 2 +-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index d22f059..55105f6 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,10 @@
+0.6.15 (not yet released)
+------
+
+Enhancements
+
+    - numpy_util.match works for data types other than int
+
 0.6.14
 ------
 
diff --git a/esutil/__init__.py b/esutil/__init__.py
index bf7de63..b0287c8 100644
--- a/esutil/__init__.py
+++ b/esutil/__init__.py
@@ -83,7 +83,7 @@ class for gauss-legendre integration, which relies on the gauleg C++ extension.
 
 import sys
 
-__version__ = "0.6.14"
+__version__ = "0.6.15"
 
 def version():
     return __version__
diff --git a/esutil/numpy_util.py b/esutil/numpy_util.py
index 3e454ed..fcf8702 100644
--- a/esutil/numpy_util.py
+++ b/esutil/numpy_util.py
@@ -1512,7 +1512,8 @@ def match(arr1input, arr2input, presorted=False):
     Match two arrays, returning the indicies of matches for each array, or
     empty arrays if no matches are found.  This means arr1[ind1] == arr2[ind2]
     is true for all corresponding pairs.  For floating-point data this implies
-    exact matching with no floating-point tolerance.
+    exact matching with no floating-point tolerance.  The data type can be
+    string or bytes.
 
     arr1 must contain only unique inputs, but arr2 may be non-unique.
 
diff --git a/setup.py b/setup.py
index 1e5fe6d..c5dae01 100644
--- a/setup.py
+++ b/setup.py
@@ -244,7 +244,7 @@ def build_extensions(self):
 
 setup(
     name="esutil",
-    version="0.6.14",
+    version="0.6.15",
     author="Erin Scott Sheldon",
     author_email="erin.sheldon@gmail.com",
     classifiers=classifiers,

From 19551bd06cd51235184ba12797215bf6d8b56273 Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 13:46:39 -0400
Subject: [PATCH 5/6] better note

---
 RELEASE_NOTES | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 55105f6..2819459 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -3,7 +3,7 @@
 
 Enhancements
 
-    - numpy_util.match works for data types other than int
+    - numpy_util.match works for non-integer data types
 
 0.6.14
 ------

From 6273689e00161e9716bf8f7eed076fa1dd16fbc4 Mon Sep 17 00:00:00 2001
From: Erin Sheldon <erin.sheldon@gmail.com>
Date: Tue, 20 Aug 2024 14:01:46 -0400
Subject: [PATCH 6/6] docs

---
 esutil/numpy_util.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/esutil/numpy_util.py b/esutil/numpy_util.py
index fcf8702..d19ebc8 100644
--- a/esutil/numpy_util.py
+++ b/esutil/numpy_util.py
@@ -1512,8 +1512,9 @@ def match(arr1input, arr2input, presorted=False):
     Match two arrays, returning the indicies of matches for each array, or
     empty arrays if no matches are found.  This means arr1[ind1] == arr2[ind2]
     is true for all corresponding pairs.  For floating-point data this implies
-    exact matching with no floating-point tolerance.  The data type can be
-    string or bytes.
+    exact matching with no floating-point tolerance.
+
+    The data type can be int, float, string or bytes.
 
     arr1 must contain only unique inputs, but arr2 may be non-unique.