Merge branch 'master' into release

staircase-dev · May 4, 2022 · f89d0b5 · f89d0b5
2 parents d8b9b44 + ff8c88f
commit f89d0b5
Show file tree

Hide file tree

Showing 13 changed files with 515 additions and 33 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,16 +1,16 @@
 repos:
   - repo: https://github.com/pycqa/isort
-    rev: 5.8.0
+    rev: 5.10.1
     hooks:
       - id: isort
         args: ["--profile", "black", "--filter-files"]
         name: isort
   - repo: https://github.com/psf/black
-    rev: 21.6b0
+    rev: 22.3.0
     hooks:
       - id: black
         exclude: ^piso/test_data*
   - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+    rev: 4.0.1
     hooks:
     -   id: flake8
diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst
@@ -20,4 +20,5 @@ Accessors
    ArrayAccessor.complement
    ArrayAccessor.contains
    ArrayAccessor.split
+   ArrayAccessor.bridge
    ArrayAccessor.adjacency_matrix
diff --git a/docs/reference/package.rst b/docs/reference/package.rst
@@ -22,6 +22,7 @@ Top level functions
    complement
    contains
    split
+   bridge
    lookup
    join
    adjacency_matrix
diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst
@@ -5,6 +5,16 @@ Release notes
 ========================
 
 
+**v0.9.0 2022-05-04**
+
+- Extended :func:`piso.adjacency_matrix` and :meth:`ArrayAccessor.adjacency_matrix() <piso.accessor.ArrayAccessor.adjacency_matrix>` to take multiple interval array parameters
+
+Added the following methods
+
+- :func:`piso.bridge`
+- :meth:`ArrayAccessor.bridge() <piso.accessor.ArrayAccessor.bridge>`
+
+
 **v0.8.0 2022-01-29**
 
 - Added `bins` parameter to :func:`piso.coverage` and :meth:`ArrayAccessor.coverage() <piso.accessor.ArrayAccessor.coverage>`

diff --git a/piso/__init__.py b/piso/__init__.py
@@ -1,5 +1,6 @@
 from piso.graph import adjacency_matrix
 from piso.intervalarray import (
+    bridge,
     complement,
     contains,
     coverage,

diff --git a/piso/accessor.py b/piso/accessor.py
@@ -175,13 +175,21 @@ def split(self, x):
         )
 
     @Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1)
-    def adjacency_matrix(self, edges="intersect", include_index=True):
+    def adjacency_matrix(self, *interval_arrays, edges="intersect", include_index=True):
         return graph.adjacency_matrix(
             self._interval_array,
+            *interval_arrays,
             edges=edges,
             include_index=include_index,
         )
 
+    @Appender(docstrings.bridge_docstring, join="\n", indents=1)
+    def bridge(self, threshold):
+        return intervalarray.bridge(
+            self._interval_array,
+            threshold,
+        )
+
 
 def _register_accessors():
     _register_accessor("piso", pd.IntervalIndex)(ArrayAccessor)

diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py
@@ -948,3 +948,50 @@ def join_params(list_of_param_strings):
 [8, 9]     True    True    True   False    False
 [9, 10]    True    True    True   False    False
 """
+
+
+bridge_docstring = """
+Given a set of intervals, and a threshold, merges intervals which are separated by a gap less than
+or equal to the threshold.  Overlapping intervals will be merged, regardless of threshold value.
+
+The intervals are contained in the object the accessor belongs to.  They may be left-closed,
+right-closed, both, or neither, and contain overlapping intervals.
+
+Parameters
+----------
+threshold : scalar
+    The value should belong to the domain that arises from a subtraction over the domain of the intervals.
+    For instance, if intervals are timestamp data, then *threshold* should be timedelta.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Return type will be the same type as the object the accessor belongs to.
+
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+>>> piso.register_accessors()
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+...     [(0, 4), (3, 5), (7, 8), (11, 12)],
+... )
+
+>>> arr.piso.bridge(threshold=1)
+<IntervalArray>
+[(0.0, 5.0], (7.0, 8.0], (11.0, 12.0]]
+Length: 3, closed: right, dtype: interval[float64]
+
+>>> arr.piso.bridge(threshold=2)
+<IntervalArray>
+[(0.0, 8.0], (11.0, 12.0]]
+Length: 2, closed: right, dtype: interval[float64]
+
+>>> arr.piso.bridge(threshold=3)
+<IntervalArray>
+[(0.0, 12.0]]
+Length: 1, closed: right, dtype: interval[float64]
+"""
diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py
@@ -893,3 +893,51 @@ def join_params(list_of_param_strings):
               closed='neither',
               dtype='interval[float64]')
 """
+
+
+bridge_docstring = """
+Given a set of intervals, and a threshold, merges intervals which are separated by a gap less than
+or equal to the threshold.  Overlapping intervals will be merged, regardless of threshold value.
+
+The intervals are contained in *interval_array*.  They may be left-closed,
+right-closed, both, or neither, and contain overlapping intervals.
+
+Parameters
+----------
+interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Contains the (possibly overlapping) intervals.  May be left-closed or right-closed.
+threshold : scalar
+    The value should belong to the domain that arises from a subtraction over the domain of the intervals.
+    For instance, if intervals are timestamp data, then *threshold* should be timedelta.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+    Return type will be the same type as *interval_array*.
+
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+...     [(0, 4), (3, 5), (7, 8), (11, 12)],
+... )
+
+>>> piso.bridge(arr, threshold=1)
+<IntervalArray>
+[(0.0, 5.0], (7.0, 8.0], (11.0, 12.0]]
+Length: 3, closed: right, dtype: interval[float64]
+
+>>> piso.bridge(arr, threshold=2)
+<IntervalArray>
+[(0.0, 8.0], (11.0, 12.0]]
+Length: 2, closed: right, dtype: interval[float64]
+
+>>> piso.bridge(arr, threshold=3)
+<IntervalArray>
+[(0.0, 12.0]]
+Length: 1, closed: right, dtype: interval[float64]
+"""
diff --git a/piso/graph.py b/piso/graph.py
@@ -1,21 +1,100 @@
+import itertools
+
 import numpy as np
 import pandas as pd
-from pandas.core.indexes import interval
 
+from piso.intervalarray import _validate_array_of_intervals_arrays
+
+
+def _adj_mat_intersection(lefts, rights, closed, fill_diagonal=True):
+    result = np.greater.outer(rights, lefts) & np.less.outer(lefts, rights)
+    if closed == "both":
+        result = result | np.equal.outer(rights, lefts) | np.equal.outer(lefts, rights)
+    if fill_diagonal:
+        np.fill_diagonal(result, False)
+    return result
+
+
+def _adjacency_matrix_set_of_intervals(interval_array, edges, include_index):
+    if edges == "intersect":
+        result = _adj_mat_intersection(
+            interval_array.left, interval_array.right, interval_array.closed
+        )
+    elif edges == "disjoint":
+        result = ~_adj_mat_intersection(
+            interval_array.left,
+            interval_array.right,
+            interval_array.closed,
+            fill_diagonal=False,
+        )
+    else:
+        raise ValueError(f"Invalid value for edges parameter: {edges}")
+
+    if include_index:
+        result = pd.DataFrame(result, index=interval_array, columns=interval_array)
+
+    return result
+
+
+def _adjacency_matrix_set_of_sets(*interval_arrays, edges, include_index):
+    _validate_array_of_intervals_arrays(*interval_arrays, validate_intervals=False)
+    lefts = list(itertools.chain.from_iterable([ia.left for ia in interval_arrays]))
+    rights = list(itertools.chain.from_iterable([ia.right for ia in interval_arrays]))
+    closed = interval_arrays[0].closed
+
+    if edges == "intersect":
+        component_result = _adj_mat_intersection(lefts, rights, closed)
+        numpy_logical = np.logical_or
+    elif edges == "disjoint":
+        component_result = ~_adj_mat_intersection(
+            lefts, rights, closed, fill_diagonal=False
+        )
+        numpy_logical = np.logical_and
+    else:
+        raise ValueError(f"Invalid value for edges parameter: {edges}")
+
+    index = np.cumsum([0] + [len(ia) for ia in interval_arrays[:-1]])
+    result = numpy_logical.reduceat(
+        numpy_logical.reduceat(component_result, index, axis=0), index, axis=1
+    )
+    np.fill_diagonal(result, False)
+
+    if include_index:
+        result = pd.DataFrame(
+            result,
+            index=range(len(interval_arrays)),
+            columns=range(len(interval_arrays)),
+        )
+
+    return result
 
-def adjacency_matrix(interval_array, edges="intersect", include_index=True):
+
+def adjacency_matrix(
+    interval_array, *interval_arrays, edges="intersect", include_index=True
+):
     """
     Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.
 
-    The set of nodes correspond to intervals and the edges are defined by the relationship
+    The nodes correspond to sets and the edges are defined by the relationship
     defined by the *edges* parameter.
 
+    What is considered a set is determined by the number of positional arguments used, that is, determined by the
+    size of *interval_arrays*.
+
+    If *interval_arrays* is empty then the sets are considered to be the intervals contained in *interval_array*.
+
+    If *interval_arrays* is not empty then the sets are considered to be *interval_array* and the elements in *interval_arrays*.
+    Each of these arrays is assumed to contain disjoint intervals (and satisfy the definition of a set).  Any array containing
+    overlaps between intervals will be mapped to one with disjoint intervals via a union operation.
+
     Note that the diagonal is defined with False values by default.
 
     Parameters
     ----------
     interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
-        Contains the intervals.
+        The first (and possibly only) operand.
+    *interval_arrays : argument list of :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+        May be empty, in which case the sets are the intervals contained in *interval_array*.
     edges : {"intersect", "disjoint"}, default "intersect"
         Defines the relationship that edges between nodes represent.
     include_index : bool, default True
@@ -60,30 +139,29 @@ def adjacency_matrix(interval_array, edges="intersect", include_index=True):
     [5, 7]     True   False   False    True     True
     [8, 9]     True    True    True   False    False
     [9, 10]    True    True    True   False    False
-    """
-    if edges == "intersect":
-        result = _adj_mat_intersection(interval_array)
-    elif edges == "disjoint":
-        result = ~_adj_mat_intersection(interval_array, fill_diagonal=False)
-    else:
-        raise ValueError(f"Invalid value for edges parameter: {edges}")
-
-    if include_index:
-        result = pd.DataFrame(result, index=interval_array, columns=interval_array)
-
-    return result
 
+    >>> ii1 = pd.IntervalIndex.from_tuples([(0,3), (2,8), (11,15)], closed="left")
+    >>> ii2 = pd.IntervalIndex.from_tuples([(3,5), (7,12), (16,20)], closed="left")
+    >>> ii3 = pd.IntervalIndex.from_tuples([(9,11), (25,26)], closed="left")
+    >>> ii4 = pd.IntervalIndex.from_tuples([(23,24)], closed="left")
+
+    >>> piso.adjacency_matrix(ii1,ii2,ii3,ii4)
+           0      1      2      3
+    0  False   True  False  False
+    1   True  False   True  False
+    2  False   True  False  False
+    3  False  False  False  False
+
+    >>> piso.adjacency_matrix(ii1,ii2,ii3,ii4, edges="disjoint", include_index=False)
+    array([[False, False,  True,  True],
+           [False, False, False,  True],
+           [ True, False, False,  True],
+           [ True,  True,  True, False]])
+    """
 
-def _adj_mat_intersection(interval_array, fill_diagonal=True):
-    result = np.greater.outer(
-        interval_array.right, interval_array.left
-    ) & np.less.outer(interval_array.left, interval_array.right)
-    if interval_array.closed == "both":
-        result = (
-            result
-            | np.equal.outer(interval_array.right, interval_array.left)
-            | np.equal.outer(interval_array.left, interval_array.right)
+    if len(interval_arrays) == 0:
+        return _adjacency_matrix_set_of_intervals(interval_array, edges, include_index)
+    else:
+        return _adjacency_matrix_set_of_sets(
+            interval_array, *interval_arrays, edges=edges, include_index=include_index
         )
-    if fill_diagonal:
-        np.fill_diagonal(result, False)
-    return result
diff --git a/piso/intervalarray.py b/piso/intervalarray.py
@@ -262,3 +262,13 @@ def split(interval_array, x):
     return interval_array.from_arrays(
         lefts[~np.isnan(lefts)], rights[~np.isnan(rights)], closed=interval_array.closed
     )
+
+
+@Appender(docstrings.bridge_docstring, join="\n", indents=1)
+def bridge(interval_array, threshold):
+    # interval_array validation will occur in union and complement methods
+    complement_ = complement(union(interval_array))
+    return complement(
+        complement_[complement_.length > threshold],
+        (interval_array.left.min(), interval_array.right.max()),
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "piso"
-version = "0.8.0"
+version = "0.9.0"
 description = "Pandas Interval Set Operations: methods for set operations, analytics, lookups and joins on pandas' Interval, IntervalArray and IntervalIndex"
 readme = "README.md"
 authors = ["Riley Clement <venaturum@gmail.com>"]