diff --git a/docs/img/powered_by_staircase.svg b/docs/img/powered_by_staircase.svg
index cc793e6..9bd46ee 100644
--- a/docs/img/powered_by_staircase.svg
+++ b/docs/img/powered_by_staircase.svg
@@ -2,9 +2,9 @@
+
+ inkscape:pagecheckerboard="0"
+ showguides="true"
+ inkscape:guide-bbox="true">
+
+
@@ -95,115 +104,119 @@
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
- transform="translate(377.75726,326.24246)">
+ transform="translate(632.60692,319.18132)">
+ width="802.06549"
+ height="142.67062"
+ x="-631.41364"
+ y="-318.31769" />
+ id="g1223"
+ transform="translate(-259.36723,77.589223)">
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id="path7928"
+ d="M 3.7846419,-314.6503 H 133.02875"
+ style="fill:none;stroke:#828282;stroke-width:2.05538;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1"
+ inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png"
+ inkscape:export-xdpi="50"
+ inkscape:export-ydpi="50" />
+ id="path7930"
+ d="M -80.584528,-273.74018 H -4.0089881"
+ style="fill:none;stroke:#828282;stroke-width:1.58243;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+ inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png"
+ inkscape:export-xdpi="50"
+ inkscape:export-ydpi="50" />
+ id="path7932"
+ d="M 2.7262919,-314.7503 H 135.46667"
+ style="fill:#828282;fill-opacity:1;stroke:#828282;stroke-width:0.683;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:2.8;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#marker7938)"
+ inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png"
+ inkscape:export-xdpi="50"
+ inkscape:export-ydpi="50" />
-
-
-
-
-
-
+ inkscape:export-filename="C:\Users\Riley\Pictures\staircase\logo1.png"
+ inkscape:export-xdpi="50"
+ inkscape:export-ydpi="50" />
-
-
-
-
-
powered by
+ id="tspan1373">powered by
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/index.rst b/docs/index.rst
index 239418e..209c838 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -20,7 +20,7 @@ Pandas Interval Set Operations: methods for set operations, analytics, lookups a
.. image:: img/powered_by_staircase.svg
:target: https://www.staircase.dev
- :width: 200
+ :width: 300
:alt: powered_by_staircase
:align: center
diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst
index 0ad0a59..43a3545 100644
--- a/docs/reference/accessors.rst
+++ b/docs/reference/accessors.rst
@@ -19,4 +19,5 @@ Accessors
ArrayAccessor.coverage
ArrayAccessor.complement
ArrayAccessor.contains
- ArrayAccessor.get_indexer
\ No newline at end of file
+ ArrayAccessor.split
+ ArrayAccessor.adjacency_matrix
\ No newline at end of file
diff --git a/docs/reference/package.rst b/docs/reference/package.rst
index 59f084c..b9272d8 100644
--- a/docs/reference/package.rst
+++ b/docs/reference/package.rst
@@ -21,6 +21,7 @@ Top level functions
coverage
complement
contains
- get_indexer
+ split
lookup
- join
\ No newline at end of file
+ join
+ adjacency_matrix
\ No newline at end of file
diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst
index 30150bc..c37c3bf 100644
--- a/docs/release_notes/index.rst
+++ b/docs/release_notes/index.rst
@@ -5,6 +5,20 @@ Release notes
========================
+**v0.7.0 2021-11-20**
+
+Added the following methods
+
+- :func:`piso.split`
+- :func:`piso.adjacency_matrix`
+- :meth:`ArrayAccessor.split() <piso.accessor.ArrayAccessor.split>`
+- :meth:`ArrayAccessor.adjacency_matrix() <piso.accessor.ArrayAccessor.adjacency_matrix>`
+
+Removed the following methods
+
+- :func:`piso.get_indexer` and :meth:`ArrayAccessor.get_indexer`, in favour of :meth:`pandas.IntervalIndex.get_indexer`
+
+
**v0.6.0 2021-11-05**
The following methods were extended to accommodate intervals with *closed = "both"* or *"neither"*
@@ -12,7 +26,7 @@ The following methods were extended to accommodate intervals with *closed = "bot
- :func:`piso.contains` (and :meth:`ArrayAccessor.contains() `)
- :func:`piso.get_indexer` (and :meth:`ArrayAccessor.get_indexer() `)
- :func:`piso.lookup`
-- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.get_indexer() `)
+- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() <piso.accessor.ArrayAccessor.isdisjoint>`)
**v0.5.0 2021-11-02**
@@ -34,7 +48,7 @@ Added the following methods
- :func:`piso.lookup`
- :func:`piso.get_indexer`
-- :meth:`ArrayAccessor.get_indexer() `
+- :meth:`ArrayAccessor.get_indexer`
**v0.3.0 2021-10-23**
diff --git a/piso/__init__.py b/piso/__init__.py
index 87dee33..cefa917 100644
--- a/piso/__init__.py
+++ b/piso/__init__.py
@@ -1,13 +1,14 @@
+from piso.graph import adjacency_matrix
from piso.intervalarray import (
complement,
contains,
coverage,
difference,
- get_indexer,
intersection,
isdisjoint,
issubset,
issuperset,
+ split,
symmetric_difference,
union,
)
diff --git a/piso/accessor.py b/piso/accessor.py
index 4fd5e03..0813c5c 100644
--- a/piso/accessor.py
+++ b/piso/accessor.py
@@ -3,7 +3,7 @@
import pandas as pd
import piso.docstrings.accessor as docstrings
-from piso import intervalarray
+from piso import graph, intervalarray
from piso._decorators import Appender
@@ -155,13 +155,6 @@ def complement(self, domain=None):
domain,
)
- @Appender(docstrings.get_indexer_docstring, join="\n", indents=1)
- def get_indexer(self, x):
- return intervalarray.get_indexer(
- self._interval_array,
- x,
- )
-
@Appender(docstrings.contains_docstring, join="\n", indents=1)
def contains(self, x, include_index=True):
return intervalarray.contains(
@@ -170,6 +163,21 @@ def contains(self, x, include_index=True):
include_index,
)
+ @Appender(docstrings.split_docstring, join="\n", indents=1)
+ def split(self, x):
+ return intervalarray.split(
+ self._interval_array,
+ x,
+ )
+
+ @Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1)
+ def adjacency_matrix(self, edges="intersect", include_index=True):
+ return graph.adjacency_matrix(
+ self._interval_array,
+ edges=edges,
+ include_index=include_index,
+ )
+
def _register_accessors():
_register_accessor("piso", pd.IntervalIndex)(ArrayAccessor)
diff --git a/piso/docstrings/accessor.py b/piso/docstrings/accessor.py
index d349427..e182740 100644
--- a/piso/docstrings/accessor.py
+++ b/piso/docstrings/accessor.py
@@ -1,3 +1,5 @@
+from piso.graph import adjacency_matrix
+
union_examples = """
Examples
-----------
@@ -544,7 +546,7 @@ def join_params(list_of_param_strings):
"""
Indicates whether one, or more, sets are disjoint or not.
-*interval_array* must be left-closed or right-closed if *interval_arrays is non-empty.
+*interval_array* must be left-closed or right-closed if \\*interval_arrays is non-empty.
If no arguments are provided then this restriction does not apply.
"""
+ template_doc
@@ -782,3 +784,111 @@ def join_params(list_of_param_strings):
>>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False)
array([[ True]])
"""
+
+
+split_docstring = """
+Given a set of intervals, and break points, splits the intervals into pieces wherever
+they overlap a break point.
+
+The intervals are contained in the object the accessor belongs to. They may be left-closed,
+right-closed, both, or neither, and contain overlapping intervals.
+
+Parameters
+----------
+x : scalar, or array-like of scalars
+ Values in *x* should belong to the same domain as the intervals in the object the accessor belongs to.
+ May contain duplicates and be unsorted.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+ Return type will be the same type as the object the accessor belongs to.
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+>>> piso.register_accessors()
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+... [(0, 4), (2, 5)],
+... )
+
+>>> arr.piso.split(3)
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr.piso.split([3,3,3,3])
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr = pd.IntervalIndex.from_tuples(
+... [(0, 4), (2, 5)], closed="neither",
+... )
+
+>>> arr.piso.split([1, 6, 4])
+IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
+ closed='neither',
+ dtype='interval[float64]')
+"""
+
+
+adjacency_matrix_docstring = """
+Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.
+
+The set of nodes correspond to intervals and the edges are defined by the relationship
+defined by the *edges* parameter.
+
+Note that the diagonal is defined with False values by default.
+
+Parameters
+----------
+edges : {"intersect", "disjoint"}, default "intersect"
+ Defines the relationship that edges between nodes represent.
+include_index : bool, default True
+ If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
+ If False then a :class:`numpy.ndarray` is returned.
+
+Returns
+-------
+:class:`pandas.DataFrame` or :class:`numpy.ndarray`
+ Boolean valued, symmetrical, with False along diagonal.
+
+Examples
+---------
+
+>>> import pandas as pd
+>>> import piso
+>>> piso.register_accessors()
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+... [(0,4), (3,6), (5, 7), (8,9), (9,10)],
+... closed="both",
+... )
+
+>>> arr.piso.adjacency_matrix()
+ [0, 4] [3, 6] [5, 7] [8, 9] [9, 10]
+[0, 4] False True False False False
+[3, 6] True False True False False
+[5, 7] False True False False False
+[8, 9] False False False False True
+[9, 10] False False False True False
+
+>>> arr.piso.adjacency_matrix(include_index=False)
+array([[False, True, False, False, False],
+ [ True, False, True, False, False],
+ [False, True, False, False, False],
+ [False, False, False, False, True],
+ [False, False, False, True, False]])
+
+>>> arr.piso.adjacency_matrix(edges="disjoint")
+ [0, 4] [3, 6] [5, 7] [8, 9] [9, 10]
+[0, 4] False False True True True
+[3, 6] False False False True True
+[5, 7] True False False True True
+[8, 9] True True True False False
+[9, 10] True True True False False
+"""
diff --git a/piso/docstrings/intervalarray.py b/piso/docstrings/intervalarray.py
index c37beba..1b5fdcf 100644
--- a/piso/docstrings/intervalarray.py
+++ b/piso/docstrings/intervalarray.py
@@ -791,3 +791,50 @@ def join_params(list_of_param_strings):
>>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False)
array([[ True]])
"""
+
+split_docstring = """
+Given a set of intervals, and break points, splits the intervals into pieces wherever
+they overlap a break point.
+
+Parameters
+----------
+interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+ Contains the (possibly overlapping) intervals. May be left-closed, right-closed, both, or neither.
+x : scalar, or array-like of scalars
+ Values in *x* should belong to the same domain as the intervals in *interval_array*.
+ May contain duplicates and be unsorted.
+
+Returns
+----------
+:class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
+ Return type will be the same type as *interval_array*
+
+Examples
+-----------
+
+>>> import pandas as pd
+>>> import piso
+
+>>> arr = pd.arrays.IntervalArray.from_tuples(
+... [(0, 4), (2, 5)],
+... )
+
+>>> piso.split(arr, 3)
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> piso.split(arr, [3,3,3,3])
+<IntervalArray>
+[(0, 3], (3, 4], (2, 3], (3, 5]]
+Length: 4, closed: right, dtype: interval[int64]
+
+>>> arr = pd.IntervalIndex.from_tuples(
+... [(0, 4), (2, 5)], closed="neither",
+... )
+
+>>> piso.split(arr, [1, 6, 4])
+IntervalIndex([(0.0, 1.0), (1.0, 4.0), (2.0, 4.0), (4.0, 5.0)],
+ closed='neither',
+ dtype='interval[float64]')
+"""
diff --git a/piso/graph.py b/piso/graph.py
new file mode 100644
index 0000000..39144df
--- /dev/null
+++ b/piso/graph.py
@@ -0,0 +1,89 @@
+import numpy as np
+import pandas as pd
+from pandas.core.indexes import interval
+
+
+def adjacency_matrix(interval_array, edges="intersect", include_index=True):
+ """
+ Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.
+
+ The set of nodes correspond to intervals and the edges are defined by the relationship
+ defined by the *edges* parameter.
+
+ Note that the diagonal is defined with False values by default.
+
+ Parameters
+ ----------
+ interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
+ Contains the intervals.
+ edges : {"intersect", "disjoint"}, default "intersect"
+ Defines the relationship that edges between nodes represent.
+ include_index : bool, default True
+ If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
+ If False then a :class:`numpy.ndarray` is returned.
+
+ Returns
+ -------
+ :class:`pandas.DataFrame` or :class:`numpy.ndarray`
+ Boolean valued, symmetrical, with False along diagonal.
+
+ Examples
+ ---------
+
+ >>> import pandas as pd
+ >>> import piso
+
+ >>> arr = pd.arrays.IntervalArray.from_tuples(
+ ... [(0,4), (3,6), (5, 7), (8,9), (9,10)],
+ ... closed="both",
+ ... )
+
+ >>> piso.adjacency_matrix(arr)
+ [0, 4] [3, 6] [5, 7] [8, 9] [9, 10]
+ [0, 4] False True False False False
+ [3, 6] True False True False False
+ [5, 7] False True False False False
+ [8, 9] False False False False True
+ [9, 10] False False False True False
+
+ >>> piso.adjacency_matrix(arr, include_index=False)
+ array([[False, True, False, False, False],
+ [ True, False, True, False, False],
+ [False, True, False, False, False],
+ [False, False, False, False, True],
+ [False, False, False, True, False]])
+
+ >>> piso.adjacency_matrix(arr, edges="disjoint")
+ [0, 4] [3, 6] [5, 7] [8, 9] [9, 10]
+ [0, 4] False False True True True
+ [3, 6] False False False True True
+ [5, 7] True False False True True
+ [8, 9] True True True False False
+ [9, 10] True True True False False
+ """
+ if edges == "intersect":
+ result = _adj_mat_intersection(interval_array)
+ elif edges == "disjoint":
+ result = ~_adj_mat_intersection(interval_array, fill_diagonal=False)
+ else:
+ raise ValueError(f"Invalid value for edges parameter: {edges}")
+
+ if include_index:
+ result = pd.DataFrame(result, index=interval_array, columns=interval_array)
+
+ return result
+
+
+def _adj_mat_intersection(interval_array, fill_diagonal=True):
+ result = np.greater.outer(
+ interval_array.right, interval_array.left
+ ) & np.less.outer(interval_array.left, interval_array.right)
+ if interval_array.closed == "both":
+ result = (
+ result
+ | np.equal.outer(interval_array.right, interval_array.left)
+ | np.equal.outer(interval_array.left, interval_array.right)
+ )
+ if fill_diagonal:
+ np.fill_diagonal(result, False)
+ return result
diff --git a/piso/intervalarray.py b/piso/intervalarray.py
index f5b873d..692360d 100644
--- a/piso/intervalarray.py
+++ b/piso/intervalarray.py
@@ -219,10 +219,21 @@ def contains(interval_array, x, include_index=True):
return result
-@Appender(docstrings.get_indexer_docstring, join="\n", indents=1)
-def get_indexer(interval_array, x):
- if not isdisjoint(interval_array):
- raise ValueError("get_indexer method is only valid for disjoint intervals.")
- ia_length = len(interval_array)
- contain_matrix = contains(interval_array, x, include_index=False)
- return (np.linspace(1, ia_length, ia_length).dot(contain_matrix) - 1).astype(int)
+@Appender(docstrings.split_docstring, join="\n", indents=1)
+def split(interval_array, x):
+ # x = pd.Series(x).values
+ x = pd.Series(sorted(set(x))).values # converting to numpy array will not work
+ contained = contains(interval_array.set_closed("neither"), x, include_index=False)
+ breakpoints = np.concatenate(
+ (
+ np.expand_dims(interval_array.left.values, 1),
+ pd.DataFrame(np.broadcast_to(x, contained.shape)).where(contained).values,
+ np.expand_dims(interval_array.right.values, 1),
+ ),
+ axis=1,
+ )
+ lefts = breakpoints[:, :-1]
+ rights = breakpoints[:, 1:]
+ return interval_array.from_arrays(
+ lefts[~np.isnan(lefts)], rights[~np.isnan(rights)], closed=interval_array.closed
+ )
diff --git a/piso/ndframe.py b/piso/ndframe.py
index 7aa0163..d792363 100644
--- a/piso/ndframe.py
+++ b/piso/ndframe.py
@@ -14,7 +14,7 @@ def lookup(frame_or_series, x):
raise ValueError("DataFrame or Series must be indexed by an IntervalIndex")
if not hasattr(x, "__len__"):
x = np.array(x, ndmin=1)
- indexer = intervalarray.get_indexer(frame_or_series.index, x)
+ indexer = frame_or_series.index.get_indexer(x)
result = frame_or_series.copy().iloc[indexer].set_axis(x)
set_nan = indexer == -1
if set_nan.any():
@@ -52,7 +52,7 @@ def _get_indexers(*dfs):
)
tiling_index = pd.IntervalIndex.from_breaks(sorted(set(breaks)))
lookups = tiling_index.left if closed == "left" else tiling_index.right
- indexers = [intervalarray.get_indexer(df.index, lookups) for df in dfs]
+ indexers = [df.index.get_indexer(lookups) for df in dfs]
return tiling_index, indexers
diff --git a/pyproject.toml b/pyproject.toml
index 00f1e53..27a2100 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "poetry.masonry.api"
[tool.poetry]
name = "piso"
-version = "0.6.0"
+version = "0.7.0"
description = "Pandas Interval Set Operations: methods for set operations, analytics, lookups and joins on pandas' Interval, IntervalArray and IntervalIndex"
readme = "README.md"
authors = ["Riley Clement "]
diff --git a/tests/test_graph.py b/tests/test_graph.py
new file mode 100644
index 0000000..cc7f4a7
--- /dev/null
+++ b/tests/test_graph.py
@@ -0,0 +1,291 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+import piso
+import piso.graph as piso_graph
+from piso import register_accessors
+
+register_accessors()
+
+
+def get_accessor_method(self, function):
+ return {
+ piso_graph.adjacency_matrix: self.piso.adjacency_matrix,
+ }[function]
+
+
+def get_package_method(function):
+ return {
+ piso_graph.adjacency_matrix: piso.adjacency_matrix,
+ }[function]
+
+
+def perform_op(*args, how, function, **kwargs):
+ # how = "supplied, accessor, or package"
+ if how == "accessor":
+ self, *args = args
+ return get_accessor_method(self, function)(*args, **kwargs)
+ elif how == "package":
+ return get_package_method(function)(*args, **kwargs)
+ else:
+ return function(*args, **kwargs)
+
+
+def map_to_dates(obj, date_type):
+ def make_date(x):
+ ts = pd.to_datetime(x, unit="d", origin="2021-09-30")
+ if date_type == "numpy":
+ return ts.to_numpy()
+ if date_type == "datetime":
+ return ts.to_pydatetime()
+ if date_type == "timedelta":
+ return ts - pd.Timestamp("2021-10-1")
+ return ts
+
+ if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)):
+ return obj.from_arrays(
+ obj.left.map(make_date),
+ obj.right.map(make_date),
+ obj.closed,
+ )
+ elif isinstance(obj, list):
+ return [make_date(x) for x in obj]
+
+
+@pytest.mark.parametrize(
+ "closed",
+ ["left", "right", "neither"],
+)
+@pytest.mark.parametrize(
+ "interval_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "include_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "date_type",
+ ["timestamp", "numpy", "datetime", "timedelta", None],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+def test_adjacency_matrix_intersects_1(
+ closed, interval_index, include_index, date_type, how
+):
+ interval_array = pd.arrays.IntervalArray.from_tuples(
+ [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)],
+ closed=closed,
+ )
+ if interval_index:
+ interval_array = pd.IntervalIndex(interval_array)
+
+ if date_type:
+ interval_array = map_to_dates(interval_array, date_type)
+
+ expected = np.array(
+ [
+ [False, True, False, False, False],
+ [True, False, True, False, False],
+ [False, True, False, False, False],
+ [False, False, False, False, False],
+ [False, False, False, False, False],
+ ]
+ )
+
+ result = perform_op(
+ interval_array,
+ how=how,
+ function=piso_graph.adjacency_matrix,
+ edges="intersect",
+ include_index=include_index,
+ )
+ if include_index:
+ expected = pd.DataFrame(expected, columns=interval_array, index=interval_array)
+ pd.testing.assert_frame_equal(result, expected)
+ else:
+ assert np.array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "interval_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "include_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "date_type",
+ ["timestamp", "numpy", "datetime", "timedelta", None],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+def test_adjacency_matrix_intersects_2(interval_index, include_index, date_type, how):
+ interval_array = pd.arrays.IntervalArray.from_tuples(
+ [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)],
+ closed="both",
+ )
+ if interval_index:
+ interval_array = pd.IntervalIndex(interval_array)
+
+ if date_type:
+ interval_array = map_to_dates(interval_array, date_type)
+
+ expected = np.array(
+ [
+ [False, True, False, False, False],
+ [True, False, True, False, False],
+ [False, True, False, False, False],
+ [False, False, False, False, True],
+ [False, False, False, True, False],
+ ]
+ )
+
+ result = perform_op(
+ interval_array,
+ how=how,
+ function=piso_graph.adjacency_matrix,
+ edges="intersect",
+ include_index=include_index,
+ )
+ if include_index:
+ expected = pd.DataFrame(expected, columns=interval_array, index=interval_array)
+ pd.testing.assert_frame_equal(result, expected)
+ else:
+ assert np.array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "closed",
+ ["left", "right", "neither"],
+)
+@pytest.mark.parametrize(
+ "interval_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "include_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "date_type",
+ ["timestamp", "numpy", "datetime", "timedelta", None],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+def test_adjacency_matrix_disjoint_1(
+ closed, interval_index, include_index, date_type, how
+):
+ interval_array = pd.arrays.IntervalArray.from_tuples(
+ [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)],
+ closed=closed,
+ )
+ if interval_index:
+ interval_array = pd.IntervalIndex(interval_array)
+
+ if date_type:
+ interval_array = map_to_dates(interval_array, date_type)
+
+ expected = np.array(
+ [
+ [False, False, True, True, True],
+ [False, False, False, True, True],
+ [True, False, False, True, True],
+ [True, True, True, False, True],
+ [True, True, True, True, False],
+ ]
+ )
+
+ result = perform_op(
+ interval_array,
+ how=how,
+ function=piso_graph.adjacency_matrix,
+ edges="disjoint",
+ include_index=include_index,
+ )
+ if include_index:
+ expected = pd.DataFrame(expected, columns=interval_array, index=interval_array)
+ pd.testing.assert_frame_equal(result, expected)
+ else:
+ assert np.array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "interval_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "include_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "date_type",
+ ["timestamp", "numpy", "datetime", "timedelta", None],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+def test_adjacency_matrix_disjoint_2(interval_index, include_index, date_type, how):
+ interval_array = pd.arrays.IntervalArray.from_tuples(
+ [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)],
+ closed="both",
+ )
+ if interval_index:
+ interval_array = pd.IntervalIndex(interval_array)
+
+ if date_type:
+ interval_array = map_to_dates(interval_array, date_type)
+
+ expected = np.array(
+ [
+ [False, False, True, True, True],
+ [False, False, False, True, True],
+ [True, False, False, True, True],
+ [True, True, True, False, False],
+ [True, True, True, False, False],
+ ]
+ )
+
+ result = perform_op(
+ interval_array,
+ how=how,
+ function=piso_graph.adjacency_matrix,
+ edges="disjoint",
+ include_index=include_index,
+ )
+ if include_index:
+ expected = pd.DataFrame(expected, columns=interval_array, index=interval_array)
+ pd.testing.assert_frame_equal(result, expected)
+ else:
+ assert np.array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "closed",
+ ["left", "right", "both", "neither"],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+def test_adjacency_matrix_edges_exception(closed, how):
+ interval_array = pd.arrays.IntervalArray.from_tuples(
+ [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)],
+ closed=closed,
+ )
+ with pytest.raises(ValueError):
+ perform_op(
+ interval_array,
+ how=how,
+ function=piso_graph.adjacency_matrix,
+ edges="not_an_option",
+ )
diff --git a/tests/test_single_interval_array.py b/tests/test_single_interval_array.py
index ccbbf3a..7cd41e4 100644
--- a/tests/test_single_interval_array.py
+++ b/tests/test_single_interval_array.py
@@ -19,8 +19,8 @@ def get_accessor_method(self, function):
piso_intervalarray.issubset: self.piso.issubset,
piso_intervalarray.coverage: self.piso.coverage,
piso_intervalarray.complement: self.piso.complement,
- piso_intervalarray.get_indexer: self.piso.get_indexer,
piso_intervalarray.contains: self.piso.contains,
+ piso_intervalarray.split: self.piso.split,
}[function]
@@ -34,8 +34,8 @@ def get_package_method(function):
piso_intervalarray.issubset: piso.issubset,
piso_intervalarray.coverage: piso.coverage,
piso_intervalarray.complement: piso.complement,
- piso_intervalarray.get_indexer: piso.get_indexer,
piso_intervalarray.contains: piso.contains,
+ piso_intervalarray.split: piso.split,
}[function]
@@ -80,6 +80,16 @@ def make_ia3(interval_index, closed):
return ia3
+def make_ia4(interval_index, closed):
+ ia4 = pd.arrays.IntervalArray.from_tuples(
+ [(1, 4), (2, 5), (3, 6)],
+ closed=closed,
+ )
+ if interval_index:
+ ia4 = pd.IntervalIndex(ia4)
+ return ia4
+
+
def make_ia_from_tuples(interval_index, tuples, closed):
klass = pd.IntervalIndex if interval_index else pd.arrays.IntervalArray
return klass.from_tuples(tuples, closed=closed)
@@ -440,9 +450,9 @@ def test_symmetric_difference_min_overlaps_all_2(
)
-def map_to_dates(interval_array, date_type):
+def map_to_dates(obj, date_type):
def make_date(x):
- ts = pd.Timestamp(f"2021-10-{x}")
+ ts = pd.to_datetime(x, unit="d", origin="2021-09-30")
if date_type == "numpy":
return ts.to_numpy()
if date_type == "datetime":
@@ -451,11 +461,14 @@ def make_date(x):
return ts - pd.Timestamp("2021-10-1")
return ts
- return interval_array.from_arrays(
- interval_array.left.map(make_date),
- interval_array.right.map(make_date),
- interval_array.closed,
- )
+ if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)):
+ return obj.from_arrays(
+ obj.left.map(make_date),
+ obj.right.map(make_date),
+ obj.closed,
+ )
+ elif isinstance(obj, list):
+ return [make_date(x) for x in obj]
@pytest.mark.parametrize(
@@ -660,60 +673,6 @@ def test_complement(interval_index, domain, expected_tuples, closed, how):
)
-@pytest.mark.parametrize(
- "interval_index",
- [True, False],
-)
-@pytest.mark.parametrize(
- "x, closed, expected",
- [
- (3, "left", 0),
- (4, "left", -1),
- (3, "right", -1),
- (4, "right", 0),
- (3, "both", 0),
- (4, "both", 0),
- (3, "neither", -1),
- (4, "neither", -1),
- ([3, 9, 12], "left", np.array([0, 1, -1])),
- ([3, 9, 12], "right", np.array([-1, 1, -1])),
- ([3, 9, 12], "both", np.array([0, 1, -1])),
- ([3, 9, 12], "neither", np.array([-1, 1, -1])),
- ],
-)
-@pytest.mark.parametrize(
- "how",
- ["supplied", "accessor", "package"],
-)
-def test_get_indexer(interval_index, x, closed, expected, how):
- ia = make_ia3(interval_index, closed)
- result = perform_op(
- ia,
- x,
- how=how,
- function=piso_intervalarray.get_indexer,
- )
- if hasattr(expected, "__len__"):
- assert all(result == expected)
- else:
- assert result == expected
-
-
-@pytest.mark.parametrize(
- "how",
- ["supplied", "accessor", "package"],
-)
-def test_get_indexer_exception(how):
- ia = make_ia1(True, "left")
- with pytest.raises(ValueError):
- perform_op(
- ia,
- 1,
- how=how,
- function=piso_intervalarray.get_indexer,
- )
-
-
@pytest.mark.parametrize(
"interval_index",
[True, False],
@@ -774,3 +733,52 @@ def test_contains(interval_index, x, closed, expected, how, include_index):
else:
expected_result = np.array(expected)
assert (result == expected_result).all()
+
+
+@pytest.mark.parametrize(
+ "interval_index",
+ [True, False],
+)
+@pytest.mark.parametrize(
+ "x, expected_tuples",
+ [
+ ([4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]),
+ ([3.5], [(1, 3.5), (3.5, 4), (2, 3.5), (3.5, 5), (3, 3.5), (3.5, 6)]),
+ ([3, 4], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]),
+ ([0, 3, 4, 7], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]),
+ ([0], [(1, 4), (2, 5), (3, 6)]),
+ ([4, 4], [(1, 4), (2, 4), (4, 5), (3, 4), (4, 6)]),
+ ([4, 3], [(1, 3), (3, 4), (2, 3), (3, 4), (4, 5), (3, 4), (4, 6)]),
+ ],
+)
+@pytest.mark.parametrize(
+ "closed",
+ ["left", "right", "both", "neither"],
+)
+@pytest.mark.parametrize(
+ "how",
+ ["supplied", "accessor", "package"],
+)
+@pytest.mark.parametrize(
+ "date_type",
+ ["timestamp", "numpy", "datetime", "timedelta", None],
+)
+def test_split(interval_index, x, expected_tuples, closed, how, date_type):
+ ia = make_ia4(interval_index, closed)
+ ia = map_to_dates(ia, date_type)
+
+ expected = make_ia_from_tuples(False, expected_tuples, closed)
+ expected = map_to_dates(expected, date_type)
+ x = map_to_dates(x, date_type)
+
+ result = perform_op(
+ ia,
+ x,
+ how=how,
+ function=piso_intervalarray.split,
+ )
+ assert_interval_array_equal(
+ result,
+ expected,
+ interval_index,
+ )