From f3acc049028b28fe8ac98e9af573d2b3d814c0ec Mon Sep 17 00:00:00 2001 From: Venaturum Date: Sat, 20 Nov 2021 12:22:07 +1100 Subject: [PATCH] adjacency_matrix methods added (GH25) (#43) * adjacency_matrix methods added (GH25) * extra tests closes #25 --- docs/reference/accessors.rst | 3 +- docs/reference/package.rst | 2 +- docs/release_notes/index.rst | 2 + piso/__init__.py | 1 + piso/accessor.py | 10 +- piso/docstrings/accessor.py | 60 ++++++++ piso/graph.py | 89 +++++++++++ tests/test_graph.py | 291 +++++++++++++++++++++++++++++++++++ 8 files changed, 455 insertions(+), 3 deletions(-) create mode 100644 piso/graph.py create mode 100644 tests/test_graph.py diff --git a/docs/reference/accessors.rst b/docs/reference/accessors.rst index 20272e7..43a3545 100644 --- a/docs/reference/accessors.rst +++ b/docs/reference/accessors.rst @@ -19,4 +19,5 @@ Accessors ArrayAccessor.coverage ArrayAccessor.complement ArrayAccessor.contains - ArrayAccessor.split \ No newline at end of file + ArrayAccessor.split + ArrayAccessor.adjacency_matrix \ No newline at end of file diff --git a/docs/reference/package.rst b/docs/reference/package.rst index 191170a..b9272d8 100644 --- a/docs/reference/package.rst +++ b/docs/reference/package.rst @@ -24,4 +24,4 @@ Top level functions split lookup join - \ No newline at end of file + adjacency_matrix \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 8b3b78c..e8f0550 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -7,7 +7,9 @@ Release notes Added the following methods - :func:`piso.split` +- :func:`piso.adjacency_matrix` - :meth:`ArrayAccessor.split() ` +- :meth:`ArrayAccessor.adjacency_matrix() ` - removed :func:`piso.get_indexer` in favour of :meth:`pandas.IntervalIndex.get_indexer` diff --git a/piso/__init__.py b/piso/__init__.py index a1449d8..cefa917 100644 --- a/piso/__init__.py +++ b/piso/__init__.py @@ -1,3 +1,4 @@ +from piso.graph import 
@Appender(docstrings.adjacency_matrix_docstring, join="\n", indents=1)
def adjacency_matrix(self, edges="intersect", include_index=True):
    # Accessor entry point: hands the wrapped intervals to the package-level
    # implementation in piso.graph.  No inline docstring is written here
    # because the Appender decorator above is given the shared docstring.
    options = {"edges": edges, "include_index": include_index}
    return graph.adjacency_matrix(self._interval_array, **options)
import numpy as np
import pandas as pd
from pandas.core.indexes import interval  # NOTE(review): appears unused here - confirm and remove


def adjacency_matrix(interval_array, edges="intersect", include_index=True):
    """
    Returns a 2D array (or dataframe) of boolean values indicating edges between nodes in a graph.

    The set of nodes correspond to intervals and the edges are defined by the relationship
    defined by the *edges* parameter.

    Note that the diagonal is always defined with False values.  This includes
    empty intervals (equal endpoints with at least one open side), which do not
    intersect themselves but are still not reported as disjoint from themselves.

    Parameters
    ----------
    interval_array : :class:`pandas.arrays.IntervalArray` or :class:`pandas.IntervalIndex`
        Contains the intervals.
    edges : {"intersect", "disjoint"}, default "intersect"
        Defines the relationship that edges between nodes represent.
    include_index : bool, default True
        If True then a :class:`pandas.DataFrame`, indexed by the intervals, is returned.
        If False then a :class:`numpy.ndarray` is returned.

    Returns
    -------
    :class:`pandas.DataFrame` or :class:`numpy.ndarray`
        Boolean valued, symmetrical, with False along diagonal.

    Raises
    ------
    ValueError
        If *edges* is neither "intersect" nor "disjoint".

    Examples
    --------

    >>> import pandas as pd
    >>> import piso

    >>> arr = pd.arrays.IntervalArray.from_tuples(
    ...     [(0,4), (3,6), (5, 7), (8,9), (9,10)],
    ...     closed="both",
    ... )

    >>> piso.adjacency_matrix(arr)
             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
    [0, 4]    False    True   False   False    False
    [3, 6]     True   False    True   False    False
    [5, 7]    False    True   False   False    False
    [8, 9]    False   False   False   False     True
    [9, 10]   False   False   False    True    False

    >>> piso.adjacency_matrix(arr, include_index=False)
    array([[False,  True, False, False, False],
           [ True, False,  True, False, False],
           [False,  True, False, False, False],
           [False, False, False, False,  True],
           [False, False, False,  True, False]])

    >>> piso.adjacency_matrix(arr, edges="disjoint")
             [0, 4]  [3, 6]  [5, 7]  [8, 9]  [9, 10]
    [0, 4]    False   False    True    True     True
    [3, 6]    False   False   False    True     True
    [5, 7]     True   False   False    True     True
    [8, 9]     True    True    True   False    False
    [9, 10]    True    True    True   False    False
    """
    if edges == "intersect":
        result = _adj_mat_intersection(interval_array)
    elif edges == "disjoint":
        result = ~_adj_mat_intersection(interval_array, fill_diagonal=False)
        # A non-empty interval intersects itself, so negation already yields
        # False on the diagonal.  An empty interval does not, which would
        # leave True there; force the documented all-False diagonal.
        np.fill_diagonal(result, False)
    else:
        raise ValueError(f"Invalid value for edges parameter: {edges}")

    if include_index:
        result = pd.DataFrame(result, index=interval_array, columns=interval_array)

    return result


def _adj_mat_intersection(interval_array, fill_diagonal=True):
    """
    Pairwise intersection test between all intervals in *interval_array*.

    Entry ``[i, j]`` of the returned boolean :class:`numpy.ndarray` is True when
    interval ``i`` intersects interval ``j``.  If *fill_diagonal* is True the
    diagonal is overwritten with False, otherwise it is left as computed.
    """
    left, right = interval_array.left, interval_array.right
    # i ends strictly after j starts AND i starts strictly before j ends.
    result = np.greater.outer(right, left) & np.less.outer(left, right)
    if interval_array.closed == "both":
        # Intervals closed on both sides also intersect when they only share
        # an endpoint.
        result = result | np.equal.outer(right, left) | np.equal.outer(left, right)
    if fill_diagonal:
        np.fill_diagonal(result, False)
    return result
import numpy as np
import pandas as pd
import pytest

import piso
import piso.graph as piso_graph
from piso import register_accessors

register_accessors()

# Interval endpoints used by every test in this module.
_TUPLES = [(0, 4), (3, 6), (5, 7), (8, 9), (9, 10)]

# Reusable parametrisations; stacked on the tests in the same order as before.
_closed_not_both = pytest.mark.parametrize("closed", ["left", "right", "neither"])
_interval_index = pytest.mark.parametrize("interval_index", [True, False])
_include_index = pytest.mark.parametrize("include_index", [True, False])
_date_type = pytest.mark.parametrize(
    "date_type",
    ["timestamp", "numpy", "datetime", "timedelta", None],
)
_how = pytest.mark.parametrize("how", ["supplied", "accessor", "package"])


def get_accessor_method(self, function):
    """Return the ``.piso`` accessor method of *self* that mirrors *function*."""
    return {
        piso_graph.adjacency_matrix: self.piso.adjacency_matrix,
    }[function]


def get_package_method(function):
    """Return the top-level ``piso`` function that mirrors *function*."""
    return {
        piso_graph.adjacency_matrix: piso.adjacency_matrix,
    }[function]


def perform_op(*args, how, function, **kwargs):
    """
    Call *function* through one of three entry points.

    how = "accessor" uses the accessor on the first positional argument,
    how = "package" uses the top-level piso function, and anything else
    (the tests use "supplied") calls *function* directly.
    """
    if how == "accessor":
        self, *args = args
        return get_accessor_method(self, function)(*args, **kwargs)
    elif how == "package":
        return get_package_method(function)(*args, **kwargs)
    else:
        return function(*args, **kwargs)


def map_to_dates(obj, date_type):
    """
    Convert numeric values to date-like values of the kind named by *date_type*.

    Numbers are read as day offsets from 2021-09-30.  Interval containers are
    rebuilt from their converted endpoints with the same ``closed`` value and
    lists are converted element-wise.  Any other input yields None.
    """

    def make_date(x):
        ts = pd.to_datetime(x, unit="d", origin="2021-09-30")
        if date_type == "numpy":
            return ts.to_numpy()
        if date_type == "datetime":
            return ts.to_pydatetime()
        if date_type == "timedelta":
            return ts - pd.Timestamp("2021-10-1")
        return ts

    if isinstance(obj, (pd.IntervalIndex, pd.arrays.IntervalArray)):
        return obj.from_arrays(
            obj.left.map(make_date),
            obj.right.map(make_date),
            obj.closed,
        )
    elif isinstance(obj, list):
        return [make_date(x) for x in obj]


def _make_intervals(closed, interval_index=False, date_type=None):
    """Build the shared test intervals in the requested container and value type."""
    interval_array = pd.arrays.IntervalArray.from_tuples(_TUPLES, closed=closed)
    if interval_index:
        interval_array = pd.IntervalIndex(interval_array)
    if date_type:
        interval_array = map_to_dates(interval_array, date_type)
    return interval_array


def _check_adjacency(interval_array, expected, *, edges, include_index, how):
    """Run adjacency_matrix via *how* and compare against the *expected* nested list."""
    expected = np.array(expected)
    result = perform_op(
        interval_array,
        how=how,
        function=piso_graph.adjacency_matrix,
        edges=edges,
        include_index=include_index,
    )
    if include_index:
        expected = pd.DataFrame(expected, columns=interval_array, index=interval_array)
        pd.testing.assert_frame_equal(result, expected)
    else:
        assert np.array_equal(result, expected)


@_closed_not_both
@_interval_index
@_include_index
@_date_type
@_how
def test_adjacency_matrix_intersects_1(
    closed, interval_index, include_index, date_type, how
):
    # With at least one open side, (8, 9) and (9, 10) do not intersect.
    expected = [
        [False, True, False, False, False],
        [True, False, True, False, False],
        [False, True, False, False, False],
        [False, False, False, False, False],
        [False, False, False, False, False],
    ]
    _check_adjacency(
        _make_intervals(closed, interval_index, date_type),
        expected,
        edges="intersect",
        include_index=include_index,
        how=how,
    )


@_interval_index
@_include_index
@_date_type
@_how
def test_adjacency_matrix_intersects_2(interval_index, include_index, date_type, how):
    # Closed on both sides: [8, 9] and [9, 10] share the point 9.
    expected = [
        [False, True, False, False, False],
        [True, False, True, False, False],
        [False, True, False, False, False],
        [False, False, False, False, True],
        [False, False, False, True, False],
    ]
    _check_adjacency(
        _make_intervals("both", interval_index, date_type),
        expected,
        edges="intersect",
        include_index=include_index,
        how=how,
    )


@_closed_not_both
@_interval_index
@_include_index
@_date_type
@_how
def test_adjacency_matrix_disjoint_1(
    closed, interval_index, include_index, date_type, how
):
    expected = [
        [False, False, True, True, True],
        [False, False, False, True, True],
        [True, False, False, True, True],
        [True, True, True, False, True],
        [True, True, True, True, False],
    ]
    _check_adjacency(
        _make_intervals(closed, interval_index, date_type),
        expected,
        edges="disjoint",
        include_index=include_index,
        how=how,
    )


@_interval_index
@_include_index
@_date_type
@_how
def test_adjacency_matrix_disjoint_2(interval_index, include_index, date_type, how):
    expected = [
        [False, False, True, True, True],
        [False, False, False, True, True],
        [True, False, False, True, True],
        [True, True, True, False, False],
        [True, True, True, False, False],
    ]
    _check_adjacency(
        _make_intervals("both", interval_index, date_type),
        expected,
        edges="disjoint",
        include_index=include_index,
        how=how,
    )


@pytest.mark.parametrize(
    "closed",
    ["left", "right", "both", "neither"],
)
@_how
def test_adjacency_matrix_edges_exception(closed, how):
    interval_array = _make_intervals(closed)
    # Pin the message so an unrelated ValueError cannot satisfy the test.
    with pytest.raises(ValueError, match="Invalid value for edges parameter"):
        perform_op(
            interval_array,
            how=how,
            function=piso_graph.adjacency_matrix,
            edges="not_an_option",
        )