From 6bbc7e9de1f073e7162d7d7c2987ea9ff4839ecc Mon Sep 17 00:00:00 2001 From: Stephanie Brink Date: Sat, 20 Dec 2025 22:15:06 -0800 Subject: [PATCH 1/3] expose node ordering to caliper readers Default to true. Order nodes using caliper's node ordering, which records the order in which functions are called (rather than by alphabetical frame ordering). --- hatchet/graphframe.py | 19 ++++++++++++++----- hatchet/readers/caliper_native_reader.py | 8 ++++++-- hatchet/readers/caliper_reader.py | 10 +++++++--- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/hatchet/graphframe.py b/hatchet/graphframe.py index 70727d6c..7c9c136b 100644 --- a/hatchet/graphframe.py +++ b/hatchet/graphframe.py @@ -182,7 +182,7 @@ def from_hpctoolkit_latest( ).read() @staticmethod - def from_caliper(filename_or_stream, query=None): + def from_caliper(filename_or_stream, query=None, node_ordering=True): """Read in a Caliper .cali or .json file. Args: @@ -190,15 +190,19 @@ def from_caliper(filename_or_stream, query=None): file in `.cali` or JSON-split format, or an open file object to read one query (str): cali-query in CalQL format + node_ordering (bool): use node ordering (default to true) """ # import this lazily to avoid circular dependencies from .readers.caliper_reader import CaliperReader - return CaliperReader(filename_or_stream, query).read() + return CaliperReader(filename_or_stream, query, node_ordering).read() @staticmethod def from_caliperreader( - filename_or_caliperreader, native=False, string_attributes=[] + filename_or_caliperreader, + native=False, + string_attributes=[], + node_ordering=True, ): """Read in a native Caliper `cali` file using Caliper's python reader. 
@@ -208,12 +212,13 @@ def from_caliperreader( native (bool): use native or user-readable metric names (default) string_attributes (str or list, optional): Adds existing string attributes from within the caliper file to the dataframe + node_ordering (bool): use node ordering, defaults to true """ # import this lazily to avoid circular dependencies from .readers.caliper_native_reader import CaliperNativeReader return CaliperNativeReader( - filename_or_caliperreader, native, string_attributes + filename_or_caliperreader, native, string_attributes, node_ordering ).read() @staticmethod @@ -222,6 +227,7 @@ def from_timeseries( level="loop.start_iteration", native=False, string_attributes=[], + node_ordering=True, ): """Read in a native Caliper timeseries `cali` file using Caliper's python reader. @@ -236,7 +242,10 @@ def from_timeseries( from .readers.caliper_native_reader import CaliperNativeReader return CaliperNativeReader( - filename_or_caliperreader, native, string_attributes + filename_or_caliperreader, + native, + string_attributes, + node_ordering, ).read_timeseries(level=level) @staticmethod diff --git a/hatchet/readers/caliper_native_reader.py b/hatchet/readers/caliper_native_reader.py index 00ebe63f..efa4e413 100644 --- a/hatchet/readers/caliper_native_reader.py +++ b/hatchet/readers/caliper_native_reader.py @@ -45,7 +45,9 @@ class CaliperNativeReader: ), } - def __init__(self, filename_or_caliperreader, native, string_attributes): + def __init__( + self, filename_or_caliperreader, native, string_attributes, node_ordering + ): """Read in a native cali using Caliper's python reader. 
Args: @@ -53,6 +55,7 @@ def __init__(self, filename_or_caliperreader, native, string_attributes): a CaliperReader object native (bool): use native metric names or user-readable metric names string_attributes (str or list): Adds existing string attributes from within the caliper file to the dataframe + node_ordering (bool): if true, use node ordering """ self.filename_or_caliperreader = filename_or_caliperreader self.filename_ext = "" @@ -67,7 +70,7 @@ def __init__(self, filename_or_caliperreader, native, string_attributes): self.idx_to_node = {} self.callpath_to_idx = {} self.global_nid = 0 - self.node_ordering = False + self.node_ordering = node_ordering self.gf_list = [] self.timeseries_level = None @@ -326,6 +329,7 @@ def _create_parent(child_node, parent_callpath): self.node_ordering = True order = record["min#min#aggregate.slot"] else: + self.node_ordering = False order = self.global_nid frame = Frame({"type": node_type, "name": node_label}) order = int(order) diff --git a/hatchet/readers/caliper_reader.py b/hatchet/readers/caliper_reader.py index e4a86f93..21e0cf5c 100644 --- a/hatchet/readers/caliper_reader.py +++ b/hatchet/readers/caliper_reader.py @@ -26,18 +26,19 @@ class CaliperReader: """Read in a Caliper file (`cali` or split JSON) or file-like object.""" - def __init__(self, filename_or_stream, query=""): + def __init__(self, filename_or_stream, query="", node_ordering=True): """Read from Caliper files (`cali` or split JSON). 
Args: filename_or_stream (str or file-like): name of a `cali` or `cali-query` split JSON file, OR an open file object query (str): cali-query arguments (for cali file) + node_ordering (bool): use node ordering (default: true) """ self.filename_or_stream = filename_or_stream self.filename_ext = "" self.query = query - self.node_ordering = False + self.node_ordering = node_ordering self.json_data = {} self.json_cols = {} @@ -155,10 +156,13 @@ def create_graph(self): self.idx_to_label[idx] = node_label if node["column"] == self.path_col_name: - # If there is a node orderering, assign to the _hatchet_nid + # If there is a node ordering, assign to the _hatchet_nid if "Node order" in self.json_cols: self.node_ordering = True order = self.json_data[idx][0] + elif "Node order" not in self.json_cols: + self.node_ordering = False + if "parent" not in node: # since this node does not have a parent, this is a root graph_root = Node( From af4f8b6ce189ddeb165fce6804c5c2049bfcf52f Mon Sep 17 00:00:00 2001 From: Stephanie Brink Date: Mon, 22 Dec 2025 21:47:26 -0800 Subject: [PATCH 2/3] add error check, disable overwriting of node ordering - add error check if want node ordering but file doesn't support it - when creating the graph, don't overwrite the node ordering boolean based on if caliper file has specific metric - update both caliper reader and caliper native reader --- hatchet/readers/caliper_native_reader.py | 14 +++++++++++--- hatchet/readers/caliper_reader.py | 9 +++++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/hatchet/readers/caliper_native_reader.py b/hatchet/readers/caliper_native_reader.py index efa4e413..e3e98214 100644 --- a/hatchet/readers/caliper_native_reader.py +++ b/hatchet/readers/caliper_native_reader.py @@ -325,11 +325,19 @@ def _create_parent(child_node, parent_callpath): if not hnode: # set the _hatchet_nid by the node order column if it exists, else -1 - if "min#min#aggregate.slot" in record: - self.node_ordering = True + if ( + 
self.node_ordering + and "min#min#aggregate.slot" not in record + ): + raise ValueError( + "node ordering cannot be true if min#min#aggregate.slot is not in the record" + ) + elif ( + self.node_ordering + and "min#min#aggregate.slot" in record + ): order = record["min#min#aggregate.slot"] else: - self.node_ordering = False order = self.global_nid frame = Frame({"type": node_type, "name": node_label}) order = int(order) diff --git a/hatchet/readers/caliper_reader.py b/hatchet/readers/caliper_reader.py index 21e0cf5c..cfcfbee5 100644 --- a/hatchet/readers/caliper_reader.py +++ b/hatchet/readers/caliper_reader.py @@ -157,11 +157,12 @@ def create_graph(self): if node["column"] == self.path_col_name: # If there is a node ordering, assign to the _hatchet_nid - if "Node order" in self.json_cols: - self.node_ordering = True + if self.node_ordering and "Node order" not in self.json_cols: + raise ValueError( + "node ordering cannot be true if the 'Node order' column is not in the file" + ) + elif self.node_ordering and "Node order" in self.json_cols: order = self.json_data[idx][0] - elif "Node order" not in self.json_cols: - self.node_ordering = False if "parent" not in node: # since this node does not have a parent, this is a root From d7511db9707c4420794f24d65d6d48c19c70a875 Mon Sep 17 00:00:00 2001 From: Stephanie Brink Date: Mon, 22 Dec 2025 22:03:37 -0800 Subject: [PATCH 3/3] force node ordering to be false in unit tests --- hatchet/tests/caliper.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hatchet/tests/caliper.py b/hatchet/tests/caliper.py index 58fe7048..554fb465 100644 --- a/hatchet/tests/caliper.py +++ b/hatchet/tests/caliper.py @@ -49,7 +49,7 @@ def test_graphframe(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) assert len(gf.dataframe.groupby("name")) == 24 @@ -80,7 +80,7 @@ def
test_read_lulesh_json(lulesh_caliper_json): def test_calc_pi_json(calc_pi_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(calc_pi_caliper_json)) + gf = GraphFrame.from_caliper(str(calc_pi_caliper_json), node_ordering=False) assert len(gf.dataframe.groupby("name")) == 100 @@ -123,8 +123,8 @@ def test_lulesh_json_stream(lulesh_caliper_cali): @pytest.mark.skipif(sys.version_info > (3, 8), reason="Temporarily allow this to fail.") def test_filter_squash_unify_caliper_data(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json)) - gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf1 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) + gf2 = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) assert gf1.graph is not gf2.graph @@ -160,7 +160,7 @@ def test_filter_squash_unify_caliper_data(lulesh_caliper_json): def test_tree(monkeypatch, lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" monkeypatch.setattr("sys.stdout.isatty", (lambda: False)) - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) output = gf.tree(metric_column="time") assert "121489.000 main" in output @@ -175,7 +175,7 @@ def test_tree(monkeypatch, lulesh_caliper_json): def test_graphframe_to_literal(lulesh_caliper_json): """Sanity test a GraphFrame object with known data.""" - gf = GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) graph_literal = gf.to_literal() gf2 = GraphFrame.from_literal(graph_literal) @@ -830,7 +830,7 @@ def test_graphframe_squash_file_node_order(caliper_ordered_cali): def test_inclusive_time_calculation(lulesh_caliper_json): """Validate update_inclusive_columns() on known dataset containing per-rank data.""" - gf = 
GraphFrame.from_caliper(str(lulesh_caliper_json)) + gf = GraphFrame.from_caliper(str(lulesh_caliper_json), node_ordering=False) # save original time (inc) column for correctness check gf.dataframe["orig_inc_time"] = gf.dataframe["time (inc)"]