Skip to content

Commit e9b0b5b

Browse files
committed
docs & comments, split up check_varbinning (was performing two very different tasks), new pipeline unit test & minor logic fix
1 parent ba8c4a7 commit e9b0b5b

File tree

3 files changed

+86
-119
lines changed

3 files changed

+86
-119
lines changed

pisa/core/pipeline.py

+73-24
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242

4343
__author__ = "J.L. Lanfranchi, P. Eller"
4444

45-
__license__ = """Copyright (c) 2014-2018, The IceCube Collaboration
45+
__license__ = """Copyright (c) 2014-2025, The IceCube Collaboration
4646
4747
Licensed under the Apache License, Version 2.0 (the "License");
4848
you may not use this file except in compliance with the License.
@@ -69,10 +69,6 @@
6969
# a warning message. Or we just wait to see if it fails when the user runs the
7070
# code.
7171

72-
# TODO: return an OrderedDict instead of a list if the user requests
73-
# intermediate results? Or simply use the `outputs` attribute of each stage to
74-
# dynamically access this?
75-
7672

7773
class Pipeline():
7874
"""Instantiate stages according to a parsed config object; excecute
@@ -120,7 +116,8 @@ def __init__(self, config, profile=False):
120116
self._source_code_hash = None
121117

122118
if isinstance(self._output_binning, VarBinning):
123-
self.check_VarBinning()
119+
self.assert_varbinning_compat()
120+
self.assert_exclusive_varbinning()
124121

125122
# check in case someone decided to add a non-daemonflux parameter with daemon_
126123
# in it, which would potentially make penalty calculation incorrect
@@ -372,16 +369,18 @@ def get_outputs(self, **get_outputs_kwargs):
372369
else:
373370
outputs = self._get_outputs(**get_outputs_kwargs)
374371
return outputs
375-
372+
376373
def _get_outputs(self, output_binning=None, output_key=None):
377374
"""Get MapSet output"""
378375

379376
self.run()
380377

381378
if output_binning is None:
382379
output_binning = self.output_binning
383-
elif isinstance(output_binning, VarBinning):
384-
self.check_VarBinning(output_binning)
380+
if isinstance(output_binning, VarBinning):
381+
# checks also have to be done when no new output_binning is passed
382+
self.assert_varbinning_compat()
383+
self.assert_exclusive_varbinning(output_binning=output_binning)
385384
if output_key is None:
386385
output_key = self.output_key
387386

@@ -395,10 +394,11 @@ def _get_outputs(self, output_binning=None, output_key=None):
395394
outputs = self.data.get_mapset(output_key[0], error=output_key[1])
396395
else:
397396
outputs = self.data.get_mapset(output_key)
398-
399-
else: #VarBinning
400-
outputs = []
397+
398+
else:
399+
assert isinstance(output_binning, VarBinning)
401400
assert self.data.representation == "events"
401+
outputs = []
402402

403403
selections = output_binning.selections
404404
for i in range(len(output_binning.binnings)):
@@ -424,7 +424,7 @@ def _get_outputs(self, output_binning=None, output_key=None):
424424
cc.tranlation_modes[output_key] = 'sum'
425425

426426
containers.append(cc)
427-
427+
428428
dat = ContainerSet(name=self.data.name,
429429
containers=containers,
430430
representation=output_binning.binnings[i],
@@ -630,26 +630,58 @@ def hash(self):
630630
def __hash__(self):
631631
return self.hash
632632

633-
def check_VarBinning(self, output_binning=None):
634-
"""Checks if pipeline works with VarBinning and if VarBinning selections
635-
are exclusive."""
636-
# VarBinning will only work if all stages have apply_mode=events
633+
def assert_varbinning_compat(self):
634+
"""Asserts that pipeline setup is compatible with `VarBinning`:
635+
all stages need to apply to events.
636+
637+
Raises
638+
------
639+
ValueError : if at least one stage has apply_mode!='events'
640+
641+
"""
642+
incompat = []
637643
for s in self.stages:
638-
assert s.apply_mode == 'events'
644+
if not s.apply_mode == 'events':
645+
incompat.append(s)
646+
if len(incompat) >= 1:
647+
str_incompat = ", ".join(
648+
[f"{stage.stage_name}.{stage.service_name}" for stage in incompat]
649+
)
650+
raise ValueError(
651+
"When a variable binning is used, all stages need to set "
652+
f"apply_mode='events', but {str_incompat} do(es) not!"
653+
)
654+
655+
def assert_exclusive_varbinning(self, output_binning=None):
656+
"""Assert that `VarBinning` selections are mutually exclusive.
657+
This is done individually for each `Container` in `self.data`.
639658
640-
# now check if VarBinning selection is exclusive (only necessary if list)
641-
if output_binning == None:
659+
Parameters
660+
----------
661+
output_binning : None, MultiDimBinning, VarBinning
662+
663+
Raises
664+
------
665+
ValueError : if a `VarBinning` is tested and at least two selections
666+
(if applicable) are not mutually exclusive
667+
668+
"""
669+
if output_binning is None:
642670
selections = self.output_binning.selections
643671
nselections = self.output_binning.nselections
644672
else:
645673
selections = output_binning.selections
646674
nselections = output_binning.nselections
647675
if isinstance(selections, list):
676+
# list of selection-criteria strings
648677
for c in self.data:
649678
keep = np.zeros(c.size)
650679
for i in range(nselections):
651680
keep += c.get_keep_mask(selections[i])
652-
assert np.all(keep <= 1), 'Selection is not exclusive'
681+
if not np.all(keep <= 1):
682+
raise ValueError(
683+
f"Selections {selections} are not mutually exclusive!"
684+
)
653685

654686
@property
655687
def output_binning(self):
@@ -658,14 +690,13 @@ def output_binning(self):
658690
@output_binning.setter
659691
def output_binning(self, binning):
660692
if isinstance(binning, VarBinning):
661-
self.check_VarBinning(binning)
693+
self.assert_varbinning_compat()
694+
self.assert_exclusive_varbinning(output_binning=binning)
662695
self._output_binning = binning
663696

664697

665698
def test_Pipeline():
666699
"""Unit tests for Pipeline class"""
667-
# pylint: disable=line-too-long
668-
669700
# TODO: make a test config file with hierarchy AND material selector,
670701
# uncomment / add in tests commented / removed below
671702

@@ -741,6 +772,24 @@ def test_Pipeline():
741772
#current_hier = new_hier
742773
#current_mat = new_mat
743774

775+
#
776+
# Test: a pipeline using a VarBinning
777+
#
778+
p = Pipeline("settings/pipeline/varbin_example.cfg")
779+
out = p.get_outputs()
780+
# a split into two event selections has to result in two MapSets
781+
assert len(out) == 2
782+
# a binned apply_mode has to result in a ValueError
783+
# first get a pre-existing binning
784+
binned_calc_mode = p.stages[2].calc_mode
785+
assert isinstance(binned_calc_mode, MultiDimBinning)
786+
p.stages[2].apply_mode = binned_calc_mode
787+
try:
788+
out = p.get_outputs()
789+
except ValueError:
790+
pass
791+
else:
792+
assert False
744793

745794

746795
# ----- Most of this below can go (?) ---

pisa_examples/resources/settings/binning/example.cfg

+6-1
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,17 @@ calc_grid_coarse.order = true_energy, true_coszen
7070
calc_grid_coarse.true_energy = {'num_bins':50, 'is_log':True, 'domain':[1., 1000] * units.GeV, 'tex':r'E_{\rm true}'}
7171
calc_grid_coarse.true_coszen = {'num_bins':50, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos\,\theta_{z,{\rm true}}'}
7272

73-
# Variable binning
73+
# Example variable-binning definitions to demonstrate deviating syntax
74+
75+
# First define a simple `VarBinning` with pid serving as the split/selection
76+
# dimension on which the reco_coszen binning depends (reco_energy is the same
77+
# across all pid bins)
7478
reco_var_binning.order = reco_energy, reco_coszen
7579
reco_var_binning.split = {'name':'pid', 'bin_edges': [-3.,0.,1000.], 'tex':r'{\rm PID}'}
7680
reco_var_binning.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}
7781
reco_var_binning.reco_coszen = [{'num_bins':10, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}, {'num_bins':20, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}]
7882

83+
# Second `VarBinning` example uses selection-criteria strings to split events
7984
reco_var_binning_2.order = reco_energy, reco_coszen
8085
reco_var_binning_2.split = (true_energy > 10) & (true_coszen > 0), (true_coszen <= 0)
8186
reco_var_binning_2.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}

0 commit comments

Comments
 (0)