Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alternative variable binning approach #849

Merged
merged 20 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
fcae83c
Add variable binning and adjust output functions
JanWeldert Jan 3, 2025
8c20429
Force events representation and add histogramming to translations
JanWeldert Jan 7, 2025
35dcf0e
Adjust analysis.py and config parser
JanWeldert Jan 9, 2025
81c308b
Add example
JanWeldert Jan 10, 2025
eaf6235
Expand example a bit
JanWeldert Jan 17, 2025
3215d55
fit_hypo does not know hypo_asimov_dist
JanWeldert Feb 14, 2025
87f21fb
generalise pipeline config parser and varbinning to accept selection …
thehrh Feb 26, 2025
854e356
Make selection string work with container
JanWeldert Feb 27, 2025
eb78c69
Check if selections are exclusive
JanWeldert Feb 28, 2025
0ee0429
Define selection before checking it
JanWeldert Feb 28, 2025
ba8c4a7
Simple test and more checks for VarBinning
JanWeldert Mar 3, 2025
e9b0b5b
docs & comments, split up check_varbinning (was performing two very d…
thehrh Mar 6, 2025
9f078c6
exclusivity check only when requesting new output binning in get_outputs
thehrh Mar 6, 2025
92409cf
Add docstrings and change assert to warn
JanWeldert Mar 6, 2025
94a0529
fix undefined varbinning name in config parser, debug logging of sele…
thehrh Mar 6, 2025
7837426
superficial: comments & docstrings, thousands of lines of ipynb outpu…
thehrh Mar 6, 2025
f7a53ad
also make separate functions for parsing different types of binning f…
thehrh Mar 6, 2025
b0eabba
Merge remote-tracking branch 'origin/master' into var_bin
thehrh Mar 12, 2025
1a3dd76
adapt varbinning init and tests (require > 1 binnings, detect any bin…
thehrh Mar 12, 2025
5c2d85f
comments/NotImplementedErrors for generalized_poisson_llh in analysis…
thehrh Mar 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 73 additions & 24 deletions pisa/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

__author__ = "J.L. Lanfranchi, P. Eller"

__license__ = """Copyright (c) 2014-2018, The IceCube Collaboration
__license__ = """Copyright (c) 2014-2025, The IceCube Collaboration
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -69,10 +69,6 @@
# a warning message. Or we just wait to see if it fails when the user runs the
# code.

# TODO: return an OrderedDict instead of a list if the user requests
# intermediate results? Or simply use the `outputs` attribute of each stage to
# dynamically access this?


class Pipeline():
"""Instantiate stages according to a parsed config object; execute
Expand Down Expand Up @@ -120,7 +116,8 @@ def __init__(self, config, profile=False):
self._source_code_hash = None

if isinstance(self._output_binning, VarBinning):
self.check_VarBinning()
self.assert_varbinning_compat()
self.assert_exclusive_varbinning()

# check in case someone decided to add a non-daemonflux parameter with daemon_
# in it, which would potentially make penalty calculation incorrect
Expand Down Expand Up @@ -372,16 +369,18 @@ def get_outputs(self, **get_outputs_kwargs):
else:
outputs = self._get_outputs(**get_outputs_kwargs)
return outputs

def _get_outputs(self, output_binning=None, output_key=None):
"""Get MapSet output"""

self.run()

if output_binning is None:
output_binning = self.output_binning
elif isinstance(output_binning, VarBinning):
self.check_VarBinning(output_binning)
if isinstance(output_binning, VarBinning):
# checks also have to be done when no new output_binning is passed
self.assert_varbinning_compat()
self.assert_exclusive_varbinning(output_binning=output_binning)
if output_key is None:
output_key = self.output_key

Expand All @@ -395,10 +394,11 @@ def _get_outputs(self, output_binning=None, output_key=None):
outputs = self.data.get_mapset(output_key[0], error=output_key[1])
else:
outputs = self.data.get_mapset(output_key)
else: #VarBinning
outputs = []

else:
assert isinstance(output_binning, VarBinning)
assert self.data.representation == "events"
outputs = []

selections = output_binning.selections
for i in range(len(output_binning.binnings)):
Expand All @@ -424,7 +424,7 @@ def _get_outputs(self, output_binning=None, output_key=None):
cc.tranlation_modes[output_key] = 'sum'

containers.append(cc)

dat = ContainerSet(name=self.data.name,
containers=containers,
representation=output_binning.binnings[i],
Expand Down Expand Up @@ -630,26 +630,58 @@ def hash(self):
def __hash__(self):
return self.hash

def check_VarBinning(self, output_binning=None):
"""Checks if pipeline works with VarBinning and if VarBinning selections
are exclusive."""
# VarBinning will only work if all stages have apply_mode=events
def assert_varbinning_compat(self):
"""Asserts that pipeline setup is compatible with `VarBinning`:
all stages need to apply to events.
Raises
------
ValueError : if at least one stage has apply_mode!='events'
"""
incompat = []
for s in self.stages:
assert s.apply_mode == 'events'
if not s.apply_mode == 'events':
incompat.append(s)
if len(incompat) >= 1:
str_incompat = ", ".join(
[f"{stage.stage_name}.{stage.service_name}" for stage in incompat]
)
raise ValueError(
"When a variable binning is used, all stages need to set "
f"apply_mode='events', but {str_incompat} do(es) not!"
)

def assert_exclusive_varbinning(self, output_binning=None):
"""Assert that `VarBinning` selections are mutually exclusive.
This is done individually for each `Container` in `self.data`.
# now check if VarBinning selection is exclusive (only necessary if list)
if output_binning == None:
Parameters
----------
output_binning : None, MultiDimBinning, VarBinning
Raises
------
ValueError : if a `VarBinning` is tested and at least two selections
(if applicable) are not mutually exclusive
"""
if output_binning is None:
selections = self.output_binning.selections
nselections = self.output_binning.nselections
else:
selections = output_binning.selections
nselections = output_binning.nselections
if isinstance(selections, list):
# list of selection-criteria strings
for c in self.data:
keep = np.zeros(c.size)
for i in range(nselections):
keep += c.get_keep_mask(selections[i])
assert np.all(keep <= 1), 'Selection is not exclusive'
if not np.all(keep <= 1):
raise ValueError(
f"Selections {selections} are not mutually exclusive!"
)

@property
def output_binning(self):
Expand All @@ -658,14 +690,13 @@ def output_binning(self):
@output_binning.setter
def output_binning(self, binning):
if isinstance(binning, VarBinning):
self.check_VarBinning(binning)
self.assert_varbinning_compat()
self.assert_exclusive_varbinning(output_binning=binning)
self._output_binning = binning


def test_Pipeline():
"""Unit tests for Pipeline class"""
# pylint: disable=line-too-long

# TODO: make a test config file with hierarchy AND material selector,
# uncomment / add in tests commented / removed below

Expand Down Expand Up @@ -741,6 +772,24 @@ def test_Pipeline():
#current_hier = new_hier
#current_mat = new_mat

#
# Test: a pipeline using a VarBinning
#
p = Pipeline("settings/pipeline/varbin_example.cfg")
out = p.get_outputs()
# a split into two event selections has to result in two MapSets
assert len(out) == 2
# a binned apply_mode has to result in a ValueError
# first get a pre-existing binning
binned_calc_mode = p.stages[2].calc_mode
assert isinstance(binned_calc_mode, MultiDimBinning)
p.stages[2].apply_mode = binned_calc_mode
try:
out = p.get_outputs()
except ValueError:
pass
else:
assert False


# ----- Most of this below can go (?) ---
Expand Down
7 changes: 6 additions & 1 deletion pisa_examples/resources/settings/binning/example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,17 @@ calc_grid_coarse.order = true_energy, true_coszen
calc_grid_coarse.true_energy = {'num_bins':50, 'is_log':True, 'domain':[1., 1000] * units.GeV, 'tex':r'E_{\rm true}'}
calc_grid_coarse.true_coszen = {'num_bins':50, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos\,\theta_{z,{\rm true}}'}

# Variable binning
# Example variable-binning definitions to demonstrate deviating syntax

# First define a simple `VarBinning` with pid serving as the split/selection
# dimension on which the reco_coszen binning depends (the reco_energy binning
# is the same across all pid bins)
reco_var_binning.order = reco_energy, reco_coszen
reco_var_binning.split = {'name':'pid', 'bin_edges': [-3.,0.,1000.], 'tex':r'{\rm PID}'}
reco_var_binning.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}
reco_var_binning.reco_coszen = [{'num_bins':10, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}, {'num_bins':20, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}]

# Second `VarBinning` example uses selection-criteria strings to split events
reco_var_binning_2.order = reco_energy, reco_coszen
reco_var_binning_2.split = (true_energy > 10) & (true_coszen > 0), (true_coszen <= 0)
reco_var_binning_2.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}
Expand Down
Loading