Skip to content

Commit 87f21fb

Browse files
committed
Generalise pipeline config parser and VarBinning to also accept selection strings
1 parent 3215d55 commit 87f21fb

File tree

3 files changed

+78
-32
lines changed

3 files changed

+78
-32
lines changed

pisa/core/binning.py

+19-11
Original file line numberDiff line numberDiff line change
@@ -3047,16 +3047,19 @@ class VarBinning(object):
30473047
30483048
"""
30493049
# pylint: enable=line-too-long
3050-
def __init__(self, binnings, cut_var, name=None, mask=None):
3051-
3052-
assert isinstance(cut_var, OneDimBinning)
3053-
assert isinstance(binnings, list) and len(binnings) == cut_var.size
3050+
def __init__(self, binnings, selections, name=None, mask=None):
3051+
3052+
assert (isinstance(selections, OneDimBinning) or
3053+
isinstance(selections, list))
3054+
assert isinstance(binnings, list) and len(binnings) == len(selections)
30543055
for b in binnings:
30553056
assert isinstance(b, MultiDimBinning)
3056-
assert cut_var.name not in b.names
3057+
if isinstance(selections, OneDimBinning):
3058+
assert selections.name not in b.names
30573059

30583060
self._binnings = binnings
3059-
self._cut_var = cut_var
3061+
self._selections = selections
3062+
self._nselections = len(selections)
30603063
self._name = name
30613064
self._names = None
30623065

@@ -3066,9 +3069,14 @@ def binnings(self):
30663069
return self._binnings
30673070

30683071
@property
3069-
def cut_var(self):
3070-
"""OneDimBinning : variable for which to use different binnings"""
3071-
return self._cut_var
3072+
def selections(self):
3073+
"""list of strs or OneDimBinning : selections for which to use different binnings"""
3074+
return self._selections
3075+
3076+
@property
3077+
def nselections(self):
3078+
"""int : number of selections with possibly different binnings"""
3079+
return self._nselections
30723080

30733081
@property
30743082
def name(self):
@@ -3079,7 +3087,7 @@ def name(self):
30793087
def names(self):
30803088
"""list of strings : names of each dimension contained plus cut var"""
30813089
if self._names is None:
3082-
self._names = [self.cut_var.name]
3090+
self._names = [self.cut_var.name] #FIXME
30833091
for b in self.binnings:
30843092
self._names.extend([n for n in b.names if n not in self._names])
30853093
return self._names
@@ -3106,7 +3114,7 @@ def __iter__(self):
31063114
return iter(self._binnings)
31073115

31083116
def __len__(self):
3109-
return len(self._binnings)
3117+
return self._nselections
31103118

31113119

31123120
def test_OneDimBinning():

pisa/utils/config_parser.py

+54-21
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@
239239

240240
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
241241
from collections import OrderedDict
242+
from collections.abc import Mapping
242243
from io import StringIO
243244
from os.path import abspath, expanduser, expandvars, isfile, join
244245
import re
@@ -620,38 +621,68 @@ def parse_pipeline_config(config):
620621
binning, _ = split(name, sep='.')
621622

622623
bin_split = config['binning'].get(binning + '.split', None)
623-
if bin_split is not None: # Use multiple MultiDimBinning(s)
624-
bin_split = OneDimBinning(**eval(config.get('binning', binning + '.split')))
625-
626-
bins = [[] for i in range(bin_split.size)]
624+
if bin_split is not None:
625+
# User requested split into several event samples with their
626+
# own MultiDimBinning definitions.
627+
try:
628+
bin_split = eval(bin_split) # pylint: disable=eval-used
629+
except:
630+
assert isinstance(bin_split, str)
631+
# Just split original str into individual selection strs
632+
bin_split = split(bin_split)
633+
else:
634+
assert isinstance(bin_split, Mapping)
635+
# If input can be parsed as dict, split events according to
636+
# the presumably contained OneDimBinning definition
637+
bin_split = OneDimBinning(**bin_split)
638+
639+
nselections = len(bin_split)
640+
# instantiate the OneDimBinnings corresponding to each selection
641+
bins = [[] for i in range(nselections)]
627642
for bin_name in order:
628643
def_raw = config.get('binning', binning + '.' + bin_name)
629-
kwargs = eval(def_raw)
644+
kwargs = eval(def_raw) # pylint: disable=eval-used
630645
if isinstance(kwargs, list):
631-
assert len(kwargs) == bin_split.size
646+
# Dedicated OneDimBinning kwargs for each selection
647+
assert len(kwargs) == nselections
632648
else:
633-
kwargs = [kwargs] * bin_split.size
634-
for i in range(bin_split.size):
635-
kw = kwargs[i]
636-
bins[i].append(OneDimBinning(bin_name, **kw))
649+
# Broadcast the universal OneDimBinning kwargs across
650+
# all selections
651+
kwargs = [kwargs] * nselections
652+
for i, kw in enumerate(kwargs):
653+
bins[i].append(OneDimBinning(name=bin_name, **kw))
637654

638655
mask = config['binning'].get(binning + '.mask', None)
639656
if mask is not None:
640-
mask = eval(mask)
657+
mask = eval(mask) # pylint: disable=eval-used
641658
if isinstance(mask[0], list):
642-
assert len(mask) == bin_split.size
659+
# Dedicated mask for each selection
660+
assert len(mask) == nselections
643661
else:
644-
mask = [mask] * bin_split.size
662+
# Broadcast the universal mask across all selections
663+
mask = [mask] * nselections
645664
else:
646-
mask = [mask] * bin_split.size
665+
# No mask for any selection
666+
mask = [None] * nselections
647667

648668
multibins = []
649-
for i in range(bin_split.size):
650-
multibins.append(MultiDimBinning(bins[i], name=binning+f"_{i}", mask=mask[i]))
669+
for i in range(nselections):
670+
mb = MultiDimBinning(
671+
dimensions=bins[i],
672+
name=binning+f"_{i}",
673+
mask=mask[i]
674+
)
675+
multibins.append(mb)
651676

652-
binning_dict[binning] = VarBinning(multibins, bin_split, name=binning)
677+
binning_dict[binning] = VarBinning(
678+
binnings=multibins,
679+
selections=bin_split,
680+
name=binning,
681+
mask=mask
682+
)
653683

654-
else: # Use only one MultiDimBinning
684+
else:
685+
# Requested only one single MultiDimBinning for all events
655686
bins = []
656687
for bin_name in order:
657688
try:
@@ -680,7 +711,7 @@ def parse_pipeline_config(config):
680711
)
681712
raise
682713
try:
683-
bins.append(OneDimBinning(bin_name, **kwargs))
714+
bins.append(OneDimBinning(name=bin_name, **kwargs))
684715
except:
685716
logging.error(
686717
"Failed to instantiate new `OneDimBinning` from '%s'"
@@ -691,9 +722,11 @@ def parse_pipeline_config(config):
691722
# Get the bin mask, if there is one
692723
mask = config['binning'].get(binning + '.mask', None)
693724
if mask is not None :
694-
mask = eval(mask)
725+
mask = eval(mask) # pylint: disable=eval-used
695726
# Create the binning object
696-
binning_dict[binning] = MultiDimBinning(bins, name=binning, mask=mask)
727+
binning_dict[binning] = MultiDimBinning(
728+
dimensions=bins, name=binning, mask=mask
729+
)
697730

698731

699732
stage_dicts = OrderedDict()

pisa_examples/resources/settings/binning/example.cfg

+5
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,8 @@ reco_var_binning.order = reco_energy, reco_coszen
7575
reco_var_binning.split = {'name':'pid', 'bin_edges': [-3.,0.,1000.], 'tex':r'{\rm PID}'}
7676
reco_var_binning.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}
7777
reco_var_binning.reco_coszen = [{'num_bins':10, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}, {'num_bins':20, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}]
78+
79+
reco_var_binning_2.order = reco_energy, reco_coszen
80+
reco_var_binning_2.split = (true_energy > 10) & (true_coszen > 0), (true_coszen <= 0)
81+
reco_var_binning_2.reco_energy = {'num_bins':10, 'is_log':True, 'domain':[5.,100.], 'tex':r'E_{\rm reco}'}
82+
reco_var_binning_2.reco_coszen = [{'num_bins':10, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}, {'num_bins':20, 'is_lin':True, 'domain':[-1,1], 'tex':r'\cos{\theta}_{\rm reco}'}]

0 commit comments

Comments
 (0)