Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Alternative variable binning approach #849

Merged
merged 20 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
fcae83c
Add variable binning and adjust output functions
JanWeldert Jan 3, 2025
8c20429
Force events representation and add histogramming to translations
JanWeldert Jan 7, 2025
35dcf0e
Adjust analysis.py and config parser
JanWeldert Jan 9, 2025
81c308b
Add example
JanWeldert Jan 10, 2025
eaf6235
Expand example a bit
JanWeldert Jan 17, 2025
3215d55
fit_hypo does not know hypo_asimov_dist
JanWeldert Feb 14, 2025
87f21fb
generalise pipeline config parser and varbinning to accept selection …
thehrh Feb 26, 2025
854e356
Make selection string work with container
JanWeldert Feb 27, 2025
eb78c69
Check if selections are exclusive
JanWeldert Feb 28, 2025
0ee0429
Define selection before checking it
JanWeldert Feb 28, 2025
ba8c4a7
Simple test and more checks for VarBinning
JanWeldert Mar 3, 2025
e9b0b5b
docs & comments, split up check_varbinning (was performing two very d…
thehrh Mar 6, 2025
9f078c6
exclusivity check only when requesting new output binning in get_outputs
thehrh Mar 6, 2025
92409cf
Add docstrings and change assert to warn
JanWeldert Mar 6, 2025
94a0529
fix undefined varbinning name in config parser, debug logging of sele…
thehrh Mar 6, 2025
7837426
superficial: comments & docstrings, thousands of lines of ipynb outpu…
thehrh Mar 6, 2025
f7a53ad
also make separate functions for parsing different types of binning f…
thehrh Mar 6, 2025
b0eabba
Merge remote-tracking branch 'origin/master' into var_bin
thehrh Mar 12, 2025
1a3dd76
adapt varbinning init and tests (require > 1 binnings, detect any bin…
thehrh Mar 12, 2025
5c2d85f
comments/NotImplementedErrors for generalized_poisson_llh in analysis…
thehrh Mar 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 53 additions & 32 deletions pisa/analysis/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,16 +647,22 @@ def __init__(
assert hypo_maker is not None, msg
assert data_dist is not None, msg
assert metric is not None, msg
# this passes through the setter method, but it should just pass through
# without actually doing anything
if hypo_maker.__class__.__name__ == "Detectors":
# this passes through the setter method, but it should just pass through
# without actually doing anything
self.detailed_metric_info = [self.get_detailed_metric_info(
data_dist=data_dist[i], hypo_asimov_dist=self.hypo_asimov_dist[i],
params=hypo_maker.distribution_makers[i].params, metric=metric[i],
other_metrics=other_metrics, detector_name=hypo_maker.det_names[i], hypo_maker=hypo_maker
other_metrics=other_metrics, detector_name=hypo_maker.det_names[i], hypo_maker=hypo_maker,
) for i in range(len(data_dist))]
else: # DistributionMaker object
if 'generalized_poisson_llh' == metric[0]:
elif isinstance(data_dist, list): # DistributionMaker object with variable binning
self.detailed_metric_info = [self.get_detailed_metric_info(
data_dist=data_dist[i], hypo_asimov_dist=self.hypo_asimov_dist[i],
params=hypo_maker.params, metric=metric[0],
other_metrics=other_metrics, detector_name=hypo_maker.detector_name, hypo_maker=hypo_maker,
) for i in range(len(data_dist))]
else: # DistributionMaker object with regular binning
if metric[0] == 'generalized_poisson_llh':
generalized_poisson_dist = hypo_maker.get_outputs(return_sum=False, force_standard_output=False)
generalized_poisson_dist = merge_mapsets_together(mapset_list=generalized_poisson_dist)
else:
Expand All @@ -665,7 +671,7 @@ def __init__(
self.detailed_metric_info = self.get_detailed_metric_info(
data_dist=data_dist, hypo_asimov_dist=self.hypo_asimov_dist, generalized_poisson_hypo=generalized_poisson_dist,
params=hypo_maker.params, metric=metric[0], other_metrics=other_metrics,
detector_name=hypo_maker.detector_name, hypo_maker=hypo_maker
detector_name=hypo_maker.detector_name, hypo_maker=hypo_maker,
)

def __getitem__(self, i):
Expand Down Expand Up @@ -1180,17 +1186,6 @@ def fit_recursively(

"""

if isinstance(metric, str):
metric = [metric]

# Before starting any fit, check if we already have a perfect match between data and template
# This can happen if using pseudodata that was generated with the nominal values for parameters
# (which will also be the initial values in the fit) and blah...
# If this is the case, don't both to fit and return results right away.

if isinstance(metric, str):
metric = [metric]

# Grab the hypo map
hypo_asimov_dist = hypo_maker.get_outputs(return_sum=True)

Expand All @@ -1201,12 +1196,18 @@ def fit_recursively(
if len(metric) == 1: # One metric for all detectors
metric = list(metric) * len(hypo_maker.distribution_makers)
elif len(metric) != len(hypo_maker.distribution_makers):
raise IndexError('Number of defined metrics does not match with number of detectors.')
else: # DistributionMaker object
assert len(metric) == 1
raise IndexError(f"Number of defined metrics does not match with number of detectors.")
elif isinstance(hypo_asimov_dist, MapSet) or isinstance(hypo_asimov_dist, list):
# DistributionMaker object (list means variable binning)
assert len(metric) == 1, f"Only one metric allowed for DistributionMaker"
else:
raise NotImplementedError(f"hypo_maker returned output of type {type(hypo_asimov_dist)}")

# Check if the hypo matches data
if hypo_maker.__class__.__name__ != "Detectors" and data_dist.allclose(hypo_asimov_dist) :
# Before starting any fit, check if we already have a perfect match between data and template
# This can happen if using pseudodata that was generated with the nominal values for parameters
# (which will also be the initial values in the fit)
    # If this is the case, don't bother to fit and return results right away.
if isinstance(data_dist, MapSet) and data_dist.allclose(hypo_asimov_dist):

msg = 'Initial hypo matches data, no need for fit'
logging.info(msg)
Expand Down Expand Up @@ -2785,11 +2786,17 @@ def _minimizer_callable(self, scaled_param_vals, hypo_maker, data_dist,
metric_val = 0
for i in range(len(hypo_maker.distribution_makers)):
data = data_dist[i].metric_total(expected_values=hypo_asimov_dist[i],
metric=metric[i], metric_kwargs=metric_kwargs)
metric=metric[i], metric_kwargs=metric_kwargs)
metric_val += data
priors = hypo_maker.params.priors_penalty(metric=metric[0]) # uses just the "first" metric for prior
metric_val += priors
else: # DistributionMaker object
elif isinstance(hypo_asimov_dist, list): # DistributionMaker object with variable binning
metric_val = 0
for i in range(len(hypo_asimov_dist)):
metric_val += data_dist[i].metric_total(expected_values=hypo_asimov_dist[i],
metric=metric[0], metric_kwargs=metric_kwargs)
metric_val += hypo_maker.params.priors_penalty(metric=metric[0])
else: # DistributionMaker object with regular binning
if metric[0] == 'weighted_chi2':
actual_values = data_dist.hist['total']
expected_values = hypo_asimov_dist.hist['total']
Expand Down Expand Up @@ -3121,9 +3128,9 @@ def fit_hypo(self, data_dist, hypo_maker, metric, minimizer_settings,
if len(metric) == 1: # One metric for all detectors
metric = list(metric) * len(hypo_maker.distribution_makers)
elif len(metric) != len(hypo_maker.distribution_makers):
raise IndexError('Number of defined metrics does not match with number of detectors.')
else: # DistributionMaker object
assert len(metric) == 1
raise IndexError(f"Number of defined metrics does not match with number of detectors.")
else:
assert len(metric) == 1, f"Only one metric allowed for DistributionMaker"

if check_ordering:
if 'nh' in hypo_param_selections or 'ih' in hypo_param_selections:
Expand Down Expand Up @@ -3209,9 +3216,12 @@ def nofit_hypo(self, data_dist, hypo_maker, hypo_param_selections,
if len(metric) == 1: # One metric for all detectors
metric = list(metric) * len(hypo_maker.distribution_makers)
elif len(metric) != len(hypo_maker.distribution_makers):
raise IndexError('Number of defined metrics does not match with number of detectors.')
else: # DistributionMaker object
assert len(metric) == 1
raise IndexError(f"Number of defined metrics does not match with number of detectors.")
elif isinstance(hypo_asimov_dist, MapSet) or isinstance(hypo_asimov_dist, list):
# DistributionMaker object (list means variable binning)
assert len(metric) == 1, f"Only one metric allowed for DistributionMaker"
else:
raise NotImplementedError(f"hypo_maker returned output of type {type(hypo_asimov_dist)}")
fit_info.metric = metric

# Assess the fit: whether the data came from the hypo_asimov_dist
Expand All @@ -3223,7 +3233,12 @@ def nofit_hypo(self, data_dist, hypo_maker, hypo_param_selections,
metric_val += data
priors = hypo_maker.params.priors_penalty(metric=metric[0]) # uses just the "first" metric for prior
metric_val += priors
else: # DistributionMaker object
elif isinstance(data_dist, list): # DistributionMaker object with VarBinning
metric_val = 0
for i in range(len(data_dist)):
metric_val += data_dist[i].metric_total(expected_values=hypo_asimov_dist[i], metric=metric[0])
metric_val += hypo_maker.params.priors_penalty(metric=metric[0])
else: # DistributionMaker object with MultiDimBinning

if 'generalized_poisson_llh' == metric[0]:

Expand Down Expand Up @@ -3271,7 +3286,13 @@ def nofit_hypo(self, data_dist, hypo_maker, hypo_param_selections,
params=hypo_maker.distribution_makers[i].params, metric=metric[i],
other_metrics=other_metrics, detector_name=hypo_maker.det_names[i]
) for i in range(len(data_dist))]
else: # DistributionMaker object
elif isinstance(data_dist, list): # DistributionMaker object with VarBinning
fit_info.detailed_metric_info = [fit_info.get_detailed_metric_info(
data_dist=data_dist[i], hypo_asimov_dist=hypo_asimov_dist[i],
params=hypo_maker.params, metric=metric[0], other_metrics=other_metrics,
detector_name=hypo_maker.detector_name
) for i in range(len(data_dist))]
else: # DistributionMaker object with MultiDimBinning

if 'generalized_poisson_llh' == metric[0]:
generalized_poisson_dist = hypo_maker.get_outputs(return_sum=False, force_standard_output=False)
Expand Down
182 changes: 175 additions & 7 deletions pisa/core/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ class (MultiDimBinning) for arbitrarily many of dimensions (one or more). These

__all__ = ['NAME_FIXES', 'NAME_SEPCHARS', 'NAME_FIXES_REGEXES',
'basename', '_new_obj', 'is_binning',
'OneDimBinning', 'MultiDimBinning',
'test_OneDimBinning', 'test_MultiDimBinning']
'OneDimBinning', 'MultiDimBinning', 'VarBinning',
'test_OneDimBinning', 'test_MultiDimBinning', 'test_VarBinning']

__author__ = 'J.L. Lanfranchi'

Expand Down Expand Up @@ -139,7 +139,7 @@ def new_function(cls, *args, **kwargs):
return new_function


class OneDimBinning(object):
class OneDimBinning():
# pylint: disable=line-too-long
"""Histogram-oriented binning specialized to a single dimension.

Expand Down Expand Up @@ -1481,7 +1481,7 @@ def __ne__(self, other):
return not self.__eq__(other)


class MultiDimBinning(object):
class MultiDimBinning():
# pylint: disable=line-too-long
r"""
Multi-dimensional binning object. This can contain one or more
Expand Down Expand Up @@ -1590,7 +1590,6 @@ def __init__(self, dimensions, name=None, mask=None):
# Handle masking
self._init_mask(mask)


def _init_mask(self, mask) :
'''
Initialize the bin mask. This can either be specified as:
Expand Down Expand Up @@ -1639,13 +1638,11 @@ def _init_mask(self, mask) :
# Done, store the mask
self._mask = mask


@property
def name(self):
"""Name of the dimension"""
return self._name


def __repr__(self):
previous_precision = np.get_printoptions()['precision']
np.set_printoptions(precision=18)
Expand Down Expand Up @@ -3043,6 +3040,143 @@ def __ne__(self, other):
return not self.__eq__(other)


class VarBinning():
    # pylint: disable=line-too-long
    r"""Binning that pairs multiple analysis binnings with a set of
    mutually exclusive event selections (exclusivity is verified at
    analysis time), so that each sub-sample can be histogrammed with
    its own `MultiDimBinning`.

    A typical use case is a different 2d binning in reconstructed energy
    and cosine zenith for each PID bin.

    Parameters
    ----------
    binnings : list of MultiDimBinnings
        One MultiDimBinning per selection.

    selections : OneDimBinning or list of strings (cut expressions)
        Selections defining the sub-samples. Either a OneDimBinning of
        the selection variable, or an arbitrary selection given as a list
        of strings, each holding the respective cut expression (similar
        to data.simple_data_loader's `mc_cuts`).

    Notes
    -----
    `binnings` and `selections` must have equal length > 1 (with a single
    selection you could just apply MC cuts the regular way); no array
    broadcasting is attempted.

    If `selections` is a `OneDimBinning`, its dimension must not appear
    in any of the `binnings`. Likewise, if cut expressions are given,
    none of the variables they reference may simultaneously be a
    dimension of any `MultiDimBinning`. Such setups needn't always be
    dangerous, but there are probably less convoluted ways of achieving
    them.

    A warning is issued when `selections` is a `OneDimBinning` while all
    entries of `binnings` are identical.

    """
    # pylint: enable=line-too-long
    def __init__(self, binnings, selections):

        if not isinstance(selections, (OneDimBinning, list)):
            raise ValueError(f"Selection type {type(selections)} not supported!")
        # basic sanity checks on the inputs
        assert isinstance(binnings, list) and len(binnings) == len(selections)
        assert len(binnings) > 1

        for binning in binnings:
            assert isinstance(binning, MultiDimBinning)
            # reject any selection variable that doubles as a binning dim
            clashing_vars = self._selection_vars_in_mdb(binning, selections)
            if clashing_vars:
                raise ValueError(
                    f"Selection variables {clashing_vars} may not"
                    " simultaneously be part of any MultiDimBinning!"
                )

        if isinstance(selections, OneDimBinning) and all(
            binning == binnings[0] for binning in binnings
        ):
            logging.warning(
                f"All binnings are equal, namely {binnings[0]!s}. It will most"
                " likely be advantageous to simply use the MultiDimBinning"
                " class for the same analysis purpose."
            )

        self._binnings = binnings
        self._selections = selections
        self._nselections = len(selections)
        self._names = None  # lazily filled by the `names` property

    def _selection_vars_in_mdb(self, binning, selections):
        """Return the binning dimension names that also appear in the
        selections (empty list if there is no overlap)."""
        if isinstance(selections, OneDimBinning):
            return [selections.name] if selections.name in binning.names else []
        if isinstance(selections, Sequence):
            # crude tokenisation of the cut expressions: collects variable
            # names along with cut values and anything else word-like
            tokens = []
            for expr in selections:
                tokens.extend(re.findall(r"\b\w+\b", expr))
            # Report every dimension name occurring verbatim in any cut
            # expression. Adapt the above if there is a false positive.
            return [name for name in binning.names if name in tokens]
        return []

    @property
    def binnings(self):
        """list of MultiDimBinning : each binning in a list"""
        return self._binnings

    @property
    def selections(self):
        """list of strs or OneDimBinning : selections for which to use different binnings"""
        return self._selections

    @property
    def nselections(self):
        """int : number of selections with possibly different binnings"""
        return self._nselections

    @property
    def names(self):
        """list of strings : names of each (binning) dimension contained"""
        if self._names is None:
            self._names = [binning.names for binning in self.binnings]
        return self._names

    def __pretty__(self, p, cycle):
        """Method used by the `pretty` library for formatting"""
        cls_name = self.__class__.__name__
        if cycle:
            p.text(f'{cls_name}(...)')
            return
        p.begin_group(4, f'{cls_name}([')
        last = len(self) - 1
        for idx, binning in enumerate(self):
            p.breakable()
            p.pretty(binning)
            if idx != last:
                p.text(',')
        p.end_group(4, '])')

    def _repr_pretty_(self, p, cycle):
        """Method used by e.g. ipython/Jupyter for formatting"""
        return self.__pretty__(p, cycle)

    def __iter__(self):
        """Iterate over dimensions. Use `iterbins` to iterate over bins."""
        return iter(self._binnings)

    def __len__(self):
        return self._nselections


def test_OneDimBinning():
"""Unit tests for OneDimBinning class"""
# pylint: disable=line-too-long, wrong-import-position
Expand Down Expand Up @@ -3499,7 +3633,41 @@ def test_MultiDimBinning():
logging.info('<< PASS : test_MultiDimBinning >>')


def test_VarBinning():
    """Unit tests for VarBinning class.

    Covers: construction with a OneDimBinning selection, construction
    with cut-expression selections, basic properties (`names`,
    `nselections`, `__len__`, iteration), and rejection of selections
    that cut on a binning dimension.
    """
    # pylint: disable=wrong-import-position

    b1 = OneDimBinning(name='energy', num_bins=40, is_log=True,
                       domain=[1, 80]*ureg.GeV)
    b2 = OneDimBinning(name='coszen', num_bins=40, is_lin=True,
                       domain=[-1, 1])
    mdb1 = MultiDimBinning([b1, b2])
    b3 = OneDimBinning(name='energy', num_bins=20, is_log=True,
                       domain=[1, 80]*ureg.GeV)
    mdb2 = MultiDimBinning([b3, b2])

    # selection via a OneDimBinning whose dimension is not a binning dim
    selection = OneDimBinning(name='pid', is_lin=True, num_bins=2, domain=[0, 1])
    varbin = VarBinning([mdb1, mdb2], selection)
    assert varbin.names[0] == varbin.names[1] == ['energy', 'coszen']
    assert varbin.nselections == 2
    assert len(varbin) == 2
    # iteration yields the contained MultiDimBinnings
    assert all(isinstance(mdb, MultiDimBinning) for mdb in varbin)

    # selection via a list of cut expressions is also allowed
    selection = ['pid < 0.5', 'pid >= 0.5']
    VarBinning([mdb1, mdb2], selection)

    # cutting on a variable that is also a binning dimension must fail
    selection = ['energy == 10', 'coszen < 0']
    try:
        VarBinning([mdb1, mdb1], selection)
    except ValueError:
        pass
    else:
        assert False, 'expected ValueError for selection on binning dimensions'

    logging.info('<< PASS : test_VarBinning >>')


if __name__ == "__main__":
    # Run all binning unit tests when this module is executed as a script
    set_verbosity(1)
    test_OneDimBinning()
    test_MultiDimBinning()
    test_VarBinning()
Loading