From 23ed41f7d71ca15ad3dd03448069a46eaee0c22f Mon Sep 17 00:00:00 2001
From: "Jack Y. Araz"
Date: Sat, 11 Jan 2025 10:18:23 -0500
Subject: [PATCH] simplify the construction

---
 madanalysis/misc/run_recast.py         | 112 ++++++++++++++-----
 madanalysis/misc/statistical_models.py |  12 +--
 2 files changed, 70 insertions(+), 54 deletions(-)

diff --git a/madanalysis/misc/run_recast.py b/madanalysis/misc/run_recast.py
index 91998f26..9e9ac2a1 100644
--- a/madanalysis/misc/run_recast.py
+++ b/madanalysis/misc/run_recast.py
@@ -33,6 +33,7 @@
 import sys
 import time
 from collections import OrderedDict
+from math import sqrt
 
 import numpy as np
 from shell_command import ShellCommand
@@ -49,7 +50,7 @@
 from madanalysis.IOinterface.library_writer import LibraryWriter
 from madanalysis.misc.histfactory_reader import HF_Background, HF_Signal, get_HFID
 
-from .statistical_models import initialise_statistical_models, compute_poi_upper_limits
+from .statistical_models import compute_poi_upper_limits, initialise_statistical_models
 
 try:
     import spey
@@ -63,6 +64,14 @@
 # pylint: disable=logging-fstring-interpolation
 
 
+def comb_sqr(*args, rnd: int = None) -> float:
+    """Combine values in quadrature (square root of the sum of squares)."""
+    val = sqrt(sum(x**2 for x in args))
+    if rnd is not None:
+        return round(val, rnd)
+    return val
+
+
 class RunRecast:
     def __init__(self, main, dirname):
         self.dirname = dirname
@@ -1286,57 +1295,15 @@ def compute_cls(self, analyses, dataset):
             )
 
             ## Uncertainties on the rates
-            Error_dict = {}
-            if dataset.scaleup != None:
-                Error_dict["scale_up"] = round(dataset.scaleup, 8)
-                Error_dict["scale_dn"] = -round(dataset.scaledn, 8)
-            else:
-                Error_dict["scale_up"], Error_dict["scale_dn"] = 0.0, 0.0
-            if dataset.pdfup != None:
-                Error_dict["pdf_up"] = round(dataset.pdfup, 8)
-                Error_dict["pdf_dn"] = -round(dataset.pdfdn, 8)
-            else:
-                Error_dict["pdf_up"], Error_dict["pdf_dn"] = 0.0, 0.0
-            if self.main.recasting.THerror_combination == "linear":
-                Error_dict["TH_up"] = round(
-                    Error_dict["scale_up"] + Error_dict["pdf_up"], 8
-                )
-                Error_dict["TH_dn"] = round(
-                    Error_dict["scale_dn"] + Error_dict["pdf_dn"], 8
-                )
-            else:
-                Error_dict["TH_up"] = round(
-                    math.sqrt(
-                        Error_dict["pdf_up"] ** 2 + Error_dict["scale_up"] ** 2
-                    ),
-                    8,
-                )
-                Error_dict["TH_dn"] = -round(
-                    math.sqrt(
-                        Error_dict["pdf_dn"] ** 2 + Error_dict["scale_dn"] ** 2
-                    ),
-                    8,
-                )
-            for i in range(0, len(self.main.recasting.systematics)):
-                for unc in self.main.recasting.systematics:
-                    Error_dict["sys" + str(i) + "_up"] = round(
-                        math.sqrt(
-                            Error_dict["TH_up"] ** 2
-                            + self.main.recasting.systematics[i][0] ** 2
-                        ),
-                        8,
-                    )
-                    Error_dict["sys" + str(i) + "_dn"] = -round(
-                        math.sqrt(
-                            Error_dict["TH_dn"] ** 2
-                            + self.main.recasting.systematics[i][1] ** 2
-                        ),
-                        8,
-                    )
+            Error_dict = error_dict_setup(
+                dataset=dataset,
+                systematics=self.main.recasting.systematics,
+                linear_comb=self.main.recasting.THerror_combination == "linear",
+            )
 
             ## Computation of the uncertainties on the limits
             regiondata_errors = {}
-            if dataset.xsection > 0.0 and any([x != 0 for x in Error_dict.values()]):
+            if dataset.xsection > 0.0 and any(x != 0 for x in Error_dict.values()):
                 for error_key, error_value in Error_dict.items():
                     varied_xsec = max(
                         round(dataset.xsection * (1.0 + error_value), 10), 0.0
                     )
@@ -2446,3 +2413,50 @@ def clean_region_name(mystr):
     newstr = newstr.replace("(", "_lp_")
     newstr = newstr.replace(")", "_rp_")
     return newstr
+
+
+def error_dict_setup(
+    dataset, systematics: list[list[float]], linear_comb: bool
+) -> dict[str, float]:
+    """
+    Set up the error dictionary for a given dataset.
+
+    Args:
+        dataset: dataset providing ``scaleup``, ``scaledn``, ``pdfup`` and ``pdfdn``.
+        systematics (``list[list[float]]``): per-source ``[up, down]`` systematic uncertainties.
+        linear_comb (``bool``): combine scale and PDF errors linearly instead of in quadrature.
+
+    Returns:
+        ``dict[str, float]``: signed relative error variations.
+    """
+    def comb(*args, rnd=8):  # combine error magnitudes linearly or in quadrature
+        if linear_comb:
+            return round(sum(abs(x) for x in args), rnd)
+        return comb_sqr(*args, rnd=rnd)
+
+    Error_dict = {
+        "scale_up": 0.0,
+        "scale_dn": 0.0,
+        "pdf_up": 0.0,
+        "pdf_dn": 0.0,
+    }
+    if dataset.scaleup is not None:
+        Error_dict["scale_up"] = round(dataset.scaleup, 8)
+        Error_dict["scale_dn"] = -round(dataset.scaledn, 8)
+    if dataset.pdfup is not None:
+        Error_dict["pdf_up"] = round(dataset.pdfup, 8)
+        Error_dict["pdf_dn"] = -round(dataset.pdfdn, 8)
+    Error_dict.update(
+        {
+            "TH_up": comb(Error_dict["scale_up"], Error_dict["pdf_up"]),
+            "TH_dn": -comb(Error_dict["scale_dn"], Error_dict["pdf_dn"]),
+        }
+    )
+    for idx, syst in enumerate(systematics):
+        Error_dict.update(
+            {
+                f"sys{idx}_up": comb_sqr(Error_dict["TH_up"], syst[0], rnd=8),
+                f"sys{idx}_dn": -comb_sqr(Error_dict["TH_dn"], syst[1], rnd=8),
+            }
+        )
+    return Error_dict
diff --git a/madanalysis/misc/statistical_models.py b/madanalysis/misc/statistical_models.py
index f2601bf1..0d36f1a9 100644
--- a/madanalysis/misc/statistical_models.py
+++ b/madanalysis/misc/statistical_models.py
@@ -1,5 +1,7 @@
-import spey
 import logging
+
+import spey
+
 from .histfactory_reader import HF_Background, HF_Signal
 
 APRIORI = spey.ExpectationType.apriori
@@ -128,9 +130,9 @@ def compute_poi_upper_limits(
     for reg, stat_model in stat_models.items():
         s95 = stat_model.poi_upper_limit(expected=tag) * xsection
         if record_to is None:
-            logger.debug(f"region {reg} s95{label} = {s95:.5f} pb")
-            regiondata[reg]["s95" + label] = "%-20.7f" % s95
+            logger.debug("region %s s95%s = %.5f pb", reg, label, s95)
+            regiondata[reg][f"s95{label}"] = f"{s95:<20.7f}"
         else:
-            logger.debug(f"{record_to}:: region {reg} s95{label} = {s95:.5f} pb")
-            regiondata[record_to][reg]["s95" + label] = "%-20.7f" % s95
+            logger.debug("%s:: region %s s95%s = %.5f pb", record_to, reg, label, s95)
+            regiondata[record_to][reg][f"s95{label}"] = f"{s95:<20.7f}"
     return regiondata
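
For reference, a minimal standalone sketch (not part of the patch) of how the quadrature combination in the new helpers behaves. The `comb_sqr` below is a condensed copy of the helper added in the run_recast.py hunk; the `SimpleNamespace` dataset stand-in and the numerical values are illustrative assumptions only.

    from math import sqrt
    from types import SimpleNamespace


    def comb_sqr(*args, rnd=None):
        """Combine values in quadrature, optionally rounding the result."""
        val = sqrt(sum(x**2 for x in args))
        return round(val, rnd) if rnd is not None else val


    # Illustrative inputs: 3% scale and 2% PDF uncertainty, one [5%, 4%] systematic.
    dataset = SimpleNamespace(scaleup=0.03, scaledn=0.03, pdfup=0.02, pdfdn=0.02)
    systematics = [[0.05, 0.04]]

    th_up = comb_sqr(dataset.scaleup, dataset.pdfup, rnd=8)  # sqrt(0.03**2 + 0.02**2) ~ 0.03605551
    sys0_up = comb_sqr(th_up, systematics[0][0], rnd=8)      # sqrt(th_up**2 + 0.05**2) ~ 0.06164414
    print(th_up, sys0_up)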