From 5dffeda0742e78f7dd20553c557df03c0ecb8bab Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Fri, 6 Dec 2024 00:47:16 +0100 Subject: [PATCH 01/15] Implement some changes from tutorial branch in myin --- .gitignore | 1 + .gitkeep | 1 + code_generation/code_generation.py | 13 +++++++------ code_generation/configuration.py | 7 +++---- code_generation/friend_trees.py | 2 ++ code_generation/producer.py | 24 ++++++++++++------------ 6 files changed, 26 insertions(+), 22 deletions(-) create mode 100644 .gitkeep diff --git a/.gitignore b/.gitignore index 75ebcbed..ac04f820 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ build/ build*/ log/ logs/ +generation_logs/ # ignore all analysis in the config folder apart from the unittest and the template analysis_configurations/* !analysis_configurations/unittest diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 00000000..378eac25 --- /dev/null +++ b/.gitkeep @@ -0,0 +1 @@ +build diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index b50d0f5f..2da348bb 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -235,7 +235,7 @@ def sort_scopes(self) -> None: self.scopes = sorted( scope for scope in self.scopes if scope != self.global_scope ) - if self.global_scope is not None: + if self.global_scope: self.scopes = [self.global_scope] + self.scopes def get_git_status(self) -> None: @@ -471,7 +471,7 @@ def generate_subsets(self, scope: str) -> None: # two cases: # 1. no global scope exists: we have to use df0 as the input df # 2. there is a global scope, jump down - if self.global_scope is None: + if not self.global_scope: if is_first: self.subset_calls[scope].append( subset.call( @@ -490,6 +490,7 @@ def generate_subsets(self, scope: str) -> None: # 1. global scope: there we have to use df0 as the input df # 2. first call of all other scopes: we have to use the # last global df as the input df + # if scope == self.global_scope and is_first: if scope == self.global_scope and is_first: self.subset_calls[scope].append( subset.call( @@ -541,7 +542,7 @@ def generate_run_commands(self) -> str: ) # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -618,7 +619,7 @@ def set_output_quantities(self) -> str: output_quantities = "{" for scope in self._outputfiles_generated.keys(): # get the outputset for the scope - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -720,7 +721,7 @@ def set_shift_quantities_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -772,7 +773,7 @@ def set_quantities_shift_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 39f88c14..59c753af 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -62,6 +62,7 @@ def __init__( available_sample_types: Union[str, List[str]], available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], + global_scope: str = "global", ): """ @@ -88,7 +89,7 @@ def __init__( self.available_scopes = set(available_scopes) self.available_outputs: Dict[str, QuantitiesStore] = {} self.available_shifts: Dict[str, Set[str]] = {} - self.global_scope = "global" + self.global_scope = global_scope self.producers: TProducerStore = {} self.unpacked_producers: TProducerStore = {} @@ -513,9 +514,7 @@ def _remove_empty_scopes(self) -> None: # we have to use a seperate list, because we cannot modify the list while iterating over it without breaking stuff scopes_to_test = [scope for scope in self.scopes] for scope in scopes_to_test: - if (len(self.producers[scope]) == 0) or ( - scope not in self.selected_scopes and scope is not self.global_scope - ): + if (len(self.producers[scope]) == 0 or scope not in self.selected_scopes) and scope is not self.global_scope: log.warning("Removing unrequested / empty scope {}".format(scope)) self.scopes.remove(scope) del self.producers[scope] diff --git a/code_generation/friend_trees.py b/code_generation/friend_trees.py index 4300a4a4..2adc7e11 100644 --- a/code_generation/friend_trees.py +++ b/code_generation/friend_trees.py @@ -44,6 +44,7 @@ def __init__( available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], input_information: Union[str, List[str]], + global_scope: str = "global", ): """Generate a configuration for a FriendTree production. @@ -67,6 +68,7 @@ def __init__( available_sample_types, available_eras, available_scopes, + global_scope=global_scope, ) self.run_nominal = False # in the main constructor, the global scope is added to the scopes list. diff --git a/code_generation/producer.py b/code_generation/producer.py index 9372c091..414e17ce 100644 --- a/code_generation/producer.py +++ b/code_generation/producer.py @@ -47,7 +47,7 @@ def __init__( "Exception (%s): Argument 'input' must be a list or a dict!" % name ) raise Exception - if not isinstance(output, list) and output is not None: + if not isinstance(output, list) and output: log.error( "Exception (%s): Argument 'output' must be a list or None!" % name ) @@ -66,7 +66,7 @@ def __init__( inputdict = input self.input: Dict[str, List[q.Quantity]] = inputdict # keep track of variable dependencies - if self.output is not None: + if self.output: for scope in self.scopes: for input_quantity in self.input[scope]: for output_quantity in self.output: @@ -142,7 +142,7 @@ def reserve_output(self, scope: str) -> None: """ - if self.output is not None: + if self.output: for output_quantity in self.output: output_quantity.reserve_scope(scope) @@ -310,7 +310,7 @@ def writecalls( ) raise Exception calls = [self.writecall(config, scope)] - if self.output is not None: + if self.output: list_of_shifts = self.output[0].get_shifts( scope ) # all entries must have same shifts @@ -412,7 +412,7 @@ def writecalls( basecall = self.call calls: List[str] = [] shifts = ["nominal"] - if self.output is not None: + if self.output: shifts.extend(self.output[0].get_shifts(scope)) for shift in shifts: # check that all config lists (and output if applicable) have same length @@ -427,7 +427,7 @@ def writecalls( % (self.vec_configs[0], key) ) raise Exception - if self.output is not None and len(self.output) != n_versions: + if self.output and len(self.output) != n_versions: log.error( "{} expects either no output or same amount as entries in config lists !".format( self @@ -440,7 +440,7 @@ def writecalls( helper_dict: Dict[Any, Any] = {} for key in self.vec_configs: helper_dict[key] = config[shift][key][i] - if self.output is not None: + if self.output: helper_dict["output"] = ( '"' + self.output[i].get_leaf(shift, scope) + '"' ) @@ -666,7 +666,7 @@ def __init__( else: self.input = dict(input) # If call is provided, this is supposed to consume output of subproducers. Creating these internal products below: - if self.call is not None: + if self.call: log.debug("Constructing {}".format(self.name)) log.debug(" --> Scopes: {}".format(self.scopes)) for scope in self.scopes: @@ -831,9 +831,9 @@ def writecalls( for producer in self.producers[scope]: # duplicate outputs of vector subproducers if they were generated automatically if ( - self.call is not None + self.call and isinstance(producer, VectorProducer) - and producer.output is not None + and producer.output ): for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1): producer.output.append( @@ -932,7 +932,7 @@ def CollectProducersOutput( ) -> Set[q.Quantity]: output: Set[q.Quantity] = set() for producer in producers: - if producer.output is not None: + if producer.output: output |= set(producer.output) if isinstance(producer, ProducerGroup): try: @@ -951,7 +951,7 @@ def CollectProducerOutput( producer: Union[ProducerGroup, Producer], scope: str ) -> Set[q.Quantity]: output: Set[q.Quantity] = set() - if producer.output is not None: + if producer.output: output |= set(producer.output) if isinstance(producer, ProducerGroup): try: From 4795460b66e86fde6fd228fa190c1cc549572826 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Fri, 6 Dec 2024 00:50:14 +0100 Subject: [PATCH 02/15] Only ignore build dir content, not dir itself --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index ac04f820..a2c0b195 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ # folders -build/ -build*/ +build/* log/ logs/ generation_logs/ From a6c5f835861b1dd13391779f3143ecfbd7aefa88 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Fri, 6 Dec 2024 00:55:05 +0100 Subject: [PATCH 03/15] fix --- .gitignore | 1 + .gitkeep => build/.gitkeep | 0 2 files changed, 1 insertion(+) rename .gitkeep => build/.gitkeep (100%) diff --git a/.gitignore b/.gitignore index a2c0b195..535496ed 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # folders build/* +!build/.gitkeep log/ logs/ generation_logs/ diff --git a/.gitkeep b/build/.gitkeep similarity index 100% rename from .gitkeep rename to build/.gitkeep From 932c6dd7703e825fa72e4a3202572e4046fdc68c Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Fri, 6 Dec 2024 04:35:55 +0100 Subject: [PATCH 04/15] Black --- code_generation/configuration.py | 26 +++++++++++++++----------- code_generation/producer.py | 6 +----- code_generation/systematics.py | 12 ++++++------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 59c753af..c0ccb4f1 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -372,16 +372,16 @@ def add_shift( if scope in shift.get_scopes(): self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][ - shift.shiftname - ] = self.resolve_modifiers(shift.get_shift_config(scope)) + self.shifts[scope][shift.shiftname] = ( + self.resolve_modifiers(shift.get_shift_config(scope)) + ) else: self._add_available_shift(shift, scope) shift.apply(self.global_scope) - self.shifts[scope][ - shift.shiftname - ] = self.resolve_modifiers( - shift.get_shift_config(self.global_scope) + self.shifts[scope][shift.shiftname] = ( + self.resolve_modifiers( + shift.get_shift_config(self.global_scope) + ) ) else: for scope in scopes_to_shift: @@ -514,7 +514,9 @@ def _remove_empty_scopes(self) -> None: # we have to use a seperate list, because we cannot modify the list while iterating over it without breaking stuff scopes_to_test = [scope for scope in self.scopes] for scope in scopes_to_test: - if (len(self.producers[scope]) == 0 or scope not in self.selected_scopes) and scope is not self.global_scope: + if ( + len(self.producers[scope]) == 0 or scope not in self.selected_scopes + ) and scope is not self.global_scope: log.warning("Removing unrequested / empty scope {}".format(scope)) self.scopes.remove(scope) del self.producers[scope] @@ -766,9 +768,11 @@ def report(self) -> None: total_quantities = [ sum( [ - len(self.config_parameters[scope][output.vec_config]) - if isinstance(output, QuantityGroup) - else 1 + ( + len(self.config_parameters[scope][output.vec_config]) + if isinstance(output, QuantityGroup) + else 1 + ) for output in self.outputs[scope] ] ) diff --git a/code_generation/producer.py b/code_generation/producer.py index 414e17ce..8e3d12ea 100644 --- a/code_generation/producer.py +++ b/code_generation/producer.py @@ -830,11 +830,7 @@ def writecalls( calls: List[str] = [] for producer in self.producers[scope]: # duplicate outputs of vector subproducers if they were generated automatically - if ( - self.call - and isinstance(producer, VectorProducer) - and producer.output - ): + if self.call and isinstance(producer, VectorProducer) and producer.output: for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1): producer.output.append( producer.output[0].copy( diff --git a/code_generation/systematics.py b/code_generation/systematics.py index 86448999..027011eb 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -82,9 +82,9 @@ def __init__( ] = self.expand_producer_dict_keys(ignore_producers) self.producers: TProducerStore = {} self.ignore_producers: TProducerStore = {} - self.shift_config: Dict[ - str, TConfiguration - ] = self.expand_configuration_dict_keys(shift_config) + self.shift_config: Dict[str, TConfiguration] = ( + self.expand_configuration_dict_keys(shift_config) + ) self.scopes: Set[str] = self.determine_scopes(scopes) self.validate() @@ -403,9 +403,9 @@ def __init__( scopes: List of scopes that are affected by the systematic shift. """ super().__init__(name, {}, {}, scopes, {}) - self.quantity_change: Dict[ - NanoAODQuantity, Union[str, NanoAODQuantity] - ] = quantity_change + self.quantity_change: Dict[NanoAODQuantity, Union[str, NanoAODQuantity]] = ( + quantity_change + ) self.quantities: Set[NanoAODQuantity] = set(quantity_change.keys()) def apply(self, scope: str) -> None: From 542ca2cdd09a8dc7cbc11bb865fd47daaaf72752 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 03:50:44 +0100 Subject: [PATCH 05/15] Make global_scope definition in analysis config boolean (True->'global', False->None). Introduce more coverage for potential types in multiple 'if cases'. The handling of empty lists and boolean variables can be somewhat finicky due to the way some cases are set up. Treat with caution. --- code_generation/code_generation.py | 13 ++++---- code_generation/configuration.py | 33 +++++++++---------- code_generation/exceptions.py | 3 +- code_generation/friend_trees.py | 2 -- code_generation/helpers.py | 24 ++++++++++++++ code_generation/modifiers.py | 5 +-- code_generation/optimizer.py | 7 ++-- code_generation/producer.py | 53 ++++++++++++++++-------------- code_generation/systematics.py | 5 +-- 9 files changed, 87 insertions(+), 58 deletions(-) create mode 100644 code_generation/helpers.py diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index 2da348bb..b50d0f5f 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -235,7 +235,7 @@ def sort_scopes(self) -> None: self.scopes = sorted( scope for scope in self.scopes if scope != self.global_scope ) - if self.global_scope: + if self.global_scope is not None: self.scopes = [self.global_scope] + self.scopes def get_git_status(self) -> None: @@ -471,7 +471,7 @@ def generate_subsets(self, scope: str) -> None: # two cases: # 1. no global scope exists: we have to use df0 as the input df # 2. there is a global scope, jump down - if not self.global_scope: + if self.global_scope is None: if is_first: self.subset_calls[scope].append( subset.call( @@ -490,7 +490,6 @@ def generate_subsets(self, scope: str) -> None: # 1. global scope: there we have to use df0 as the input df # 2. first call of all other scopes: we have to use the # last global df as the input df - # if scope == self.global_scope and is_first: if scope == self.global_scope and is_first: self.subset_calls[scope].append( subset.call( @@ -542,7 +541,7 @@ def generate_run_commands(self) -> str: ) # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -619,7 +618,7 @@ def set_output_quantities(self) -> str: output_quantities = "{" for scope in self._outputfiles_generated.keys(): # get the outputset for the scope - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -721,7 +720,7 @@ def set_shift_quantities_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -773,7 +772,7 @@ def set_quantities_shift_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] diff --git a/code_generation/configuration.py b/code_generation/configuration.py index c0ccb4f1..f440acc8 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -28,6 +28,7 @@ ) from code_generation.rules import ProducerRule, RemoveProducer from code_generation.systematics import SystematicShift, SystematicShiftByQuantity +from code_generation.helpers import is_empty log = logging.getLogger(__name__) # type aliases @@ -62,7 +63,7 @@ def __init__( available_sample_types: Union[str, List[str]], available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], - global_scope: str = "global", + global_scope: str = True, ): """ @@ -89,7 +90,10 @@ def __init__( self.available_scopes = set(available_scopes) self.available_outputs: Dict[str, QuantitiesStore] = {} self.available_shifts: Dict[str, Set[str]] = {} - self.global_scope = global_scope + if global_scope: + self.global_scope = "global" + else: + self.global_scope = None self.producers: TProducerStore = {} self.unpacked_producers: TProducerStore = {} @@ -258,12 +262,12 @@ def unpack_producergroups( """ if isinstance(producers, list): - # we always want to know the toplevel producergroup, so if the parent is None, we set it to the first producer. + # we always want to know the toplevel producergroup, so if the parent evaluates to false, we set it to the first producer. # If a prent is given, we set it to the parent, since this means we are in a producergroup. This is important if we # have nested producergroups, this way every producer is assigned to the outermost producergroup, which is important for the # potential removal of a single producer. for producer in producers: - if parent is None: + if is_empty(parent): parent_producer = producer else: parent_producer = parent @@ -277,7 +281,7 @@ def unpack_producergroups( if isinstance(producers, ProducerGroup): log.debug("{} Unpacking ".format(" " * depth)) for sub_producer in producers.producers[scope]: - if parent is None: + if is_empty(parent): parent_producer = producers else: parent_producer = parent @@ -288,7 +292,7 @@ def unpack_producergroups( depth=depth + 1, ) else: - if parent is None: + if is_empty(parent): log.debug("{} {}".format(" " * depth, producers)) self.unpacked_producers[scope][producers] = producers else: @@ -334,11 +338,11 @@ def add_shift( Returns: None """ - if exclude_samples is not None and samples is not None: + if not is_empty(exclude_samples) and not is_empty(samples): raise ConfigurationError( f"You cannot use samples and exclude_samples at the same time -> Shift {shift}, samples {samples}, exclude_samples {exclude_samples}" ) - if samples is not None: + if not is_empty(samples): if isinstance(samples, str): samples = [samples] for sample in samples: @@ -346,7 +350,7 @@ def add_shift( raise ConfigurationError( f"Sampletype {sample} is not available -> Shift {shift}, available_sample_types {self.available_sample_types}, sample_types {samples}" ) - if exclude_samples is not None: + if not is_empty(exclude_samples): if isinstance(exclude_samples, str): exclude_samples = [exclude_samples] for excluded_sample in exclude_samples: @@ -361,7 +365,7 @@ def add_shift( raise TypeError("shift must be of type SystematicShift") if isinstance(samples, str): samples = [samples] - if samples is None or self.sample in samples: + if is_empty(samples) or self.sample in samples: scopes_to_shift = [ scope for scope in shift.get_scopes() if scope in self.scopes ] @@ -614,7 +618,7 @@ def _remove_empty_configkeys(self, config) -> None: Returns: None """ - for key in config: + for key in list(config): if isinstance(config[key], dict): self._remove_empty_configkeys(config[key]) # special case for extended vector producers, here we can have a list, that contains empty dicts @@ -632,12 +636,7 @@ def _remove_empty_configkeys(self, config) -> None: if isinstance(value, dict): self._remove_empty_configkeys(value) - elif ( - config[key] is None - or config[key] == "" - or config[key] == [] - or config[key] == {} - ): + elif is_empty(config[key]): log.info( "Removing {} since it is an empty configuration parameter".format( key diff --git a/code_generation/exceptions.py b/code_generation/exceptions.py index 9695a522..e598299c 100644 --- a/code_generation/exceptions.py +++ b/code_generation/exceptions.py @@ -1,6 +1,7 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from typing import List, Set, Union from code_generation.quantity import Quantity +from code_generation.helpers import is_empty class ConfigurationError(Exception): @@ -108,7 +109,7 @@ class InvalidShiftError(ConfigurationError): """ def __init__(self, shift: str, sample: str, scope: Union[str, None] = None): - if scope is None: + if is_empty(scope): self.message = "Shift {} is not setup properly or not available for sampletype {}".format( shift, sample ) diff --git a/code_generation/friend_trees.py b/code_generation/friend_trees.py index 2adc7e11..4300a4a4 100644 --- a/code_generation/friend_trees.py +++ b/code_generation/friend_trees.py @@ -44,7 +44,6 @@ def __init__( available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], input_information: Union[str, List[str]], - global_scope: str = "global", ): """Generate a configuration for a FriendTree production. @@ -68,7 +67,6 @@ def __init__( available_sample_types, available_eras, available_scopes, - global_scope=global_scope, ) self.run_nominal = False # in the main constructor, the global scope is added to the scopes list. diff --git a/code_generation/helpers.py b/code_generation/helpers.py new file mode 100644 index 00000000..75a5ce04 --- /dev/null +++ b/code_generation/helpers.py @@ -0,0 +1,24 @@ +from __future__ import annotations # needed for type annotations in > python 3.7 + +# File with helper functions for the CROWN code generation + + +def is_empty(value): + """ + Check if a value is empty. + + Args: + value: The value that should be checked. + + Returns: + bool: Whether the input value is considered 'empty' + """ + # List of all values that should be considered empty despite not having a length. + empty_values = [None] + + try: + length = len(value) + except TypeError: + length = -1 + bool_val = value in empty_values or length == 0 + return bool_val diff --git a/code_generation/modifiers.py b/code_generation/modifiers.py index 1057e42d..7976a606 100644 --- a/code_generation/modifiers.py +++ b/code_generation/modifiers.py @@ -4,6 +4,7 @@ SampleConfigurationError, EraConfigurationError, ) +from code_generation.helpers import is_empty ConfigurationParameters = Union[str, int, float, bool] @@ -71,7 +72,7 @@ def apply(self, sample: str) -> ModifierResolved: """ if sample in self.samples: return self.modifier_dict[sample] - elif self.default is not None: + elif not is_empty(self.default): return self.default else: raise SampleConfigurationError(sample, self.samples) @@ -106,7 +107,7 @@ def apply(self, era: str) -> ModifierResolved: """ if era in self.eras: return self.modifier_dict[era] - elif self.default is not None: + elif not is_empty(self.default): return self.default else: raise EraConfigurationError(era, self.eras) diff --git a/code_generation/optimizer.py b/code_generation/optimizer.py index 6864084c..fa14e3d4 100644 --- a/code_generation/optimizer.py +++ b/code_generation/optimizer.py @@ -1,6 +1,7 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from code_generation.quantity import NanoAODQuantity, Quantity from code_generation.producer import Filter, BaseFilter, Producer, ProducerGroup +from code_generation.helpers import is_empty from typing import Set, Tuple, Union, List import logging @@ -79,7 +80,7 @@ def get_global_outputs(self) -> List[Quantity]: """ outputs: List[Quantity] = [] for producer in self.global_producers: - if producer.get_outputs("global") is not None: + if not is_empty(producer.get_outputs("global")): outputs.extend( [ quantity @@ -155,7 +156,7 @@ def Optimize(self) -> None: log.error("Please check, if all needed producers are activated") raise Exception wrongProducer, wrong_inputs = self.check_ordering() - if wrongProducer is not None: + if not is_empty(wrongProducer): producers_to_relocate = self.find_inputs(wrongProducer, wrong_inputs) # if len(producers_to_relocate) == 0: # self.optimized = True @@ -197,7 +198,7 @@ def check_ordering( outputs = self.global_outputs for producer_to_check in self.ordering: temp_outputs = producer_to_check.get_outputs(self.scope) - if temp_outputs is not None: + if not is_empty(temp_outputs): outputs.extend( [ quantity diff --git a/code_generation/producer.py b/code_generation/producer.py index 8e3d12ea..38c385fb 100644 --- a/code_generation/producer.py +++ b/code_generation/producer.py @@ -8,6 +8,7 @@ InvalidProducerConfigurationError, ConfigurationError, ) +from code_generation.helpers import is_empty import code_generation.quantity as q @@ -47,7 +48,7 @@ def __init__( "Exception (%s): Argument 'input' must be a list or a dict!" % name ) raise Exception - if not isinstance(output, list) and output: + if not isinstance(output, list) and output is not None: log.error( "Exception (%s): Argument 'output' must be a list or None!" % name ) @@ -66,7 +67,7 @@ def __init__( inputdict = input self.input: Dict[str, List[q.Quantity]] = inputdict # keep track of variable dependencies - if self.output: + if not is_empty(self.output): for scope in self.scopes: for input_quantity in self.input[scope]: for output_quantity in self.output: @@ -75,7 +76,7 @@ def __init__( log.debug("| Producer: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if self.input[scope] is None: + if is_empty(self.input[scope]): log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -83,7 +84,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if self.output is None: + if is_empty(self.output): log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -142,7 +143,7 @@ def reserve_output(self, scope: str) -> None: """ - if self.output: + if not is_empty(self.output): for output_quantity in self.output: output_quantity.reserve_scope(scope) @@ -162,10 +163,10 @@ def shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if self.output is None: + if is_empty(self.output): log.error( - "Exception (%s): output None cannot be shifted ! How did you end up here ?" - % name + "Exception (%s): output %s cannot be shifted ! How did you end up here ?" + % (name, self.output) ) raise Exception for entry in self.output: @@ -206,10 +207,10 @@ def ignore_shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if self.output is None: + if is_empty(self.output): log.error( - "Exception (%s): output None cannot be shifted ! How did you end up here ?" - % name + "Exception (%s): output %s cannot be shifted ! How did you end up here ?" + % (name, self.output) ) raise Exception for entry in self.output: @@ -231,7 +232,7 @@ def writecall( Returns: str: The generated C++ call """ - if self.output is None: + if is_empty(self.output): config[shift]["output"] = "" config[shift]["output_vec"] = "" else: @@ -310,7 +311,7 @@ def writecalls( ) raise Exception calls = [self.writecall(config, scope)] - if self.output: + if not is_empty(self.output): list_of_shifts = self.output[0].get_shifts( scope ) # all entries must have same shifts @@ -358,7 +359,7 @@ def get_outputs(self, scope: str) -> List[Union[q.QuantityGroup, q.Quantity]]: ) ) raise Exception - if self.output is None: + if is_empty(self.output): return [] else: return self.output @@ -427,7 +428,7 @@ def writecalls( % (self.vec_configs[0], key) ) raise Exception - if self.output and len(self.output) != n_versions: + if not is_empty(self.output) and len(self.output) != n_versions: log.error( "{} expects either no output or same amount as entries in config lists !".format( self @@ -440,7 +441,7 @@ def writecalls( helper_dict: Dict[Any, Any] = {} for key in self.vec_configs: helper_dict[key] = config[shift][key][i] - if self.output: + if not is_empty(self.output): helper_dict["output"] = ( '"' + self.output[i].get_leaf(shift, scope) + '"' ) @@ -482,7 +483,7 @@ def __init__( # set the vec config key of the quantity group quantity_group.set_vec_config(vec_config) super().__init__(name, call, input, [quantity_group], scope) - if self.output is None: + if is_empty(self.output): raise InvalidProducerConfigurationError(self.name) # add the vec config to the parameters of the producer for scope in self.scopes: @@ -496,7 +497,7 @@ def __repr__(self) -> str: @property def output_group(self) -> q.QuantityGroup: - if self.output is None: + if is_empty(self.output): raise Exception("ExtendedVectorProducer has no output!") if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -520,7 +521,7 @@ def writecalls( """ n_versions = len(config["nominal"][self.vec_config]) log.debug("Number of extended producers to be created {}".format(n_versions)) - if self.output is None: + if is_empty(self.output): raise InvalidProducerConfigurationError(self.name) if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -666,7 +667,7 @@ def __init__( else: self.input = dict(input) # If call is provided, this is supposed to consume output of subproducers. Creating these internal products below: - if self.call: + if not is_empty(self.call): log.debug("Constructing {}".format(self.name)) log.debug(" --> Scopes: {}".format(self.scopes)) for scope in self.scopes: @@ -709,7 +710,7 @@ def __init__( log.debug("| ProducerGroup: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if self.input[scope] is None: + if is_empty(self.input[scope]): log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -717,7 +718,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if self.output is None: + if is_empty(self.output): log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -830,7 +831,11 @@ def writecalls( calls: List[str] = [] for producer in self.producers[scope]: # duplicate outputs of vector subproducers if they were generated automatically - if self.call and isinstance(producer, VectorProducer) and producer.output: + if ( + not is_empty(self.call) + and isinstance(producer, VectorProducer) + and not is_empty(producer.output) + ): for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1): producer.output.append( producer.output[0].copy( @@ -928,7 +933,7 @@ def CollectProducersOutput( ) -> Set[q.Quantity]: output: Set[q.Quantity] = set() for producer in producers: - if producer.output: + if not is_empty(producer.output): output |= set(producer.output) if isinstance(producer, ProducerGroup): try: diff --git a/code_generation/systematics.py b/code_generation/systematics.py index 027011eb..b106de7e 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -11,6 +11,7 @@ TProducerStore, ) from code_generation.quantity import NanoAODQuantity +from code_generation.helpers import is_empty log = logging.getLogger(__name__) @@ -181,7 +182,7 @@ def determine_scopes(self, scopes: Union[List[str], str, None]) -> Set[str]: Returns: set: Set of scopes that are affected by the systematic shift. """ - if scopes is None: + if is_empty(scopes): scope_set: Set[str] = ( set(self.shift_config.keys()) | set(self.input_producers.keys()) @@ -302,7 +303,7 @@ def add_ignore_producer( Returns: None """ - if scopes is None: + if is_empty(scopes): scopes = self.scopes if isinstance(scopes, str): scopes = set(scopes) From 9e9ea32c00f174c23d1610dbe459e51acb48dd68 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 04:09:15 +0100 Subject: [PATCH 06/15] Black --- code_generation/configuration.py | 12 +++++++----- code_generation/systematics.py | 12 ++++++------ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index f440acc8..ba2f9e7c 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -376,9 +376,9 @@ def add_shift( if scope in shift.get_scopes(): self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][shift.shiftname] = ( - self.resolve_modifiers(shift.get_shift_config(scope)) - ) + self.shifts[scope][ + shift.shiftname + ] = self.resolve_modifiers(shift.get_shift_config(scope)) else: self._add_available_shift(shift, scope) shift.apply(self.global_scope) @@ -391,8 +391,10 @@ def add_shift( for scope in scopes_to_shift: self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][shift.shiftname] = self.resolve_modifiers( - shift.get_shift_config(scope) + self.shifts[scope][ + shift.shiftname + ] = self.resolve_modifiers( + shift.get_shift_config(self.global_scope) ) def _is_valid_shift( diff --git a/code_generation/systematics.py b/code_generation/systematics.py index b106de7e..a0be7241 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -83,9 +83,9 @@ def __init__( ] = self.expand_producer_dict_keys(ignore_producers) self.producers: TProducerStore = {} self.ignore_producers: TProducerStore = {} - self.shift_config: Dict[str, TConfiguration] = ( - self.expand_configuration_dict_keys(shift_config) - ) + self.shift_config: Dict[ + str, TConfiguration + ] = self.expand_configuration_dict_keys(shift_config) self.scopes: Set[str] = self.determine_scopes(scopes) self.validate() @@ -404,9 +404,9 @@ def __init__( scopes: List of scopes that are affected by the systematic shift. """ super().__init__(name, {}, {}, scopes, {}) - self.quantity_change: Dict[NanoAODQuantity, Union[str, NanoAODQuantity]] = ( - quantity_change - ) + self.quantity_change: Dict[ + NanoAODQuantity, Union[str, NanoAODQuantity] + ] = quantity_change self.quantities: Set[NanoAODQuantity] = set(quantity_change.keys()) def apply(self, scope: str) -> None: From f34aa06a8b0eb8948e74baf3a3c64c2eefddb531 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 04:12:49 +0100 Subject: [PATCH 07/15] Revert "Make global_scope definition in analysis config boolean (True->'global', False->None). Introduce more coverage for potential types in multiple 'if cases'. The handling of empty lists and boolean variables can be somewhat finicky due to the way some cases are set up. Treat with caution." This reverts commit 542ca2cdd09a8dc7cbc11bb865fd47daaaf72752. --- code_generation/code_generation.py | 13 ++++---- code_generation/configuration.py | 33 ++++++++++--------- code_generation/exceptions.py | 3 +- code_generation/friend_trees.py | 2 ++ code_generation/helpers.py | 24 -------------- code_generation/modifiers.py | 5 ++- code_generation/optimizer.py | 7 ++-- code_generation/producer.py | 53 ++++++++++++++---------------- code_generation/systematics.py | 5 ++- 9 files changed, 58 insertions(+), 87 deletions(-) delete mode 100644 code_generation/helpers.py diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index b50d0f5f..2da348bb 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -235,7 +235,7 @@ def sort_scopes(self) -> None: self.scopes = sorted( scope for scope in self.scopes if scope != self.global_scope ) - if self.global_scope is not None: + if self.global_scope: self.scopes = [self.global_scope] + self.scopes def get_git_status(self) -> None: @@ -471,7 +471,7 @@ def generate_subsets(self, scope: str) -> None: # two cases: # 1. no global scope exists: we have to use df0 as the input df # 2. there is a global scope, jump down - if self.global_scope is None: + if not self.global_scope: if is_first: self.subset_calls[scope].append( subset.call( @@ -490,6 +490,7 @@ def generate_subsets(self, scope: str) -> None: # 1. global scope: there we have to use df0 as the input df # 2. first call of all other scopes: we have to use the # last global df as the input df + # if scope == self.global_scope and is_first: if scope == self.global_scope and is_first: self.subset_calls[scope].append( subset.call( @@ -541,7 +542,7 @@ def generate_run_commands(self) -> str: ) # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -618,7 +619,7 @@ def set_output_quantities(self) -> str: output_quantities = "{" for scope in self._outputfiles_generated.keys(): # get the outputset for the scope - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -720,7 +721,7 @@ def set_shift_quantities_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -772,7 +773,7 @@ def set_quantities_shift_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope is not None: + if self.global_scope: global_commands = self.output_commands[self.global_scope] else: global_commands = [] diff --git a/code_generation/configuration.py b/code_generation/configuration.py index ba2f9e7c..cb88bbdf 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -28,7 +28,6 @@ ) from code_generation.rules import ProducerRule, RemoveProducer from code_generation.systematics import SystematicShift, SystematicShiftByQuantity -from code_generation.helpers import is_empty log = logging.getLogger(__name__) # type aliases @@ -63,7 +62,7 @@ def __init__( available_sample_types: Union[str, List[str]], available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], - global_scope: str = True, + global_scope: str = "global", ): """ @@ -90,10 +89,7 @@ def __init__( self.available_scopes = set(available_scopes) self.available_outputs: Dict[str, QuantitiesStore] = {} self.available_shifts: Dict[str, Set[str]] = {} - if global_scope: - self.global_scope = "global" - else: - self.global_scope = None + self.global_scope = global_scope self.producers: TProducerStore = {} self.unpacked_producers: TProducerStore = {} @@ -262,12 +258,12 @@ def unpack_producergroups( """ if isinstance(producers, list): - # we always want to know the toplevel producergroup, so if the parent evaluates to false, we set it to the first producer. + # we always want to know the toplevel producergroup, so if the parent is None, we set it to the first producer. # If a prent is given, we set it to the parent, since this means we are in a producergroup. This is important if we # have nested producergroups, this way every producer is assigned to the outermost producergroup, which is important for the # potential removal of a single producer. for producer in producers: - if is_empty(parent): + if parent is None: parent_producer = producer else: parent_producer = parent @@ -281,7 +277,7 @@ def unpack_producergroups( if isinstance(producers, ProducerGroup): log.debug("{} Unpacking ".format(" " * depth)) for sub_producer in producers.producers[scope]: - if is_empty(parent): + if parent is None: parent_producer = producers else: parent_producer = parent @@ -292,7 +288,7 @@ def unpack_producergroups( depth=depth + 1, ) else: - if is_empty(parent): + if parent is None: log.debug("{} {}".format(" " * depth, producers)) self.unpacked_producers[scope][producers] = producers else: @@ -338,11 +334,11 @@ def add_shift( Returns: None """ - if not is_empty(exclude_samples) and not is_empty(samples): + if exclude_samples is not None and samples is not None: raise ConfigurationError( f"You cannot use samples and exclude_samples at the same time -> Shift {shift}, samples {samples}, exclude_samples {exclude_samples}" ) - if not is_empty(samples): + if samples is not None: if isinstance(samples, str): samples = [samples] for sample in samples: @@ -350,7 +346,7 @@ def add_shift( raise ConfigurationError( f"Sampletype {sample} is not available -> Shift {shift}, available_sample_types {self.available_sample_types}, sample_types {samples}" ) - if not is_empty(exclude_samples): + if exclude_samples is not None: if isinstance(exclude_samples, str): exclude_samples = [exclude_samples] for excluded_sample in exclude_samples: @@ -365,7 +361,7 @@ def add_shift( raise TypeError("shift must be of type SystematicShift") if isinstance(samples, str): samples = [samples] - if is_empty(samples) or self.sample in samples: + if samples is None or self.sample in samples: scopes_to_shift = [ scope for scope in shift.get_scopes() if scope in self.scopes ] @@ -620,7 +616,7 @@ def _remove_empty_configkeys(self, config) -> None: Returns: None """ - for key in list(config): + for key in config: if isinstance(config[key], dict): self._remove_empty_configkeys(config[key]) # special case for extended vector producers, here we can have a list, that contains empty dicts @@ -638,7 +634,12 @@ def _remove_empty_configkeys(self, config) -> None: if isinstance(value, dict): self._remove_empty_configkeys(value) - elif is_empty(config[key]): + elif ( + config[key] is None + or config[key] == "" + or config[key] == [] + or config[key] == {} + ): log.info( "Removing {} since it is an empty configuration parameter".format( key diff --git a/code_generation/exceptions.py b/code_generation/exceptions.py index e598299c..9695a522 100644 --- a/code_generation/exceptions.py +++ b/code_generation/exceptions.py @@ -1,7 +1,6 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from typing import List, Set, Union from code_generation.quantity import Quantity -from code_generation.helpers import is_empty class ConfigurationError(Exception): @@ -109,7 +108,7 @@ class InvalidShiftError(ConfigurationError): """ def __init__(self, shift: str, sample: str, scope: Union[str, None] = None): - if is_empty(scope): + if scope is None: self.message = "Shift {} is not setup properly or not available for sampletype {}".format( shift, sample ) diff --git a/code_generation/friend_trees.py b/code_generation/friend_trees.py index 4300a4a4..2adc7e11 100644 --- a/code_generation/friend_trees.py +++ b/code_generation/friend_trees.py @@ -44,6 +44,7 @@ def __init__( available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], input_information: Union[str, List[str]], + global_scope: str = "global", ): """Generate a configuration for a FriendTree production. @@ -67,6 +68,7 @@ def __init__( available_sample_types, available_eras, available_scopes, + global_scope=global_scope, ) self.run_nominal = False # in the main constructor, the global scope is added to the scopes list. diff --git a/code_generation/helpers.py b/code_generation/helpers.py deleted file mode 100644 index 75a5ce04..00000000 --- a/code_generation/helpers.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations # needed for type annotations in > python 3.7 - -# File with helper functions for the CROWN code generation - - -def is_empty(value): - """ - Check if a value is empty. - - Args: - value: The value that should be checked. - - Returns: - bool: Whether the input value is considered 'empty' - """ - # List of all values that should be considered empty despite not having a length. - empty_values = [None] - - try: - length = len(value) - except TypeError: - length = -1 - bool_val = value in empty_values or length == 0 - return bool_val diff --git a/code_generation/modifiers.py b/code_generation/modifiers.py index 7976a606..1057e42d 100644 --- a/code_generation/modifiers.py +++ b/code_generation/modifiers.py @@ -4,7 +4,6 @@ SampleConfigurationError, EraConfigurationError, ) -from code_generation.helpers import is_empty ConfigurationParameters = Union[str, int, float, bool] @@ -72,7 +71,7 @@ def apply(self, sample: str) -> ModifierResolved: """ if sample in self.samples: return self.modifier_dict[sample] - elif not is_empty(self.default): + elif self.default is not None: return self.default else: raise SampleConfigurationError(sample, self.samples) @@ -107,7 +106,7 @@ def apply(self, era: str) -> ModifierResolved: """ if era in self.eras: return self.modifier_dict[era] - elif not is_empty(self.default): + elif self.default is not None: return self.default else: raise EraConfigurationError(era, self.eras) diff --git a/code_generation/optimizer.py b/code_generation/optimizer.py index fa14e3d4..6864084c 100644 --- a/code_generation/optimizer.py +++ b/code_generation/optimizer.py @@ -1,7 +1,6 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from code_generation.quantity import NanoAODQuantity, Quantity from code_generation.producer import Filter, BaseFilter, Producer, ProducerGroup -from code_generation.helpers import is_empty from typing import Set, Tuple, Union, List import logging @@ -80,7 +79,7 @@ def get_global_outputs(self) -> List[Quantity]: """ outputs: List[Quantity] = [] for producer in self.global_producers: - if not is_empty(producer.get_outputs("global")): + if producer.get_outputs("global") is not None: outputs.extend( [ quantity @@ -156,7 +155,7 @@ def Optimize(self) -> None: log.error("Please check, if all needed producers are activated") raise Exception wrongProducer, wrong_inputs = self.check_ordering() - if not is_empty(wrongProducer): + if wrongProducer is not None: producers_to_relocate = self.find_inputs(wrongProducer, wrong_inputs) # if len(producers_to_relocate) == 0: # self.optimized = True @@ -198,7 +197,7 @@ def check_ordering( outputs = self.global_outputs for producer_to_check in self.ordering: temp_outputs = producer_to_check.get_outputs(self.scope) - if not is_empty(temp_outputs): + if temp_outputs is not None: outputs.extend( [ quantity diff --git a/code_generation/producer.py b/code_generation/producer.py index 38c385fb..8e3d12ea 100644 --- a/code_generation/producer.py +++ b/code_generation/producer.py @@ -8,7 +8,6 @@ InvalidProducerConfigurationError, ConfigurationError, ) -from code_generation.helpers import is_empty import code_generation.quantity as q @@ -48,7 +47,7 @@ def __init__( "Exception (%s): Argument 'input' must be a list or a dict!" % name ) raise Exception - if not isinstance(output, list) and output is not None: + if not isinstance(output, list) and output: log.error( "Exception (%s): Argument 'output' must be a list or None!" % name ) @@ -67,7 +66,7 @@ def __init__( inputdict = input self.input: Dict[str, List[q.Quantity]] = inputdict # keep track of variable dependencies - if not is_empty(self.output): + if self.output: for scope in self.scopes: for input_quantity in self.input[scope]: for output_quantity in self.output: @@ -76,7 +75,7 @@ def __init__( log.debug("| Producer: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if is_empty(self.input[scope]): + if self.input[scope] is None: log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -84,7 +83,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if is_empty(self.output): + if self.output is None: log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -143,7 +142,7 @@ def reserve_output(self, scope: str) -> None: """ - if not is_empty(self.output): + if self.output: for output_quantity in self.output: output_quantity.reserve_scope(scope) @@ -163,10 +162,10 @@ def shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if is_empty(self.output): + if self.output is None: log.error( - "Exception (%s): output %s cannot be shifted ! How did you end up here ?" - % (name, self.output) + "Exception (%s): output None cannot be shifted ! How did you end up here ?" + % name ) raise Exception for entry in self.output: @@ -207,10 +206,10 @@ def ignore_shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if is_empty(self.output): + if self.output is None: log.error( - "Exception (%s): output %s cannot be shifted ! How did you end up here ?" - % (name, self.output) + "Exception (%s): output None cannot be shifted ! How did you end up here ?" + % name ) raise Exception for entry in self.output: @@ -232,7 +231,7 @@ def writecall( Returns: str: The generated C++ call """ - if is_empty(self.output): + if self.output is None: config[shift]["output"] = "" config[shift]["output_vec"] = "" else: @@ -311,7 +310,7 @@ def writecalls( ) raise Exception calls = [self.writecall(config, scope)] - if not is_empty(self.output): + if self.output: list_of_shifts = self.output[0].get_shifts( scope ) # all entries must have same shifts @@ -359,7 +358,7 @@ def get_outputs(self, scope: str) -> List[Union[q.QuantityGroup, q.Quantity]]: ) ) raise Exception - if is_empty(self.output): + if self.output is None: return [] else: return self.output @@ -428,7 +427,7 @@ def writecalls( % (self.vec_configs[0], key) ) raise Exception - if not is_empty(self.output) and len(self.output) != n_versions: + if self.output and len(self.output) != n_versions: log.error( "{} expects either no output or same amount as entries in config lists !".format( self @@ -441,7 +440,7 @@ def writecalls( helper_dict: Dict[Any, Any] = {} for key in self.vec_configs: helper_dict[key] = config[shift][key][i] - if not is_empty(self.output): + if self.output: helper_dict["output"] = ( '"' + self.output[i].get_leaf(shift, scope) + '"' ) @@ -483,7 +482,7 @@ def __init__( # set the vec config key of the quantity group quantity_group.set_vec_config(vec_config) super().__init__(name, call, input, [quantity_group], scope) - if is_empty(self.output): + if self.output is None: raise InvalidProducerConfigurationError(self.name) # add the vec config to the parameters of the producer for scope in self.scopes: @@ -497,7 +496,7 @@ def __repr__(self) -> str: @property def output_group(self) -> q.QuantityGroup: - if is_empty(self.output): + if self.output is None: raise Exception("ExtendedVectorProducer has no output!") if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -521,7 +520,7 @@ def writecalls( """ n_versions = len(config["nominal"][self.vec_config]) log.debug("Number of extended producers to be created {}".format(n_versions)) - if is_empty(self.output): + if self.output is None: raise InvalidProducerConfigurationError(self.name) if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -667,7 +666,7 @@ def __init__( else: self.input = dict(input) # If call is provided, this is supposed to consume output of subproducers. Creating these internal products below: - if not is_empty(self.call): + if self.call: log.debug("Constructing {}".format(self.name)) log.debug(" --> Scopes: {}".format(self.scopes)) for scope in self.scopes: @@ -710,7 +709,7 @@ def __init__( log.debug("| ProducerGroup: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if is_empty(self.input[scope]): + if self.input[scope] is None: log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -718,7 +717,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if is_empty(self.output): + if self.output is None: log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -831,11 +830,7 @@ def writecalls( calls: List[str] = [] for producer in self.producers[scope]: # duplicate outputs of vector subproducers if they were generated automatically - if ( - not is_empty(self.call) - and isinstance(producer, VectorProducer) - and not is_empty(producer.output) - ): + if self.call and isinstance(producer, VectorProducer) and producer.output: for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1): producer.output.append( producer.output[0].copy( @@ -933,7 +928,7 @@ def CollectProducersOutput( ) -> Set[q.Quantity]: output: Set[q.Quantity] = set() for producer in producers: - if not is_empty(producer.output): + if producer.output: output |= set(producer.output) if isinstance(producer, ProducerGroup): try: diff --git a/code_generation/systematics.py b/code_generation/systematics.py index a0be7241..86448999 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -11,7 +11,6 @@ TProducerStore, ) from code_generation.quantity import NanoAODQuantity -from code_generation.helpers import is_empty log = logging.getLogger(__name__) @@ -182,7 +181,7 @@ def determine_scopes(self, scopes: Union[List[str], str, None]) -> Set[str]: Returns: set: Set of scopes that are affected by the systematic shift. """ - if is_empty(scopes): + if scopes is None: scope_set: Set[str] = ( set(self.shift_config.keys()) | set(self.input_producers.keys()) @@ -303,7 +302,7 @@ def add_ignore_producer( Returns: None """ - if is_empty(scopes): + if scopes is None: scopes = self.scopes if isinstance(scopes, str): scopes = set(scopes) From 9c0224215dacd906c45f34f072fb1615d155ab80 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 04:36:28 +0100 Subject: [PATCH 08/15] Typo --- code_generation/configuration.py | 19 ++++++++++--------- code_generation/systematics.py | 12 ++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index cb88bbdf..4951e8e2 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -62,7 +62,7 @@ def __init__( available_sample_types: Union[str, List[str]], available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], - global_scope: str = "global", + global_scope: bool = True, ): """ @@ -89,7 +89,10 @@ def __init__( self.available_scopes = set(available_scopes) self.available_outputs: Dict[str, QuantitiesStore] = {} self.available_shifts: Dict[str, Set[str]] = {} - self.global_scope = global_scope + if global_scope: + self.global_scope = "global" + else: + self.global_scope = None self.producers: TProducerStore = {} self.unpacked_producers: TProducerStore = {} @@ -372,9 +375,9 @@ def add_shift( if scope in shift.get_scopes(): self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][ - shift.shiftname - ] = self.resolve_modifiers(shift.get_shift_config(scope)) + self.shifts[scope][shift.shiftname] = ( + self.resolve_modifiers(shift.get_shift_config(scope)) + ) else: self._add_available_shift(shift, scope) shift.apply(self.global_scope) @@ -387,10 +390,8 @@ def add_shift( for scope in scopes_to_shift: self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][ - shift.shiftname - ] = self.resolve_modifiers( - shift.get_shift_config(self.global_scope) + self.shifts[scope][shift.shiftname] = self.resolve_modifiers( + shift.get_shift_config(scope) ) def _is_valid_shift( diff --git a/code_generation/systematics.py b/code_generation/systematics.py index 86448999..027011eb 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -82,9 +82,9 @@ def __init__( ] = self.expand_producer_dict_keys(ignore_producers) self.producers: TProducerStore = {} self.ignore_producers: TProducerStore = {} - self.shift_config: Dict[ - str, TConfiguration - ] = self.expand_configuration_dict_keys(shift_config) + self.shift_config: Dict[str, TConfiguration] = ( + self.expand_configuration_dict_keys(shift_config) + ) self.scopes: Set[str] = self.determine_scopes(scopes) self.validate() @@ -403,9 +403,9 @@ def __init__( scopes: List of scopes that are affected by the systematic shift. """ super().__init__(name, {}, {}, scopes, {}) - self.quantity_change: Dict[ - NanoAODQuantity, Union[str, NanoAODQuantity] - ] = quantity_change + self.quantity_change: Dict[NanoAODQuantity, Union[str, NanoAODQuantity]] = ( + quantity_change + ) self.quantities: Set[NanoAODQuantity] = set(quantity_change.keys()) def apply(self, scope: str) -> None: From 4c31f4e7da1e321bd0a8d61417eb5351e7c7e62a Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 04:55:28 +0100 Subject: [PATCH 09/15] Reapply "Make global_scope definition in analysis config boolean (True->'global', False->None). Introduce more coverage for potential types in multiple 'if cases'. The handling of empty lists and boolean variables can be somewhat finicky due to the way some cases are set up. Treat with caution." This reverts commit f34aa06a8b0eb8948e74baf3a3c64c2eefddb531. --- code_generation/code_generation.py | 13 ++++---- code_generation/configuration.py | 26 +++++++-------- code_generation/exceptions.py | 3 +- code_generation/friend_trees.py | 2 -- code_generation/helpers.py | 24 ++++++++++++++ code_generation/modifiers.py | 5 +-- code_generation/optimizer.py | 7 ++-- code_generation/producer.py | 53 ++++++++++++++++-------------- code_generation/systematics.py | 5 +-- 9 files changed, 82 insertions(+), 56 deletions(-) create mode 100644 code_generation/helpers.py diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index 2da348bb..b50d0f5f 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -235,7 +235,7 @@ def sort_scopes(self) -> None: self.scopes = sorted( scope for scope in self.scopes if scope != self.global_scope ) - if self.global_scope: + if self.global_scope is not None: self.scopes = [self.global_scope] + self.scopes def get_git_status(self) -> None: @@ -471,7 +471,7 @@ def generate_subsets(self, scope: str) -> None: # two cases: # 1. no global scope exists: we have to use df0 as the input df # 2. there is a global scope, jump down - if not self.global_scope: + if self.global_scope is None: if is_first: self.subset_calls[scope].append( subset.call( @@ -490,7 +490,6 @@ def generate_subsets(self, scope: str) -> None: # 1. global scope: there we have to use df0 as the input df # 2. first call of all other scopes: we have to use the # last global df as the input df - # if scope == self.global_scope and is_first: if scope == self.global_scope and is_first: self.subset_calls[scope].append( subset.call( @@ -542,7 +541,7 @@ def generate_run_commands(self) -> str: ) # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -619,7 +618,7 @@ def set_output_quantities(self) -> str: output_quantities = "{" for scope in self._outputfiles_generated.keys(): # get the outputset for the scope - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -721,7 +720,7 @@ def set_shift_quantities_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] @@ -773,7 +772,7 @@ def set_quantities_shift_map(self) -> str: self.output_commands[scope].extend(output.get_leaves_of_scope(scope)) if len(self.output_commands[scope]) > 0 and scope != self.global_scope: # convert output lists to a set to remove duplicates - if self.global_scope: + if self.global_scope is not None: global_commands = self.output_commands[self.global_scope] else: global_commands = [] diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 4951e8e2..2184af4b 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -28,6 +28,7 @@ ) from code_generation.rules import ProducerRule, RemoveProducer from code_generation.systematics import SystematicShift, SystematicShiftByQuantity +from code_generation.helpers import is_empty log = logging.getLogger(__name__) # type aliases @@ -261,12 +262,12 @@ def unpack_producergroups( """ if isinstance(producers, list): - # we always want to know the toplevel producergroup, so if the parent is None, we set it to the first producer. + # we always want to know the toplevel producergroup, so if the parent evaluates to false, we set it to the first producer. # If a prent is given, we set it to the parent, since this means we are in a producergroup. This is important if we # have nested producergroups, this way every producer is assigned to the outermost producergroup, which is important for the # potential removal of a single producer. for producer in producers: - if parent is None: + if is_empty(parent): parent_producer = producer else: parent_producer = parent @@ -280,7 +281,7 @@ def unpack_producergroups( if isinstance(producers, ProducerGroup): log.debug("{} Unpacking ".format(" " * depth)) for sub_producer in producers.producers[scope]: - if parent is None: + if is_empty(parent): parent_producer = producers else: parent_producer = parent @@ -291,7 +292,7 @@ def unpack_producergroups( depth=depth + 1, ) else: - if parent is None: + if is_empty(parent): log.debug("{} {}".format(" " * depth, producers)) self.unpacked_producers[scope][producers] = producers else: @@ -337,11 +338,11 @@ def add_shift( Returns: None """ - if exclude_samples is not None and samples is not None: + if not is_empty(exclude_samples) and not is_empty(samples): raise ConfigurationError( f"You cannot use samples and exclude_samples at the same time -> Shift {shift}, samples {samples}, exclude_samples {exclude_samples}" ) - if samples is not None: + if not is_empty(samples): if isinstance(samples, str): samples = [samples] for sample in samples: @@ -349,7 +350,7 @@ def add_shift( raise ConfigurationError( f"Sampletype {sample} is not available -> Shift {shift}, available_sample_types {self.available_sample_types}, sample_types {samples}" ) - if exclude_samples is not None: + if not is_empty(exclude_samples): if isinstance(exclude_samples, str): exclude_samples = [exclude_samples] for excluded_sample in exclude_samples: @@ -364,7 +365,7 @@ def add_shift( raise TypeError("shift must be of type SystematicShift") if isinstance(samples, str): samples = [samples] - if samples is None or self.sample in samples: + if is_empty(samples) or self.sample in samples: scopes_to_shift = [ scope for scope in shift.get_scopes() if scope in self.scopes ] @@ -617,7 +618,7 @@ def _remove_empty_configkeys(self, config) -> None: Returns: None """ - for key in config: + for key in list(config): if isinstance(config[key], dict): self._remove_empty_configkeys(config[key]) # special case for extended vector producers, here we can have a list, that contains empty dicts @@ -635,12 +636,7 @@ def _remove_empty_configkeys(self, config) -> None: if isinstance(value, dict): self._remove_empty_configkeys(value) - elif ( - config[key] is None - or config[key] == "" - or config[key] == [] - or config[key] == {} - ): + elif is_empty(config[key]): log.info( "Removing {} since it is an empty configuration parameter".format( key diff --git a/code_generation/exceptions.py b/code_generation/exceptions.py index 9695a522..e598299c 100644 --- a/code_generation/exceptions.py +++ b/code_generation/exceptions.py @@ -1,6 +1,7 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from typing import List, Set, Union from code_generation.quantity import Quantity +from code_generation.helpers import is_empty class ConfigurationError(Exception): @@ -108,7 +109,7 @@ class InvalidShiftError(ConfigurationError): """ def __init__(self, shift: str, sample: str, scope: Union[str, None] = None): - if scope is None: + if is_empty(scope): self.message = "Shift {} is not setup properly or not available for sampletype {}".format( shift, sample ) diff --git a/code_generation/friend_trees.py b/code_generation/friend_trees.py index 2adc7e11..4300a4a4 100644 --- a/code_generation/friend_trees.py +++ b/code_generation/friend_trees.py @@ -44,7 +44,6 @@ def __init__( available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], input_information: Union[str, List[str]], - global_scope: str = "global", ): """Generate a configuration for a FriendTree production. @@ -68,7 +67,6 @@ def __init__( available_sample_types, available_eras, available_scopes, - global_scope=global_scope, ) self.run_nominal = False # in the main constructor, the global scope is added to the scopes list. diff --git a/code_generation/helpers.py b/code_generation/helpers.py new file mode 100644 index 00000000..75a5ce04 --- /dev/null +++ b/code_generation/helpers.py @@ -0,0 +1,24 @@ +from __future__ import annotations # needed for type annotations in > python 3.7 + +# File with helper functions for the CROWN code generation + + +def is_empty(value): + """ + Check if a value is empty. + + Args: + value: The value that should be checked. + + Returns: + bool: Whether the input value is considered 'empty' + """ + # List of all values that should be considered empty despite not having a length. + empty_values = [None] + + try: + length = len(value) + except TypeError: + length = -1 + bool_val = value in empty_values or length == 0 + return bool_val diff --git a/code_generation/modifiers.py b/code_generation/modifiers.py index 1057e42d..7976a606 100644 --- a/code_generation/modifiers.py +++ b/code_generation/modifiers.py @@ -4,6 +4,7 @@ SampleConfigurationError, EraConfigurationError, ) +from code_generation.helpers import is_empty ConfigurationParameters = Union[str, int, float, bool] @@ -71,7 +72,7 @@ def apply(self, sample: str) -> ModifierResolved: """ if sample in self.samples: return self.modifier_dict[sample] - elif self.default is not None: + elif not is_empty(self.default): return self.default else: raise SampleConfigurationError(sample, self.samples) @@ -106,7 +107,7 @@ def apply(self, era: str) -> ModifierResolved: """ if era in self.eras: return self.modifier_dict[era] - elif self.default is not None: + elif not is_empty(self.default): return self.default else: raise EraConfigurationError(era, self.eras) diff --git a/code_generation/optimizer.py b/code_generation/optimizer.py index 6864084c..fa14e3d4 100644 --- a/code_generation/optimizer.py +++ b/code_generation/optimizer.py @@ -1,6 +1,7 @@ from __future__ import annotations # needed for type annotations in > python 3.7 from code_generation.quantity import NanoAODQuantity, Quantity from code_generation.producer import Filter, BaseFilter, Producer, ProducerGroup +from code_generation.helpers import is_empty from typing import Set, Tuple, Union, List import logging @@ -79,7 +80,7 @@ def get_global_outputs(self) -> List[Quantity]: """ outputs: List[Quantity] = [] for producer in self.global_producers: - if producer.get_outputs("global") is not None: + if not is_empty(producer.get_outputs("global")): outputs.extend( [ quantity @@ -155,7 +156,7 @@ def Optimize(self) -> None: log.error("Please check, if all needed producers are activated") raise Exception wrongProducer, wrong_inputs = self.check_ordering() - if wrongProducer is not None: + if not is_empty(wrongProducer): producers_to_relocate = self.find_inputs(wrongProducer, wrong_inputs) # if len(producers_to_relocate) == 0: # self.optimized = True @@ -197,7 +198,7 @@ def check_ordering( outputs = self.global_outputs for producer_to_check in self.ordering: temp_outputs = producer_to_check.get_outputs(self.scope) - if temp_outputs is not None: + if not is_empty(temp_outputs): outputs.extend( [ quantity diff --git a/code_generation/producer.py b/code_generation/producer.py index 8e3d12ea..38c385fb 100644 --- a/code_generation/producer.py +++ b/code_generation/producer.py @@ -8,6 +8,7 @@ InvalidProducerConfigurationError, ConfigurationError, ) +from code_generation.helpers import is_empty import code_generation.quantity as q @@ -47,7 +48,7 @@ def __init__( "Exception (%s): Argument 'input' must be a list or a dict!" % name ) raise Exception - if not isinstance(output, list) and output: + if not isinstance(output, list) and output is not None: log.error( "Exception (%s): Argument 'output' must be a list or None!" % name ) @@ -66,7 +67,7 @@ def __init__( inputdict = input self.input: Dict[str, List[q.Quantity]] = inputdict # keep track of variable dependencies - if self.output: + if not is_empty(self.output): for scope in self.scopes: for input_quantity in self.input[scope]: for output_quantity in self.output: @@ -75,7 +76,7 @@ def __init__( log.debug("| Producer: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if self.input[scope] is None: + if is_empty(self.input[scope]): log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -83,7 +84,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if self.output is None: + if is_empty(self.output): log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -142,7 +143,7 @@ def reserve_output(self, scope: str) -> None: """ - if self.output: + if not is_empty(self.output): for output_quantity in self.output: output_quantity.reserve_scope(scope) @@ -162,10 +163,10 @@ def shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if self.output is None: + if is_empty(self.output): log.error( - "Exception (%s): output None cannot be shifted ! How did you end up here ?" - % name + "Exception (%s): output %s cannot be shifted ! How did you end up here ?" + % (name, self.output) ) raise Exception for entry in self.output: @@ -206,10 +207,10 @@ def ignore_shift(self, name: str, scope: str = "global") -> None: % (name, self.name, scope) ) raise Exception - if self.output is None: + if is_empty(self.output): log.error( - "Exception (%s): output None cannot be shifted ! How did you end up here ?" - % name + "Exception (%s): output %s cannot be shifted ! How did you end up here ?" + % (name, self.output) ) raise Exception for entry in self.output: @@ -231,7 +232,7 @@ def writecall( Returns: str: The generated C++ call """ - if self.output is None: + if is_empty(self.output): config[shift]["output"] = "" config[shift]["output_vec"] = "" else: @@ -310,7 +311,7 @@ def writecalls( ) raise Exception calls = [self.writecall(config, scope)] - if self.output: + if not is_empty(self.output): list_of_shifts = self.output[0].get_shifts( scope ) # all entries must have same shifts @@ -358,7 +359,7 @@ def get_outputs(self, scope: str) -> List[Union[q.QuantityGroup, q.Quantity]]: ) ) raise Exception - if self.output is None: + if is_empty(self.output): return [] else: return self.output @@ -427,7 +428,7 @@ def writecalls( % (self.vec_configs[0], key) ) raise Exception - if self.output and len(self.output) != n_versions: + if not is_empty(self.output) and len(self.output) != n_versions: log.error( "{} expects either no output or same amount as entries in config lists !".format( self @@ -440,7 +441,7 @@ def writecalls( helper_dict: Dict[Any, Any] = {} for key in self.vec_configs: helper_dict[key] = config[shift][key][i] - if self.output: + if not is_empty(self.output): helper_dict["output"] = ( '"' + self.output[i].get_leaf(shift, scope) + '"' ) @@ -482,7 +483,7 @@ def __init__( # set the vec config key of the quantity group quantity_group.set_vec_config(vec_config) super().__init__(name, call, input, [quantity_group], scope) - if self.output is None: + if is_empty(self.output): raise InvalidProducerConfigurationError(self.name) # add the vec config to the parameters of the producer for scope in self.scopes: @@ -496,7 +497,7 @@ def __repr__(self) -> str: @property def output_group(self) -> q.QuantityGroup: - if self.output is None: + if is_empty(self.output): raise Exception("ExtendedVectorProducer has no output!") if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -520,7 +521,7 @@ def writecalls( """ n_versions = len(config["nominal"][self.vec_config]) log.debug("Number of extended producers to be created {}".format(n_versions)) - if self.output is None: + if is_empty(self.output): raise InvalidProducerConfigurationError(self.name) if not isinstance(self.output[0], q.QuantityGroup): log.error("ExtendedVectorProducer expects a QuantityGroup as output!") @@ -666,7 +667,7 @@ def __init__( else: self.input = dict(input) # If call is provided, this is supposed to consume output of subproducers. Creating these internal products below: - if self.call: + if not is_empty(self.call): log.debug("Constructing {}".format(self.name)) log.debug(" --> Scopes: {}".format(self.scopes)) for scope in self.scopes: @@ -709,7 +710,7 @@ def __init__( log.debug("| ProducerGroup: {}".format(self.name)) log.debug("| Call: {}".format(self.call)) for scope in self.scopes: - if self.input[scope] is None: + if is_empty(self.input[scope]): log.debug("| Inputs ({}): None".format(scope)) else: log.debug( @@ -717,7 +718,7 @@ def __init__( scope, [input.name for input in self.input[scope]] ) ) - if self.output is None: + if is_empty(self.output): log.debug("| Output: None") else: log.debug("| Outputs: {}".format([output.name for output in self.output])) @@ -830,7 +831,11 @@ def writecalls( calls: List[str] = [] for producer in self.producers[scope]: # duplicate outputs of vector subproducers if they were generated automatically - if self.call and isinstance(producer, VectorProducer) and producer.output: + if ( + not is_empty(self.call) + and isinstance(producer, VectorProducer) + and not is_empty(producer.output) + ): for i in range(len(config["nominal"][producer.vec_configs[0]]) - 1): producer.output.append( producer.output[0].copy( @@ -928,7 +933,7 @@ def CollectProducersOutput( ) -> Set[q.Quantity]: output: Set[q.Quantity] = set() for producer in producers: - if producer.output: + if not is_empty(producer.output): output |= set(producer.output) if isinstance(producer, ProducerGroup): try: diff --git a/code_generation/systematics.py b/code_generation/systematics.py index 027011eb..b106de7e 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -11,6 +11,7 @@ TProducerStore, ) from code_generation.quantity import NanoAODQuantity +from code_generation.helpers import is_empty log = logging.getLogger(__name__) @@ -181,7 +182,7 @@ def determine_scopes(self, scopes: Union[List[str], str, None]) -> Set[str]: Returns: set: Set of scopes that are affected by the systematic shift. """ - if scopes is None: + if is_empty(scopes): scope_set: Set[str] = ( set(self.shift_config.keys()) | set(self.input_producers.keys()) @@ -302,7 +303,7 @@ def add_ignore_producer( Returns: None """ - if scopes is None: + if is_empty(scopes): scopes = self.scopes if isinstance(scopes, str): scopes = set(scopes) From ad18f6379d8ff4de2a722887014b78e1696e2891 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 14:14:39 +0100 Subject: [PATCH 10/15] Formating --- code_generation/configuration.py | 14 +++++++------- code_generation/systematics.py | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 2184af4b..8bffa343 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -376,16 +376,16 @@ def add_shift( if scope in shift.get_scopes(): self._add_available_shift(shift, scope) shift.apply(scope) - self.shifts[scope][shift.shiftname] = ( - self.resolve_modifiers(shift.get_shift_config(scope)) - ) + self.shifts[scope][ + shift.shiftname + ] = self.resolve_modifiers(shift.get_shift_config(scope)) else: self._add_available_shift(shift, scope) shift.apply(self.global_scope) - self.shifts[scope][shift.shiftname] = ( - self.resolve_modifiers( - shift.get_shift_config(self.global_scope) - ) + self.shifts[scope][ + shift.shiftname + ] = self.resolve_modifiers( + shift.get_shift_config(self.global_scope) ) else: for scope in scopes_to_shift: diff --git a/code_generation/systematics.py b/code_generation/systematics.py index b106de7e..a0be7241 100644 --- a/code_generation/systematics.py +++ b/code_generation/systematics.py @@ -83,9 +83,9 @@ def __init__( ] = self.expand_producer_dict_keys(ignore_producers) self.producers: TProducerStore = {} self.ignore_producers: TProducerStore = {} - self.shift_config: Dict[str, TConfiguration] = ( - self.expand_configuration_dict_keys(shift_config) - ) + self.shift_config: Dict[ + str, TConfiguration + ] = self.expand_configuration_dict_keys(shift_config) self.scopes: Set[str] = self.determine_scopes(scopes) self.validate() @@ -404,9 +404,9 @@ def __init__( scopes: List of scopes that are affected by the systematic shift. """ super().__init__(name, {}, {}, scopes, {}) - self.quantity_change: Dict[NanoAODQuantity, Union[str, NanoAODQuantity]] = ( - quantity_change - ) + self.quantity_change: Dict[ + NanoAODQuantity, Union[str, NanoAODQuantity] + ] = quantity_change self.quantities: Set[NanoAODQuantity] = set(quantity_change.keys()) def apply(self, scope: str) -> None: From 4ed2afe428672a9e54c174f3973ad1f53f746984 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Mon, 9 Dec 2024 16:36:47 +0100 Subject: [PATCH 11/15] Re-add 'build*/' to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 535496ed..bbe7388b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # folders build/* +build*/ !build/.gitkeep log/ logs/ From a9037aaec63a4443961f03a7518427a9ec0ece21 Mon Sep 17 00:00:00 2001 From: tvoigtlaender <74188699+tvoigtlaender@users.noreply.github.com> Date: Thu, 12 Dec 2024 00:52:17 +0100 Subject: [PATCH 12/15] Merge changes to main in PR branch (#289) * Update introduction.rst * Update changelog.rst * Update changelog.rst * Update postprocessing.rst * Update friend_trees.rst * Update build_root.rst * Update contrib.rst * added missing reference * Update contrib.rst * Fixed references --------- Co-authored-by: jannikdemand <160126226+jannikdemand@users.noreply.github.com> Co-authored-by: jannikdemand --- docs/sphinx_source/build_root.rst | 6 ++--- docs/sphinx_source/changelog.rst | 12 +++++----- docs/sphinx_source/contrib.rst | 10 ++++---- docs/sphinx_source/correction_manager.rst | 2 +- docs/sphinx_source/friend_trees.rst | 28 +++++++++++------------ docs/sphinx_source/introduction.rst | 2 +- docs/sphinx_source/postprocessing.rst | 6 ++--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/sphinx_source/build_root.rst b/docs/sphinx_source/build_root.rst index 3fca6031..66e0ee1a 100644 --- a/docs/sphinx_source/build_root.rst +++ b/docs/sphinx_source/build_root.rst @@ -1,11 +1,11 @@ How to build ROOT with CVMFS on CentOS 7 ========================================= -For profiling with debug symbols or just to test the newest ROOT features, you may want to use your own ROOT version. Here are the commands, which allow you to build ROOT with a given build type, ROOT release tag and C++ 17. +For profiling with debug symbols or just to test the latest ROOT features, you may want to use your own ROOT version. Here are the commands that allow you to build ROOT with a given build type, ROOT release tag and C++ 17. -Most likely, you want to use :code:`RelWithDebInfo` as build type so you get debug symbols but also a realistic performance due to compiler optimizations. +Most likely, you want to use :code:`RelWithDebInfo` as build type. This provides debug symbols while maintaining a realistic performance due to compiler optimizations. -To look up the release tags, go to https://github.com/root-project/root and see the tags (not the branches!). +To look up the release tags, visit https://github.com/root-project/root and check the tags (not the branches!). .. code-block:: console diff --git a/docs/sphinx_source/changelog.rst b/docs/sphinx_source/changelog.rst index e50f7aef..66456c68 100644 --- a/docs/sphinx_source/changelog.rst +++ b/docs/sphinx_source/changelog.rst @@ -4,20 +4,20 @@ Changelog May 2024 - Version 0.4.0 * Switch to ROOT 6.30, supporting now RHEL8 and RHEL9. -* Introduced support for ML inference via `OnnxRuntime`. A generic producer is avialable at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx -* Introduced a CorrectionManager, that is responsible for loading correction files and sharing them among the different producers. This allows to load the corrections only once and share them among the different producers, resulting in a signifiant speedup of the initial loading time. In the course of this implementation, Many functions now have a deprecated version, that does not use the `CorrectionManager`. The old functions will be removed in the next release. A more detailed description can be found in the :ref:`The Correction Manager` page. +* Introduced support for ML inference via `OnnxRuntime`. A generic producer is available at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx +* Introduced a CorrectionManager that is responsible for loading correction files and sharing them across producers. This allows to load the corrections only once and share them among the different producers, resulting in a significant speedup of the initial loading time. As part of this implementation, many functions now have a deprecated version, that does not use the `CorrectionManager`. The old functions will be removed in the next major release. A more detailed description can be found on :ref:`The Correction Manager` page. Sept. 2023 - Version 0.3.0 * Switched to ROOT 6.28 via LCG 104, resulting in about 20% faster processing times. -* Added support for the generation of friend trees with additional friends as input. For more details, check :ref:`FriendTree Generation`. -* Added option to compile the CROWNlib only, allowing to reuse the same libary for multiple CROWN executables. +* Added support for generating friend trees with additional input friends.. For more details, check :ref:`FriendTree Generation`. +* Added option to compile the CROWNlib only, allowing to reuse the same library for multiple CROWN executables. Feb. 2023 * Added support for the generation of friend trees. For more details, check :ref:`FriendTree Generation`. -* Added documentation on ntuple and friend production via KingMaker. For more details, check :ref:`KingMaker`. +* Added documentation on Ntuple and friend production via KingMaker. For more details, check :ref:`Workflow Management`. Jan. 2023 -* Added Quantities <-> Shifts mapping to the output files to allow an easier Postprocessing. For more details, check :ref:`Quantity mapping`. +* Added Quantities <-> Shifts mapping to the output files to allow easier postprocessing. For more details, check :ref:`Quantity mapping`. diff --git a/docs/sphinx_source/contrib.rst b/docs/sphinx_source/contrib.rst index ed6c242f..7f0f0d1d 100644 --- a/docs/sphinx_source/contrib.rst +++ b/docs/sphinx_source/contrib.rst @@ -1,18 +1,18 @@ Writing a new producer ======================= -Writing a new producer requires two main parts, adding the :ref:`C++ function` and the required :ref:`python part`. +Writing a new producer involves two main parts, adding the :ref:`C++ function` and the required :ref:`python component`. -If the C++ function is written generally enough, it can be used in multiple producers and multiple purposes in the end. +If the C++ function is written generically enough, it can be used in multiple producers and multiple purposes in the end. For example, the producer generating the pt_1 quantity can be used regardless of what particle is being considered. -In the following, an introduction on how to add a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind, that those two parts are connected. +In the following, an introduction to adding a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind, that those two parts are connected. Writing a new C++ function ============================ -For a new C++ function, a definition in the header file, and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``. -The return type of any function in CROWN should always be ``ROOT::RDF::RNode`` and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this: +For a new C++ function, both a definition in the header file and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``. +The return type of any function in CROWN should always be ``ROOT::RDF::RNode``, and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this: .. code-block:: cpp diff --git a/docs/sphinx_source/correction_manager.rst b/docs/sphinx_source/correction_manager.rst index d70034df..2a31b491 100644 --- a/docs/sphinx_source/correction_manager.rst +++ b/docs/sphinx_source/correction_manager.rst @@ -13,7 +13,7 @@ For now, the CorrectionManager supports the following correction files: - correctionlib files of type ``correction::CompoundCorrection`` using the :cpp:func:`correctionManager::CorrectionManager::loadCompoundCorrection` function - json files using the :cpp:func:`correctionManager::CorrectionManager::loadjson` function -A Documentation of all Correction Manager functions can be found in :ref:`Namespace:Correctionmanager` +A Documentation of all Correction Manager functions can be found in :ref:`Namespace: Correctionmanager` Required Changes ****************** diff --git a/docs/sphinx_source/friend_trees.rst b/docs/sphinx_source/friend_trees.rst index 9e38c3f6..948788d0 100644 --- a/docs/sphinx_source/friend_trees.rst +++ b/docs/sphinx_source/friend_trees.rst @@ -1,21 +1,21 @@ FriendTree Generation =========================== -CROWN can be used, to generate FriendTrees based on a CROWN ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow to extend an existing ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors. +CROWN can be used, to generate FriendTrees based on a CROWN Ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow to extend an existing ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors. .. image:: ../images/root_friends.png :width: 900 :align: center :alt: Sketch of how Friend trees work -The the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple. +In the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple. A FriendTree is generated using a FriendTreeConfiguration. Such a configuration has some major differences, compared to a regular configuration: -1. The input file is a CROWN ntuple, not a ROOT file. +1. The input file is a CROWN Ntuple, not a ROOT file. 2. Only one scope per user is allowed. 3. No global scope is required -4. The available inputs have to be specified. The available inputs can be provided by using a CROWN ntuple as input, or a JSON file. The ntuple can be used for debugging proposes, when running a production, it is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes. +4. The available inputs must be specified. The available inputs can be provided by using a CROWN Ntuple as input, or a JSON file. The Ntuple can be used for debugging purposes, when running a production. It is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes. .. code-block:: JSON @@ -43,28 +43,28 @@ A FriendTree is generated using a FriendTreeConfiguration. Such a configuration -The recommended way of producing FriendTrees is to use a workflow tool, that manages the submission of jobs, generation of tarballs and organizing the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`KingMaker`. +The recommended way of producing FriendTrees is to use a workflow tool, that manages the submission of jobs, generation of tarballs and organizing the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`Workflow Management`. Writing a FriendTreeConfiguration --------------------------------- -The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, an additional argument has to be provided: +The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, you must provide an additional argument: -* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the crown ntuple root file. +* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the crown Ntuple ROOT file. -All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration has to be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN ntuples, only required shifts are executed. +All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration must be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN Ntuples, only required shifts are executed. FriendTrees with multiple input friend trees -------------------------------------------- -Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of Classifiers, and storing the output of the classifier in the friend tree. This way, the classifier can utilize quantities from both the main ntuple and from additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences: +Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of Classifiers, and storing the output of the classifier in the friend tree. This way, the classifier can utilize quantities from both the main Ntuple and from additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences: -* The information for all input files has to be provided. This means that the ``DQUANTITIESMAP`` has to be extended. It is possible to - 1. provide a single JSON file, that contains the input information for all input files (the crown ntuple + all additional files) +* The information for all input files must provided. This means that the ``DQUANTITIESMAP`` must extended. It is possible to + 1. provide a single JSON file, that contains the input information for all files (the crown Ntuple + all additional files) 2. provide a list of JSON files, each containing the input information for one input file - 3. provide a list of root files (crown ntuple + all additional files) + 3. provide a list of ROOT files (crown Ntuple + all additional files) -During the execution, all input files have to be provided, resulting in a command line like this: +During execution, all input files must be provided, resulting in a command line like this: .. code-block:: bash @@ -74,4 +74,4 @@ During the execution, all input files have to be provided, resulting in a comman Before execution, the input files are checked for consistency. This means that the following checks are performed: * All input files have to contain the same number of entries -* All input files have to be readable (no missing files) \ No newline at end of file +* All input files must be readable and present (no missing files) diff --git a/docs/sphinx_source/introduction.rst b/docs/sphinx_source/introduction.rst index 809a764c..caf4c976 100644 --- a/docs/sphinx_source/introduction.rst +++ b/docs/sphinx_source/introduction.rst @@ -1,7 +1,7 @@ Introduction ============= -The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and it's Dataframe as a backend. +The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and its Dataframe as a backend. Design Idea diff --git a/docs/sphinx_source/postprocessing.rst b/docs/sphinx_source/postprocessing.rst index 28e0569a..53b9887e 100644 --- a/docs/sphinx_source/postprocessing.rst +++ b/docs/sphinx_source/postprocessing.rst @@ -1,13 +1,13 @@ Ntuples in Postprocessing =========================== -The CROWN Ntuples can be used by any Postprocessing framework. Some things have to be kept in mind, in order to ensure an easy processing. -Most important difference is, that only quantities affected by a shift are recalculated. This means the prostprocessing framework must be able to use a mixture of the original and the shifted quantities, when applying shifts. In order to make this step a bit easier, the information, which quantities are affected by a shift, is stored in the Ntuple. +The CROWN Ntuples can be used by any Postprocessing framework. There are a few things to keep in mind to ensure easy processing. +The most important difference is that only quantities affected by a shift are recalculated. This means the postprocessing framework must be able to use a mixture of the original and shifted quantities, when applying shifts. In order to make this step a bit easier, the information which quantities are affected by a shift, is stored in the Ntuple. Quantity mapping ***************** -To read the mapping from a NTuple, the python function listed below may be used. Two types of mapping are available, depending on the actual usecase. In the first, the mapping is sorted by shift; in the second the mapping is sorted by quantity. +To read the mapping from an Ntuple, the python function listed below may be used. Two types of mapping are available, depending on the actual use case. In the first, the mapping is sorted by shift; in the second the mapping is sorted by quantity. .. code-block:: python From 861e792532bc0ccb9da02fe25f53558a25efd4a8 Mon Sep 17 00:00:00 2001 From: tvoigtlaender <74188699+tvoigtlaender@users.noreply.github.com> Date: Thu, 12 Dec 2024 00:53:50 +0100 Subject: [PATCH 13/15] Revert "Merge changes to main in PR branch" (#290) Revert "Merge changes to main in PR branch (#289)" This reverts commit a9037aaec63a4443961f03a7518427a9ec0ece21. --- docs/sphinx_source/build_root.rst | 6 ++--- docs/sphinx_source/changelog.rst | 12 +++++----- docs/sphinx_source/contrib.rst | 10 ++++---- docs/sphinx_source/correction_manager.rst | 2 +- docs/sphinx_source/friend_trees.rst | 28 +++++++++++------------ docs/sphinx_source/introduction.rst | 2 +- docs/sphinx_source/postprocessing.rst | 6 ++--- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/sphinx_source/build_root.rst b/docs/sphinx_source/build_root.rst index 66e0ee1a..3fca6031 100644 --- a/docs/sphinx_source/build_root.rst +++ b/docs/sphinx_source/build_root.rst @@ -1,11 +1,11 @@ How to build ROOT with CVMFS on CentOS 7 ========================================= -For profiling with debug symbols or just to test the latest ROOT features, you may want to use your own ROOT version. Here are the commands that allow you to build ROOT with a given build type, ROOT release tag and C++ 17. +For profiling with debug symbols or just to test the newest ROOT features, you may want to use your own ROOT version. Here are the commands, which allow you to build ROOT with a given build type, ROOT release tag and C++ 17. -Most likely, you want to use :code:`RelWithDebInfo` as build type. This provides debug symbols while maintaining a realistic performance due to compiler optimizations. +Most likely, you want to use :code:`RelWithDebInfo` as build type so you get debug symbols but also a realistic performance due to compiler optimizations. -To look up the release tags, visit https://github.com/root-project/root and check the tags (not the branches!). +To look up the release tags, go to https://github.com/root-project/root and see the tags (not the branches!). .. code-block:: console diff --git a/docs/sphinx_source/changelog.rst b/docs/sphinx_source/changelog.rst index 66456c68..e50f7aef 100644 --- a/docs/sphinx_source/changelog.rst +++ b/docs/sphinx_source/changelog.rst @@ -4,20 +4,20 @@ Changelog May 2024 - Version 0.4.0 * Switch to ROOT 6.30, supporting now RHEL8 and RHEL9. -* Introduced support for ML inference via `OnnxRuntime`. A generic producer is available at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx -* Introduced a CorrectionManager that is responsible for loading correction files and sharing them across producers. This allows to load the corrections only once and share them among the different producers, resulting in a significant speedup of the initial loading time. As part of this implementation, many functions now have a deprecated version, that does not use the `CorrectionManager`. The old functions will be removed in the next major release. A more detailed description can be found on :ref:`The Correction Manager` page. +* Introduced support for ML inference via `OnnxRuntime`. A generic producer is avialable at this link: https://github.com/KIT-CMS/CROWN/blob/main/include/ml.hxx +* Introduced a CorrectionManager, that is responsible for loading correction files and sharing them among the different producers. This allows to load the corrections only once and share them among the different producers, resulting in a signifiant speedup of the initial loading time. In the course of this implementation, Many functions now have a deprecated version, that does not use the `CorrectionManager`. The old functions will be removed in the next release. A more detailed description can be found in the :ref:`The Correction Manager` page. Sept. 2023 - Version 0.3.0 * Switched to ROOT 6.28 via LCG 104, resulting in about 20% faster processing times. -* Added support for generating friend trees with additional input friends.. For more details, check :ref:`FriendTree Generation`. -* Added option to compile the CROWNlib only, allowing to reuse the same library for multiple CROWN executables. +* Added support for the generation of friend trees with additional friends as input. For more details, check :ref:`FriendTree Generation`. +* Added option to compile the CROWNlib only, allowing to reuse the same libary for multiple CROWN executables. Feb. 2023 * Added support for the generation of friend trees. For more details, check :ref:`FriendTree Generation`. -* Added documentation on Ntuple and friend production via KingMaker. For more details, check :ref:`Workflow Management`. +* Added documentation on ntuple and friend production via KingMaker. For more details, check :ref:`KingMaker`. Jan. 2023 -* Added Quantities <-> Shifts mapping to the output files to allow easier postprocessing. For more details, check :ref:`Quantity mapping`. +* Added Quantities <-> Shifts mapping to the output files to allow an easier Postprocessing. For more details, check :ref:`Quantity mapping`. diff --git a/docs/sphinx_source/contrib.rst b/docs/sphinx_source/contrib.rst index 7f0f0d1d..ed6c242f 100644 --- a/docs/sphinx_source/contrib.rst +++ b/docs/sphinx_source/contrib.rst @@ -1,18 +1,18 @@ Writing a new producer ======================= -Writing a new producer involves two main parts, adding the :ref:`C++ function` and the required :ref:`python component`. +Writing a new producer requires two main parts, adding the :ref:`C++ function` and the required :ref:`python part`. -If the C++ function is written generically enough, it can be used in multiple producers and multiple purposes in the end. +If the C++ function is written generally enough, it can be used in multiple producers and multiple purposes in the end. For example, the producer generating the pt_1 quantity can be used regardless of what particle is being considered. -In the following, an introduction to adding a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind, that those two parts are connected. +In the following, an introduction on how to add a new producer is given. As an example, we will add a new producer, which can be used to calculate the Lorentz vectors of particles, in our case electrons. For simplicity, we only want to calculate one single Lorentz vector for a given index. First, we will do the C++ implementation of the function followed by the Python definition. Keep in mind, that those two parts are connected. Writing a new C++ function ============================ -For a new C++ function, both a definition in the header file and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``. -The return type of any function in CROWN should always be ``ROOT::RDF::RNode``, and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this: +For a new C++ function, a definition in the header file, and the implementation in the source file are required. As good practice, we will add the function to a namespace called ``lorentzvector``, and call the function ``build``. +The return type of any function in CROWN should always be ``ROOT::RDF::RNode`` and the first argument of the function should always be the RDataframe, where we want to Define our new quantity. This means the basic definition of the function should look like this: .. code-block:: cpp diff --git a/docs/sphinx_source/correction_manager.rst b/docs/sphinx_source/correction_manager.rst index 2a31b491..d70034df 100644 --- a/docs/sphinx_source/correction_manager.rst +++ b/docs/sphinx_source/correction_manager.rst @@ -13,7 +13,7 @@ For now, the CorrectionManager supports the following correction files: - correctionlib files of type ``correction::CompoundCorrection`` using the :cpp:func:`correctionManager::CorrectionManager::loadCompoundCorrection` function - json files using the :cpp:func:`correctionManager::CorrectionManager::loadjson` function -A Documentation of all Correction Manager functions can be found in :ref:`Namespace: Correctionmanager` +A Documentation of all Correction Manager functions can be found in :ref:`Namespace:Correctionmanager` Required Changes ****************** diff --git a/docs/sphinx_source/friend_trees.rst b/docs/sphinx_source/friend_trees.rst index 948788d0..9e38c3f6 100644 --- a/docs/sphinx_source/friend_trees.rst +++ b/docs/sphinx_source/friend_trees.rst @@ -1,21 +1,21 @@ FriendTree Generation =========================== -CROWN can be used, to generate FriendTrees based on a CROWN Ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow to extend an existing ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors. +CROWN can be used, to generate FriendTrees based on a CROWN ntuple. The concept of FriendTrees is explained here: https://root.cern/manual/trees/#widening-a-ttree-through-friends. They allow to extend an existing ntuple with new quantities. Common use cases are new high-level variables like neural network outputs or additional correction factors. .. image:: ../images/root_friends.png :width: 900 :align: center :alt: Sketch of how Friend trees work -In the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple. +The the example depicted above, two additional friends to the main NTuple are created. During analysis, the quantities stored in the friend trees can be added by using the ``AddFriend`` method. The quantities are then available in the TTree as if they were part of the original NTuple. A FriendTree is generated using a FriendTreeConfiguration. Such a configuration has some major differences, compared to a regular configuration: -1. The input file is a CROWN Ntuple, not a ROOT file. +1. The input file is a CROWN ntuple, not a ROOT file. 2. Only one scope per user is allowed. 3. No global scope is required -4. The available inputs must be specified. The available inputs can be provided by using a CROWN Ntuple as input, or a JSON file. The Ntuple can be used for debugging purposes, when running a production. It is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes. +4. The available inputs have to be specified. The available inputs can be provided by using a CROWN ntuple as input, or a JSON file. The ntuple can be used for debugging proposes, when running a production, it is recommended to use a JSON file. The basic structure of this quantities map is listed below. Such a JSON can then be used for multiple eras, sample types and scopes. .. code-block:: JSON @@ -43,28 +43,28 @@ A FriendTree is generated using a FriendTreeConfiguration. Such a configuration -The recommended way of producing FriendTrees is to use a workflow tool, that manages the submission of jobs, generation of tarballs and organizing the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`Workflow Management`. +The recommended way of producing FriendTrees is to use a workflow tool, that manages the submission of jobs, generation of tarballs and organizing the output. One possible workflow tool choice is KingMaker (https://github.com/KIT-CMS/KingMaker). A more detailed description of the KingMaker workflow can be found in :ref:`KingMaker`. Writing a FriendTreeConfiguration --------------------------------- -The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, you must provide an additional argument: +The basic structure of a FriendTreeConfiguration is identical to a regular configuration. When creating a new FriendTree executable, an additional argument has to be provided: -* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the crown Ntuple ROOT file. +* ``DQUANTITIESMAP`` - The path to the quantities map JSON file or the crown ntuple root file. -All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration must be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN Ntuples, only required shifts are executed. +All other parameters are identical to the regular configuration. Setting up producers, outputs and new systematic shifts works the same way as before. The configuration has to be of type ``FriendTreeConfiguration``. During the configuration, the available inputs are checked for consistency, to catch any possible misconfiguration early. In addition, as for CROWN ntuples, only required shifts are executed. FriendTrees with multiple input friend trees -------------------------------------------- -Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of Classifiers, and storing the output of the classifier in the friend tree. This way, the classifier can utilize quantities from both the main Ntuple and from additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences: +Starting from version 0.3 of CROWN, it is also possible to use multiple input friend trees. A typical use case for this feature is the evaluation of Classifiers, and storing the output of the classifier in the friend tree. This way, the classifier can utilize quantities from both the main ntuple and from additional friend trees. The interface for configuring such a FriendTree executable is similar to the regular FriendTree configuration, with the following differences: -* The information for all input files must provided. This means that the ``DQUANTITIESMAP`` must extended. It is possible to - 1. provide a single JSON file, that contains the input information for all files (the crown Ntuple + all additional files) +* The information for all input files has to be provided. This means that the ``DQUANTITIESMAP`` has to be extended. It is possible to + 1. provide a single JSON file, that contains the input information for all input files (the crown ntuple + all additional files) 2. provide a list of JSON files, each containing the input information for one input file - 3. provide a list of ROOT files (crown Ntuple + all additional files) + 3. provide a list of root files (crown ntuple + all additional files) -During execution, all input files must be provided, resulting in a command line like this: +During the execution, all input files have to be provided, resulting in a command line like this: .. code-block:: bash @@ -74,4 +74,4 @@ During execution, all input files must be provided, resulting in a command line Before execution, the input files are checked for consistency. This means that the following checks are performed: * All input files have to contain the same number of entries -* All input files must be readable and present (no missing files) +* All input files have to be readable (no missing files) \ No newline at end of file diff --git a/docs/sphinx_source/introduction.rst b/docs/sphinx_source/introduction.rst index caf4c976..809a764c 100644 --- a/docs/sphinx_source/introduction.rst +++ b/docs/sphinx_source/introduction.rst @@ -1,7 +1,7 @@ Introduction ============= -The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and its Dataframe as a backend. +The **C** ++-based **RO** OT **W** orkflow for **N** -tuples (CROWN) is a fast new way to convert NanoAOD samples into flat :code:`TTrees` to be used in further analysis. The main focus of the framework is to provide a fast and clean way of selecting events and calculating quantities and weights. The framework has minimal dependencies and only uses ROOT and it's Dataframe as a backend. Design Idea diff --git a/docs/sphinx_source/postprocessing.rst b/docs/sphinx_source/postprocessing.rst index 53b9887e..28e0569a 100644 --- a/docs/sphinx_source/postprocessing.rst +++ b/docs/sphinx_source/postprocessing.rst @@ -1,13 +1,13 @@ Ntuples in Postprocessing =========================== -The CROWN Ntuples can be used by any Postprocessing framework. There are a few things to keep in mind to ensure easy processing. -The most important difference is that only quantities affected by a shift are recalculated. This means the postprocessing framework must be able to use a mixture of the original and shifted quantities, when applying shifts. In order to make this step a bit easier, the information which quantities are affected by a shift, is stored in the Ntuple. +The CROWN Ntuples can be used by any Postprocessing framework. Some things have to be kept in mind, in order to ensure an easy processing. +Most important difference is, that only quantities affected by a shift are recalculated. This means the prostprocessing framework must be able to use a mixture of the original and the shifted quantities, when applying shifts. In order to make this step a bit easier, the information, which quantities are affected by a shift, is stored in the Ntuple. Quantity mapping ***************** -To read the mapping from an Ntuple, the python function listed below may be used. Two types of mapping are available, depending on the actual use case. In the first, the mapping is sorted by shift; in the second the mapping is sorted by quantity. +To read the mapping from a NTuple, the python function listed below may be used. Two types of mapping are available, depending on the actual usecase. In the first, the mapping is sorted by shift; in the second the mapping is sorted by quantity. .. code-block:: python From c6acbae4d9494be64640dcec1c85f52fd76d5a4f Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Tue, 21 Jan 2025 14:05:36 +0100 Subject: [PATCH 14/15] undo change from PR that did not add anything --- code_generation/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 8bffa343..7489cb32 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -618,7 +618,7 @@ def _remove_empty_configkeys(self, config) -> None: Returns: None """ - for key in list(config): + for key in config: if isinstance(config[key], dict): self._remove_empty_configkeys(config[key]) # special case for extended vector producers, here we can have a list, that contains empty dicts From 6bd7f6372cd42c387c37124ef05cf8b053cf2408 Mon Sep 17 00:00:00 2001 From: tvoigtlaender Date: Tue, 21 Jan 2025 14:08:55 +0100 Subject: [PATCH 15/15] move changes to global_scope behavior to new PR --- code_generation/configuration.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/code_generation/configuration.py b/code_generation/configuration.py index 7489cb32..71184cdf 100644 --- a/code_generation/configuration.py +++ b/code_generation/configuration.py @@ -63,7 +63,6 @@ def __init__( available_sample_types: Union[str, List[str]], available_eras: Union[str, List[str]], available_scopes: Union[str, List[str]], - global_scope: bool = True, ): """ @@ -90,10 +89,7 @@ def __init__( self.available_scopes = set(available_scopes) self.available_outputs: Dict[str, QuantitiesStore] = {} self.available_shifts: Dict[str, Set[str]] = {} - if global_scope: - self.global_scope = "global" - else: - self.global_scope = None + self.global_scope = "global" self.producers: TProducerStore = {} self.unpacked_producers: TProducerStore = {}