diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5
index 385ea55a75..e6fff8585a 100644
Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ
diff --git a/n3fit/src/n3fit/backends/keras_backend/base_layers.py b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
index 14215e9cf6..36a2016fee 100644
--- a/n3fit/src/n3fit/backends/keras_backend/base_layers.py
+++ b/n3fit/src/n3fit/backends/keras_backend/base_layers.py
@@ -17,34 +17,44 @@
 The names of the layer and the activation function are the ones to be used in the n3fit runcard.
 """
-from tensorflow.keras.layers import Lambda, LSTM, Dropout, Concatenate
-from tensorflow.keras.layers import concatenate, Input  # pylint: disable=unused-import
+from tensorflow import expand_dims, math, nn
+from tensorflow.keras.layers import (  # pylint: disable=unused-import
+    Dropout,
+    Input,
+    Lambda,
+    concatenate,
+)
 from tensorflow.keras.layers import Dense as KerasDense
-from tensorflow import expand_dims
+from tensorflow.keras.layers import LSTM, Concatenate  # pylint: disable=unused-import
 from tensorflow.keras.regularizers import l1_l2
-from tensorflow import nn, math
 from n3fit.backends import MetaLayer
+from n3fit.backends.keras_backend.multi_dense import MultiDense
+
 
 # Custom activation functions
 def square_activation(x):
-    """ Squares the input """
-    return x*x
+    """Squares the input"""
+    return x * x
+
 
 def modified_tanh(x):
-    """ A non-saturating version of the tanh function """
-    return math.abs(x)*nn.tanh(x)
+    """A non-saturating version of the tanh function"""
+    return math.abs(x) * nn.tanh(x)
+
 
 def leaky_relu(x):
-    """ Computes the Leaky ReLU activation function """
+    """Computes the Leaky ReLU activation function"""
     return nn.leaky_relu(x, alpha=0.2)
+
 
 custom_activations = {
-    "square" : square_activation,
+    "square": square_activation,
     "leaky_relu": leaky_relu,
     "modified_tanh": modified_tanh,
 }
+
 
 def LSTM_modified(**kwargs):
     """
     LSTM asks for a sample X timestep X features kind of thing so we need to reshape the input
@@ -61,9 +71,11 @@ def ReshapedLSTM(input_tensor):
 
     return ReshapedLSTM
 
+
 class Dense(KerasDense, MetaLayer):
     pass
 
+
 def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_kwargs):
     """
     Generates a list of layers which can take as an input either one single layer
@@ -85,7 +97,7 @@ def dense_per_flavour(basis_size=8, kernel_initializer="glorot_normal", **dense_
 
     # Need to generate a list of dense layers
     dense_basis = [
-        base_layer_selector("dense", kernel_initializer=initializer, **dense_kwargs)
+        base_layer_selector("single_dense", kernel_initializer=initializer, **dense_kwargs)
         for initializer in kernel_initializer
     ]
 
@@ -116,13 +128,26 @@ def apply_dense(xinput):
 
 layers = {
     "dense": (
+        MultiDense,
+        {
+            "input_shape": (1,),
+            "replica_seeds": None,
+            "kernel_initializer": "glorot_normal",
+            "units": 5,
+            "activation": "sigmoid",
+            "kernel_regularizer": None,
+            "replica_input": True,
+        },
+    ),
+    # This one is only used inside dense_per_flavour
+    "single_dense": (
         Dense,
         {
             "input_shape": (1,),
             "kernel_initializer": "glorot_normal",
             "units": 5,
             "activation": "sigmoid",
-            "kernel_regularizer": None
+            "kernel_regularizer": None,
         },
     ),
     "dense_per_flavour": (
@@ -143,31 +168,28 @@ def apply_dense(xinput):
     "concatenate": (Concatenate, {}),
 }
 
-regularizers = {
-    'l1_l2': (l1_l2, {'l1': 0., 'l2': 0.})
-    }
+regularizers = {'l1_l2': (l1_l2, {'l1': 0.0, 'l2': 0.0})}
+
 
 def base_layer_selector(layer_name, **kwargs):
     """
-    Given a layer name, looks for it in the `layers` dictionary and returns an instance.
+    Given a layer name, looks for it in the `layers` dictionary and returns an instance.
 
-    The layer dictionary defines a number of defaults
-    but they can be overwritten/enhanced through kwargs
+    The layer dictionary defines a number of defaults
+    but they can be overwritten/enhanced through kwargs
 
-    Parameters
-    ----------
-    `layer_name
-        str with the name of the layer
-    `**kwargs`
-        extra optional arguments to pass to the layer (beyond their defaults)
+    Parameters
+    ----------
+    layer_name
+        str with the name of the layer
+    **kwargs
+        extra optional arguments to pass to the layer (beyond their defaults)
     """
     try:
         layer_tuple = layers[layer_name]
     except KeyError as e:
         raise NotImplementedError(
-            "Layer not implemented in keras_backend/base_layers.py: {0}".format(
-                layer_name
-            )
+            "Layer not implemented in keras_backend/base_layers.py: {0}".format(layer_name)
         ) from e
 
     layer_class = layer_tuple[0]
@@ -182,6 +204,7 @@ def base_layer_selector(layer_name, **kwargs):
 
     return layer_class(**layer_args)
 
+
 def regularizer_selector(reg_name, **kwargs):
     """Given a regularizer name looks in the `regularizer` dictionary and
     return an instance.
@@ -204,7 +227,8 @@
         reg_tuple = regularizers[reg_name]
     except KeyError:
         raise NotImplementedError(
-            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name))
+            "Regularizer not implemented in keras_backend/base_layers.py: {0}".format(reg_name)
+        )
 
     reg_class = reg_tuple[0]
     reg_args = reg_tuple[1]
diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
new file mode 100644
index 0000000000..871ad8dfa6
--- /dev/null
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -0,0 +1,179 @@
+from typing import List
+
+import tensorflow as tf
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.layers import Dense, Dropout
+
+
+class MultiDense(Dense):
+    """
+    Dense layer for multiple replicas at the same time.
+
+    For layers beyond the first, the input to this layer already contains multiple replicas.
+    In this case, the `replica_input` argument should be set to `True`, which is the default,
+    and the input shape is (batch_size, replicas, gridsize, features).
+    For the first layer there are no replicas yet, so the `replica_input` argument
+    should be set to `False`, and the input shape is (batch_size, gridsize, features).
+
+    Weights are initialized using the `replica_seeds` list of seeds, and are identical to
+    the weights of a list of single dense layers with the same `replica_seeds`.
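+
+    The kernel is stored with shape (replicas, input_dim, units) and the bias with
+    shape (replicas, 1, units), so the output always carries the replica axis at
+    position 1, i.e. it has shape (batch_size, replicas, gridsize, units).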
+
+    Example
+    -------
+
+    >>> from tensorflow.keras import Sequential
+    >>> from tensorflow.keras.layers import Dense
+    >>> from tensorflow.keras.initializers import GlorotUniform
+    >>> import tensorflow as tf
+    >>> replicas = 2
+    >>> multi_dense_model = Sequential([
+    ...     MultiDense(units=8, replica_seeds=[42, 43], replica_input=False, kernel_initializer=GlorotUniform(seed=0)),
+    ...     MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=0)),
+    ... ])
+    >>> single_models = [
+    ...     Sequential([
+    ...         Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
+    ...         Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r)),
+    ...     ])
+    ...     for r in range(replicas)
+    ... ]
+    >>> gridsize, features = 100, 2
+    >>> multi_dense_model.build(input_shape=(None, gridsize, features))
+    >>> for single_model in single_models:
+    ...     single_model.build(input_shape=(None, gridsize, features))
+    >>> test_input = tf.random.uniform(shape=(1, gridsize, features))
+    >>> multi_dense_output = multi_dense_model(test_input)
+    >>> single_dense_output = tf.stack([single_model(test_input) for single_model in single_models], axis=1)
+    >>> tf.reduce_all(tf.equal(multi_dense_output, single_dense_output))
+
+    Parameters
+    ----------
+    replica_seeds: List[int]
+        List of seeds per replica for the kernel initializer.
+    kernel_initializer: Initializer
+        Initializer class for the kernel.
+    replica_input: bool (default: True)
+        Whether the input already contains multiple replicas.
+    """
+
+    def __init__(
+        self,
+        replica_seeds: List[int],
+        kernel_initializer: Initializer,
+        replica_input: bool = True,
+        **kwargs
+    ):
+        super().__init__(**kwargs)
+        self.replicas = len(replica_seeds)
+        self.replica_seeds = replica_seeds
+        self.kernel_initializer = MultiInitializer(
+            single_initializer=kernel_initializer, replica_seeds=replica_seeds
+        )
+        self.bias_initializer = MultiInitializer(
+            single_initializer=self.bias_initializer, replica_seeds=replica_seeds
+        )
+        self.replica_input = replica_input
+
+    def build(self, input_shape):
+        input_dim = input_shape[-1]
+        self.kernel = self.add_weight(
+            name="kernel",
+            shape=(self.replicas, input_dim, self.units),
+            initializer=self.kernel_initializer,
+            regularizer=self.kernel_regularizer,
+            constraint=self.kernel_constraint,
+        )
+        if self.use_bias:
+            self.bias = self.add_weight(
+                name="bias",
+                shape=(self.replicas, 1, self.units),
+                initializer=self.bias_initializer,
+                regularizer=self.bias_regularizer,
+                constraint=self.bias_constraint,
+            )
+        else:
+            self.bias = None
+        self.input_spec.axes = {-1: input_dim}
+        self.built = True
+
+    def call(self, inputs):
+        """
+        Compute output of shape (batch_size, replicas, gridsize, units).
+
+        For the first layer (`self.replica_input` is `False`), this is equivalent to
+        applying each replica separately and stacking the results along a new
+        replica axis (axis 1).
+        If the input already contains multiple replica outputs, it is equivalent
+        to applying each replica to its corresponding input.
+        """
+        if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype:
+            inputs = tf.cast(inputs, dtype=self._compute_dtype_object)
+
+        input_axes = 'brnf' if self.replica_input else 'bnf'
+        einrule = input_axes + ',rfg->brng'
+        outputs = tf.einsum(einrule, inputs, self.kernel)
+
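+        # Axes: b=batch, r=replica, n=grid points, f=input features, g=output units.
+        # The feature axis of the input is contracted with each replica's kernel,
+        # giving outputs of shape (batch_size, replicas, gridsize, units).
+        # tf.einsum can lose the static shape in graph mode, so it is reattached here.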
+        if not tf.executing_eagerly():
+            output_shape = self.compute_output_shape(inputs.shape.as_list())
+            outputs.set_shape(output_shape)
+
+        if self.use_bias:
+            outputs = outputs + self.bias
+
+        if self.activation is not None:
+            outputs = self.activation(outputs)
+
+        return outputs
+
+    def compute_output_shape(self, input_shape):
+        # Remove the replica axis from the input shape.
+        if self.replica_input:
+            input_shape = input_shape[:1] + input_shape[2:]
+
+        output_shape = super().compute_output_shape(input_shape)
+
+        # Add back the replica axis to the output shape.
+        output_shape = output_shape[:1] + [self.replicas] + output_shape[1:]
+
+        return output_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({"replica_input": self.replica_input, "replica_seeds": self.replica_seeds})
+        return config
+
+
+class MultiInitializer(Initializer):
+    """
+    Multi replica initializer that exactly replicates a stack of single replica initializers.
+
+    Weights are stacked on the first axis, and per replica seeds are added to a base seed of the
+    given single replica initializer.
+
+    Parameters
+    ----------
+    single_initializer: Initializer
+        Initializer class for the kernel.
+    replica_seeds: List[int]
+        List of seeds per replica for the kernel initializer.
+    """
+
+    def __init__(self, single_initializer: Initializer, replica_seeds: List[int]):
+        self.initializer_class = type(single_initializer)
+        self.initializer_config = single_initializer.get_config()
+        self.base_seed = single_initializer.seed if hasattr(single_initializer, "seed") else None
+        self.replica_seeds = replica_seeds
+
+    def __call__(self, shape, dtype=None, **kwargs):
+        shape = shape[1:]  # Remove the replica axis from the shape.
+        per_replica_weights = []
+        for replica_seed in self.replica_seeds:
+            if self.base_seed is not None:
+                self.initializer_config["seed"] = self.base_seed + replica_seed
+            single_initializer = self.initializer_class.from_config(self.initializer_config)
+
+            per_replica_weights.append(single_initializer(shape, dtype, **kwargs))
+
+        return tf.stack(per_replica_weights, axis=0)
diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py
index 885785a268..d93f3b1d9a 100644
--- a/n3fit/src/n3fit/checks.py
+++ b/n3fit/src/n3fit/checks.py
@@ -385,8 +385,8 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica
             "Replicas cannot be run in parallel with different training/validation "
             " masks, please set `same_trvl_per_replica` to True in the runcard"
         )
-    if parameters.get("layer_type") != "dense":
-        raise CheckError("Parallelization has only been tested with layer_type=='dense'")
+    if parameters.get("layer_type") == "dense_per_flavour":
+        raise CheckError("Parallelization has not been tested with layer_type=='dense_per_flavour'")
 
 
 @make_argcheck
@@ -427,10 +427,9 @@ def check_fiatlux_pdfs_id(replicas, fiatlux):
             f"Cannot generate a photon replica with id larger than the number of replicas of the PDFs set {luxset.name}:\nreplica id={max_id}, replicas of {luxset.name} = {pdfs_ids}"
         )
 
+
 @make_argcheck
 def check_multireplica_qed(replicas, fiatlux):
     if fiatlux is not None:
         if len(replicas) > 1:
-            raise CheckError(
-                "At the moment, running a multireplica QED fits is not allowed."
-        )
+        raise CheckError("At the moment, running a multireplica QED fit is not allowed.")
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index fc180f392f..5a69fe9396 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -750,17 +750,28 @@ def initializer_generator(seed, i_layer):
     # list_of_pdf_layers[d][r] is the layer at depth d for replica r
     list_of_pdf_layers = []
    for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
-        layers = [
-            base_layer_selector(
+        if layer_type == "dense":
+            layers = base_layer_selector(
                 layer_type,
-                kernel_initializer=initializer_generator(replica_seed, i_layer),
+                replica_seeds=replica_seeds,
+                kernel_initializer=initializer_generator(0, i_layer),
                 units=nodes_out,
                 activation=activation,
-                input_shape=(nodes_in,),
+                replica_input=(i_layer != 0),
                 **custom_args,
             )
-            for replica_seed in replica_seeds
-        ]
+        else:
+            layers = [
+                base_layer_selector(
+                    layer_type,
+                    kernel_initializer=initializer_generator(replica_seed, i_layer),
+                    units=nodes_out,
+                    activation=activation,
+                    input_shape=(nodes_in,),
+                    **custom_args,
+                )
+                for replica_seed in replica_seeds
+            ]
         list_of_pdf_layers.append(layers)
         nodes_in = int(nodes_out)
 
@@ -775,6 +786,14 @@ def initializer_generator(seed, i_layer):
         list_of_pdf_layers[-1] = [lambda x: concat(layer(x)) for layer in list_of_pdf_layers[-1]]
 
     # Apply all layers to the input to create the models
+    if layer_type == "dense":
+        pdfs = x_input
+        for layer in list_of_pdf_layers:
+            pdfs = layer(pdfs)
+        model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS)
+
+        return model
+
     pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
 
     for layers in list_of_pdf_layers[1:]:
diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5
index 7f9f930184..524ca5389d 100644
Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ
diff --git a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5
index 51061a63f2..6b4bb3d669 100644
Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ
diff --git a/n3fit/src/n3fit/tests/test_modelgen.py b/n3fit/src/n3fit/tests/test_modelgen.py
index ffdaa254b6..c87523f838 100644
--- a/n3fit/src/n3fit/tests/test_modelgen.py
+++ b/n3fit/src/n3fit/tests/test_modelgen.py
@@ -26,16 +26,17 @@
 
 
 def test_generate_dense_network():
-    nn = generate_nn("dense", **COMMON_ARGS).get_layer(f"{NN_PREFIX}_0")
+    nn = generate_nn("dense", **COMMON_ARGS)
 
     # The number of layers should be input layer + len(OUT_SIZES)
     assert len(nn.layers) == len(OUT_SIZES) + 1
     # Check that the number of parameters is as expected
-    # We expect 4 weights where the two first ones are
-    # (INSIZE, OUT_SIZE[0]) (OUT_SIZE[0],)
-    # and the second one
-    # (OUT_SIZE[0], OUT_SIZE[1]) (OUT_SIZE[1],)
-    expected_sizes = [(INSIZE, OUT_SIZES[0]), (OUT_SIZES[0],), OUT_SIZES, (OUT_SIZES[1],)]
+    expected_sizes = [
+        (1, INSIZE, OUT_SIZES[0]),
+        (1, 1, OUT_SIZES[0]),
+        (1, *OUT_SIZES),
+        (1, 1, OUT_SIZES[1]),
+    ]
 
     for weight, esize in zip(nn.weights, expected_sizes):
         assert weight.shape == esize
diff --git a/n3fit/src/n3fit/tests/test_multidense.py b/n3fit/src/n3fit/tests/test_multidense.py
new file mode 100644
index 0000000000..b912c0bd82
--- /dev/null
+++ b/n3fit/src/n3fit/tests/test_multidense.py
@@ -0,0 +1,68 @@
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import Sequential
+from tensorflow.keras.initializers import GlorotUniform
+from tensorflow.keras.layers import Dense
+
+from n3fit.backends.keras_backend.multi_dense import MultiDense
+from n3fit.model_gen import generate_nn
+
+
+def test_multidense():
+    replicas = 2
+    multi_dense_model = Sequential(
+        [
+            MultiDense(
+                units=8,
+                replica_seeds=[42, 43],
+                replica_input=False,
+                kernel_initializer=GlorotUniform(seed=0),
+            ),
+            MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=100)),
+        ]
+    )
+    single_models = [
+        Sequential(
+            [
+                Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
+                Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)),
+            ]
+        )
+        for r in range(replicas)
+    ]
+
+    gridsize, features = 100, 3
+    multi_dense_model.build(input_shape=(None, gridsize, features))
+    for single_model in single_models:
+        single_model.build(input_shape=(None, gridsize, features))
+
+    test_input = tf.random.uniform(shape=(1, gridsize, features))
+    multi_dense_output = multi_dense_model(test_input)
+    single_dense_output = tf.stack(
+        [single_model(test_input) for single_model in single_models], axis=1
+    )
+
+    np.testing.assert_allclose(multi_dense_output, single_dense_output, atol=1e-6, rtol=1e-4)
+
+
+def test_initializers():
+    input_shape = (None, 3, 1)
+    dense_layers = []
+    for r in range(2):
+        dense_layer = Dense(units=2, kernel_initializer=GlorotUniform(seed=42 + r))
+        dense_layer.build(input_shape=input_shape)
+        dense_layers.append(dense_layer)
+    stacked_weights = tf.stack([dense_layer.weights[0] for dense_layer in dense_layers], axis=0)
+
+    multi_dense_layer = MultiDense(
+        units=2,
+        replica_seeds=[0, 1],
+        replica_input=False,
+        kernel_initializer=GlorotUniform(seed=42),
+    )
+    multi_dense_layer.build(input_shape=input_shape)
+
+    multi_dense_weights = multi_dense_layer.weights[0].numpy()
+    stacked_weights = stacked_weights.numpy()
+
+    np.testing.assert_allclose(multi_dense_weights, stacked_weights)
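+
+
+# Illustrative sketch (an editor's addition for exposition, not part of the original
+# test suite; the test name is hypothetical): the einsum rule used by MultiDense.call
+# for replica_input=False, 'bnf,rfg->brng', is exactly a per-replica matmul with the
+# results stacked on a new replica axis (axis 1).
+def test_einsum_matches_per_replica_matmul():
+    batch, gridsize, features, units, replicas = 1, 5, 3, 4, 2
+    inputs = tf.random.uniform(shape=(batch, gridsize, features))
+    kernel = tf.random.uniform(shape=(replicas, features, units))
+    einsum_out = tf.einsum('bnf,rfg->brng', inputs, kernel)
+    # Contract the feature axis of the input with each replica's kernel separately,
+    # then stack the per-replica results along a new axis 1.
+    matmul_out = tf.stack(
+        [tf.tensordot(inputs, kernel[r], axes=[[2], [0]]) for r in range(replicas)], axis=1
+    )
+    np.testing.assert_allclose(einsum_out.numpy(), matmul_out.numpy(), rtol=1e-6)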