miscellaneous fixes for tensorflow 2.16
kicking down the can

recover previous behaviour

try-except for 3.11

deal with type mismatch

make sure units are int

remove pdb

fix change in how weights are named

Update n3fit/src/n3fit/tests/test_hyperopt.py

0 is understood as None by initializer

change scope of hyperopt test

bugfix

3.12
scarlehoff committed Mar 4, 2024
1 parent b2cd6fb commit db760e4
Showing 12 changed files with 105 additions and 78 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python_installation.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.11"]
+        python-version: ["3.11", "3.12"]
         include:
           - os: ubuntu-latest
             CONDA_OS: linux-64
15 changes: 4 additions & 11 deletions n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
@@ -9,12 +9,7 @@
 """

 from tensorflow.keras.layers import Layer
-from tensorflow.keras.initializers import (
-    Constant,
-    RandomUniform,
-    glorot_normal,
-    glorot_uniform,
-)
+from tensorflow.keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform

 # Define in this dictionary new initializers as well as the arguments they accept (with default values if needed be)
 initializers = {
@@ -37,9 +32,7 @@ class MetaLayer(Layer):
     weight_inits = []

     # Building function
-    def builder_helper(
-        self, name, kernel_shape, initializer, trainable=True, constraint=None
-    ):
+    def builder_helper(self, name, kernel_shape, initializer, trainable=True, constraint=None):
         """
         Creates a kernel that should be saved as an attribute of the caller class
         name: name of the kernel
@@ -73,9 +66,9 @@ def get_weight_by_name(self, weight_name, internal_count=0):
         weight_name: str
             Name of the weight
         """
-        check_name = f"{self.name}/{weight_name}:{internal_count}"
+        main_name = f"{self.name}/{weight_name}"
         for weight in self.weights:
-            if weight.name == check_name:
+            if weight.name in (f"{main_name}:{internal_count}", main_name, weight_name):
                 return weight
         return None
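The lookup above papers over a Keras naming change: Keras 2 (TF < 2.16) reports weights as "<layer>/<name>:<index>", while Keras 3 reports the bare variable name. A minimal self-contained sketch of the same tolerant lookup, using a stock Dense layer instead of a MetaLayer:

import tensorflow as tf


def find_weight(layer, weight_name, internal_count=0):
    # Accept Keras 2 ("dense/kernel:0"), intermediate ("dense/kernel") and
    # Keras 3 ("kernel") naming schemes, mirroring the patched method above.
    main_name = f"{layer.name}/{weight_name}"
    for weight in layer.weights:
        if weight.name in (f"{main_name}:{internal_count}", main_name, weight_name):
            return weight
    return None


layer = tf.keras.layers.Dense(2, name="dense")
layer.build((None, 4))
print(find_weight(layer, "kernel").shape)  # (4, 2) under either naming scheme
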
19 changes: 12 additions & 7 deletions n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -6,8 +6,8 @@
 """

 import re
+import shutil

-import h5py
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import optimizers as Kopt
@@ -16,12 +16,6 @@

 import n3fit.backends.keras_backend.operations as op

-# Check the TF version to check if legacy-mode is needed (TF < 2.2)
-tf_version = tf.__version__.split(".")
-if int(tf_version[0]) == 2 and int(tf_version[1]) < 2:
-    raise NotImplementedError("n3fit needs TF > 2.2 in order to work")
-
-
 # We need a function to transform tensors to numpy/python primitives
 # which is not part of the official TF interface and can change with the version
 if hasattr(tf_utils, "to_numpy_or_python_type"):
@@ -414,6 +408,17 @@ def load_identical_replicas(self, model_file):
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)

+    def save_weights(self, file, save_format="h5"):
+        """
+        Compatibility function for tf < 2.16
+        """
+        try:
+            super().save_weights(file, save_format=save_format)
+        except TypeError:
+            new_file = file.with_suffix(".weights.h5")
+            super().save_weights(new_file)
+            shutil.move(new_file, file)
+

 def is_stacked_single_replicas(layer):
     """
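A standalone sketch of the fallback above (model and file are stand-ins; in n3fit, file is a pathlib.Path): Keras 3 dropped the save_format argument and requires weight files to end in ".weights.h5", so the old call raises TypeError, and the wrapper saves under the new name and then moves the result to the requested path:

import pathlib
import shutil

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.Input((4,)), tf.keras.layers.Dense(1)])
file = pathlib.Path("model_weights.h5")

try:
    model.save_weights(file, save_format="h5")  # tf < 2.16
except TypeError:
    # tf >= 2.16: no `save_format`, and the name must end in .weights.h5
    new_file = file.with_suffix(".weights.h5")
    model.save_weights(new_file)
    shutil.move(new_file, file)
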
23 changes: 15 additions & 8 deletions n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -10,9 +10,10 @@

 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard

 log = logging.getLogger(__name__)

@@ -30,7 +31,7 @@ def __init__(self, count_range=100):
         self.last_time = 0

     def on_epoch_end(self, epoch, logs=None):
-        """ At the end of every epoch it checks the time """
+        """At the end of every epoch it checks the time"""
         new_time = time()
         if epoch == 0:
             # The first epoch is only useful for starting
@@ -45,13 +46,13 @@ def on_epoch_end(self, epoch, logs=None):
         self.last_time = new_time

     def on_train_end(self, logs=None):
-        """ Print the results """
+        """Print the results"""
         total_time = time() - self.starting_time
         n_times = len(self.all_times)
         # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code
         # by epoch 100 all parts of the code have usually been called so it's a good compromise
-        mean = np.mean(self.all_times[min(110, n_times-1):])
-        std = np.std(self.all_times[min(110, n_times-1):])
+        mean = np.mean(self.all_times[min(110, n_times - 1) :])
+        std = np.std(self.all_times[min(110, n_times - 1) :])
         log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s")
         log.info(f"> > > Total time: {total_time/60:.5} min")

@@ -75,9 +76,15 @@ def __init__(self, stopping_object, log_freq=100):
         super().__init__()
         self.log_freq = log_freq
         self.stopping_object = stopping_object
+        self._current_loss = None
+
+    def on_epoch_begin(self, epoch, logs=None):
+        # TODO This is an unnecessary performance hit, just for testing
+        self._current_loss = self.model.compute_losses()

     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
+        logs = self._current_loss
         print_stats = ((epoch + 1) % self.log_freq) == 0
         # Note that the input logs correspond to the fit before the weights are updated
         self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats)
@@ -117,7 +124,7 @@ def __init__(self, datasets, multipliers, update_freq=100):
         self.updateable_weights = []

     def on_train_begin(self, logs=None):
-        """ Save an instance of all relevant layers """
+        """Save an instance of all relevant layers"""
         for layer_name in self.datasets:
             layer = self.model.get_layer(layer_name)
             self.updateable_weights.append(layer.weights)
@@ -133,7 +140,7 @@ def _update_weights(self):
                 w.assign(w * multiplier)

     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
         if (epoch + 1) % self.update_freq == 0:
             self._update_weights()
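A condensed sketch of the pattern introduced above (compute_losses is a method of n3fit's MetaModel, assumed available on self.model): the losses are evaluated at the start of the epoch, before that epoch's weight update, and the snapshot is used in place of the logs Keras passes to on_epoch_end:

from tensorflow.keras.callbacks import Callback


class StoppingSnapshot(Callback):
    """Evaluate the losses before the weights are updated each epoch."""

    def __init__(self, stopping_object):
        super().__init__()
        self.stopping_object = stopping_object
        self._current_loss = None

    def on_epoch_begin(self, epoch, logs=None):
        # One extra evaluation per epoch: the performance hit the TODO refers to
        self._current_loss = self.model.compute_losses()

    def on_epoch_end(self, epoch, logs=None):
        # Ignore the incoming logs and use the pre-update snapshot instead
        self.stopping_object.monitor_chi2(self._current_loss, epoch)
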
10 changes: 3 additions & 7 deletions n3fit/src/n3fit/backends/keras_backend/constraints.py
@@ -3,8 +3,8 @@
 """

 import tensorflow as tf
-from tensorflow.keras.constraints import MinMaxNorm
 from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import MinMaxNorm


class MinMaxWeight(MinMaxNorm):
Expand All @@ -14,15 +14,11 @@ class MinMaxWeight(MinMaxNorm):
"""

def __init__(self, min_value, max_value, **kwargs):
super(MinMaxWeight, self).__init__(
min_value=min_value, max_value=max_value, **kwargs
)
super(MinMaxWeight, self).__init__(min_value=min_value, max_value=max_value, **kwargs)

@tf.function
def __call__(self, w):
norms = K.sum(w, axis=self.axis, keepdims=True)
desired = (
self.rate * K.clip(norms, self.min_value, self.max_value)
+ (1 - self.rate) * norms
self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms
)
return w * desired / (K.epsilon() + norms)
4 changes: 2 additions & 2 deletions n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -99,8 +99,8 @@ def call(self, inputs):
         If the input already contains multiple replica outputs, it is equivalent
         to applying each replica to its corresponding input.
         """
-        if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype:
-            inputs = tf.cast(inputs, dtype=self._compute_dtype_object)
+        # cast always
+        inputs = tf.cast(inputs, dtype=self.compute_dtype)

         outputs = self.matmul(inputs)

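For reference, a tiny sketch of why the unconditional cast is safe: the private _compute_dtype_object attribute is gone in Keras 3, while the public compute_dtype property exists in both Keras 2 and 3, and tf.cast is a no-op when the input already has the target dtype:

import tensorflow as tf


class AlwaysCast(tf.keras.layers.Layer):
    def call(self, inputs):
        # cast always; free when inputs already match compute_dtype
        return tf.cast(inputs, dtype=self.compute_dtype)


layer = AlwaysCast(dtype="float32")
print(layer(tf.constant([1, 2, 3])).dtype)  # float32, cast from int32
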
14 changes: 10 additions & 4 deletions n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -22,8 +22,10 @@
 Note that tensor operations can also be applied to layers as the output of a layer is a tensor
 equally operations are automatically converted to layers when used as such.
 """
+
 from typing import Optional

+import keras
 import numpy as np
 import numpy.typing as npt
 import tensorflow as tf
@@ -249,11 +251,15 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None):
     Concatenates a list of numbers or tensor into a bigger tensor
     If the target shape is given, the output is reshaped to said shape
     """
-    concatenated_tensor = tf.concat(tensor_list, axis, name=name)
-    if target_shape:
-        return K.reshape(concatenated_tensor, target_shape)
-    else:
+    try:
+        # For tensorflow >= 2.16, Keras >= 3
+        concatenated_tensor = keras.ops.concatenate(tensor_list, axis=axis)
+    except AttributeError:
+        concatenated_tensor = tf.concat(tensor_list, axis=axis)
+
+    if target_shape is None:
         return concatenated_tensor
+    return K.reshape(concatenated_tensor, target_shape)


 # Mathematical operations
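The try/except above is feature detection rather than version parsing: under TF >= 2.16, import keras resolves to Keras 3 and the keras.ops namespace exists, while under older TF the import still succeeds but the attribute lookup raises AttributeError. A minimal sketch of the same dispatch:

import keras
import numpy as np
import tensorflow as tf

tensor_list = [np.ones((2, 3)), np.zeros((2, 3))]
try:
    out = keras.ops.concatenate(tensor_list, axis=0)  # Keras >= 3
except AttributeError:
    out = tf.concat(tensor_list, axis=0)  # Keras 2 fallback
print(out.shape)  # (4, 3)
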
15 changes: 11 additions & 4 deletions n3fit/src/n3fit/hyper_optimization/hyper_scan.py
@@ -16,6 +16,7 @@
 import logging

 import hyperopt
+from hyperopt.pyll.base import scope
 import numpy as np

 from n3fit.backends import MetaLayer, MetaModel
@@ -36,15 +37,19 @@ def hp_uniform(key, lower_end, higher_end):
     return hyperopt.hp.uniform(key, lower_end, higher_end)


-def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None):
+def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None, make_int=False):
     """Like uniform but admits a step_size"""
     if lower_end is None or higher_end is None:
         return None
     if not step_size:
         step_size = lower_end
     if steps:
         step_size = (higher_end - lower_end) / steps
-    return hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+
+    ret = hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+    if make_int:
+        ret = scope.int(ret)
+    return ret


 def hp_loguniform(key, lower_end, higher_end):
@@ -276,7 +281,7 @@ def stopping(self, min_epochs=None, max_epochs=None, min_patience=None, max_patience=None):
         stopping_key = "stopping_patience"

         if min_epochs is not None and max_epochs is not None:
-            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1)
+            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1, make_int=True)
             self._update_param(epochs_key, epochs)

         if min_patience is not None or max_patience is not None:
@@ -429,7 +434,9 @@ def architecture(
         units = []
         for i in range(n):
             units_label = "nl{0}:-{1}/{0}".format(n, i)
-            units_sampler = hp_quniform(units_label, min_units, max_units, step_size=1)
+            units_sampler = hp_quniform(
+                units_label, min_units, max_units, step_size=1, make_int=True
+            )
             units.append(units_sampler)
         # The number of nodes in the last layer are read from the runcard
         units.append(output_size)
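Why make_int is needed: hp.quniform rounds to the grid but still yields floats (24.0, not 24), which eventually reach Keras layer constructors that now expect integers. Wrapping the search-space node in scope.int makes the cast part of the sampled graph. A small sketch:

import hyperopt
from hyperopt.pyll.base import scope
from hyperopt.pyll.stochastic import sample

space = {
    "plain": hyperopt.hp.quniform("plain", 5, 50, 1),
    "as_int": scope.int(hyperopt.hp.quniform("as_int", 5, 50, 1)),
}
print(sample(space))  # e.g. {'as_int': 23, 'plain': 31.0}
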
9 changes: 4 additions & 5 deletions n3fit/src/n3fit/layers/observable.py
@@ -1,6 +1,8 @@
-from n3fit.backends import MetaLayer
+from abc import ABC, abstractmethod
+
 import numpy as np
-from abc import abstractmethod, ABC

+from n3fit.backends import MetaLayer
 from n3fit.backends import operations as op


@@ -68,9 +70,6 @@ def __init__(self, fktable_data, fktable_arr, operation_name, nfl=14, **kwargs):
         self.operation = op.c_to_py_fun(operation_name)
         self.output_dim = self.fktables[0].shape[0]

-    def compute_output_shape(self, input_shape):
-        return (self.output_dim, None)
-
     # Overridables
     @abstractmethod
     def gen_mask(self, basis):
20 changes: 10 additions & 10 deletions n3fit/src/n3fit/model_gen.py
@@ -9,6 +9,7 @@
 """
+
 from dataclasses import dataclass
 from typing import Callable, List

@@ -73,7 +74,9 @@ def _generate_loss(self, mask=None):
         if self.invcovmat is not None:
             if self.rotation:
                 # If we have a matrix diagonal only, padd with 0s and hope it's not too heavy on memory
-                invcovmat_matrix = np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                invcovmat_matrix = (
+                    np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                )
                 if self.covmat is not None:
                     covmat_matrix = np.eye(self.covmat.shape[-1]) * self.covmat[..., np.newaxis]
                 else:
@@ -82,11 +85,7 @@
                 covmat_matrix = self.covmat
                 invcovmat_matrix = self.invcovmat
             loss = losses.LossInvcovmat(
-                invcovmat_matrix,
-                self.data,
-                mask,
-                covmat=covmat_matrix,
-                name=self.name
+                invcovmat_matrix, self.data, mask, covmat=covmat_matrix, name=self.name
             )
         elif self.positivity:
             loss = losses.LossPositivity(name=self.name, c=self.multiplier)
@@ -642,9 +641,10 @@ def compute_unnormalized_pdf(x):

     if photons:
         # add batch and flavor dimensions
-        photon_integrals = op.batchit(op.batchit(photons.integral))
+        ph_tensor = op.numpy_to_tensor(photons.integral)
+        photon_integrals = op.batchit(op.batchit(ph_tensor))
     else:
-        photon_integrals = np.zeros((1, num_replicas, 1))
+        photon_integrals = op.numpy_to_tensor(np.zeros((1, num_replicas, 1)))

     PDFs_normalized = sumrule_layer(
         {
@@ -737,7 +737,7 @@ def layer_generator(i_layer, nodes_out, activation):
         layer = base_layer_selector(
             layer_type,
             kernel_initializer=initializers,
-            units=nodes_out,
+            units=int(nodes_out),
             activation=activation,
             input_shape=(nodes_in,),
             basis_size=basis_size,
@@ -755,7 +755,7 @@ def layer_generator(i_layer, nodes_out, activation):
             layer_type,
             replica_seeds=replica_seeds,
             kernel_initializer=MetaLayer.select_initializer(initializer_name, seed=i_layer),
-            units=nodes_out,
+            units=int(nodes_out),
             activation=activation,
             is_first_layer=(i_layer == 0),
             regularizer=reg,
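And the receiving end of the same fix, as a minimal sketch: Keras 2 used to coerce a float units argument silently, whereas Keras 3 is strict about it, hence the explicit cast before the layer is built:

import tensorflow as tf

nodes_out = 25.0  # e.g. a value sampled by hyperopt's quniform
layer = tf.keras.layers.Dense(units=int(nodes_out))  # make sure units are int
print(layer.units)  # 25
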
[Diffs for the remaining 2 changed files did not load.]
