From db760e49a212735112e31d0341dcc87897232320 Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Mon, 4 Mar 2024 12:03:58 +0100
Subject: [PATCH] miscellaneous fixes for tensorflow 2.16

kicking the can down the road

recover previous behaviour

try-except for 3.11

deal with type mismatch

make sure units are int

remove pdb

fix change in how weights are named

Update n3fit/src/n3fit/tests/test_hyperopt.py

0 is understood as None by the initializer

change scope of hyperopt test

bugfix 312
---
 .github/workflows/python_installation.yml     |  2 +-
 .../n3fit/backends/keras_backend/MetaLayer.py | 15 +++------
 .../n3fit/backends/keras_backend/MetaModel.py | 19 +++++++-----
 .../n3fit/backends/keras_backend/callbacks.py | 23 +++++++++-----
 .../backends/keras_backend/constraints.py     | 10 ++----
 .../backends/keras_backend/multi_dense.py     |  4 +--
 .../backends/keras_backend/operations.py      | 14 ++++++---
 .../n3fit/hyper_optimization/hyper_scan.py    | 15 ++++++---
 n3fit/src/n3fit/layers/observable.py          |  9 +++---
 n3fit/src/n3fit/model_gen.py                  | 20 ++++++------
 n3fit/src/n3fit/tests/test_hyperopt.py        | 21 +++++++++----
 n3fit/src/n3fit/tests/test_multidense.py      | 31 +++++++++++--------
 12 files changed, 105 insertions(+), 78 deletions(-)

diff --git a/.github/workflows/python_installation.yml b/.github/workflows/python_installation.yml
index 1496c5f743..9116de7943 100644
--- a/.github/workflows/python_installation.yml
+++ b/.github/workflows/python_installation.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: ["3.11"]
+        python-version: ["3.11", "3.12"]
         include:
           - os: ubuntu-latest
             CONDA_OS: linux-64
diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
index af4210d603..31def842c4 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaLayer.py
@@ -9,12 +9,7 @@
 """

 from tensorflow.keras.layers import Layer
-from tensorflow.keras.initializers import (
-    Constant,
-    RandomUniform,
-    glorot_normal,
-    glorot_uniform,
-)
+from tensorflow.keras.initializers import Constant, RandomUniform, glorot_normal, glorot_uniform

 # Define in this dictionary new initializers as well as the arguments they accept (with default values if needed be)
 initializers = {
@@ -37,9 +32,7 @@ class MetaLayer(Layer):
     weight_inits = []

     # Building function
-    def builder_helper(
-        self, name, kernel_shape, initializer, trainable=True, constraint=None
-    ):
+    def builder_helper(self, name, kernel_shape, initializer, trainable=True, constraint=None):
         """
         Creates a kernel that should be saved as an attribute of the caller class
         name: name of the kernel
@@ -73,9 +66,9 @@ def get_weight_by_name(self, weight_name, internal_count=0):
         weight_name: str
             Name of the weight
         """
-        check_name = f"{self.name}/{weight_name}:{internal_count}"
+        main_name = f"{self.name}/{weight_name}"
         for weight in self.weights:
-            if weight.name == check_name:
+            if weight.name in (f"{main_name}:{internal_count}", main_name, weight_name):
                 return weight
         return None

diff --git a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
index 2533d8456f..2956a25d2e 100644
--- a/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
+++ b/n3fit/src/n3fit/backends/keras_backend/MetaModel.py
@@ -6,8 +6,8 @@
 """

 import re
+import shutil

-import h5py
 import numpy as np
 import tensorflow as tf
 from tensorflow.keras import optimizers as Kopt
@@ -16,12 +16,6 @@

 import n3fit.backends.keras_backend.operations as op

-# Check the TF version to check if legacy-mode is needed (TF < 2.2)
-tf_version = tf.__version__.split(".")
-if int(tf_version[0]) == 2 and int(tf_version[1]) < 2:
-    raise NotImplementedError("n3fit needs TF > 2.2 in order to work")
-
-
 # We need a function to transform tensors to numpy/python primitives
 # which is not part of the official TF interface and can change with the version
 if hasattr(tf_utils, "to_numpy_or_python_type"):
@@ -414,6 +408,17 @@ def load_identical_replicas(self, model_file):
         for i_replica in range(self.num_replicas):
             self.set_replica_weights(weights, i_replica)

+    def save_weights(self, file, save_format="h5"):
+        """
+        Compatibility function for tf < 2.16
+        """
+        try:
+            super().save_weights(file, save_format=save_format)
+        except TypeError:
+            new_file = file.with_suffix(".weights.h5")
+            super().save_weights(new_file)
+            shutil.move(new_file, file)
+

 def is_stacked_single_replicas(layer):
     """
diff --git a/n3fit/src/n3fit/backends/keras_backend/callbacks.py b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
index 7349d6be36..c72ea9fb5c 100644
--- a/n3fit/src/n3fit/backends/keras_backend/callbacks.py
+++ b/n3fit/src/n3fit/backends/keras_backend/callbacks.py
@@ -10,9 +10,10 @@
 import logging
 from time import time
+
 import numpy as np
 import tensorflow as tf
-from tensorflow.keras.callbacks import TensorBoard, Callback
+from tensorflow.keras.callbacks import Callback, TensorBoard

 log = logging.getLogger(__name__)


@@ -30,7 +31,7 @@ def __init__(self, count_range=100):
         self.last_time = 0

     def on_epoch_end(self, epoch, logs=None):
-        """ At the end of every epoch it checks the time """
+        """At the end of every epoch it checks the time"""
         new_time = time()
         if epoch == 0:
             # The first epoch is only useful for starting
@@ -45,13 +46,13 @@ def on_epoch_end(self, epoch, logs=None):
         self.last_time = new_time

     def on_train_end(self, logs=None):
-        """ Print the results """
+        """Print the results"""
         total_time = time() - self.starting_time
         n_times = len(self.all_times)
         # Skip the first 100 epochs to avoid fluctuations due to compilations of part of the code
         # by epoch 100 all parts of the code have usually been called so it's a good compromise
-        mean = np.mean(self.all_times[min(110, n_times-1):])
-        std = np.std(self.all_times[min(110, n_times-1):])
+        mean = np.mean(self.all_times[min(110, n_times - 1) :])
+        std = np.std(self.all_times[min(110, n_times - 1) :])
         log.info(f"> > Average time per epoch: {mean:.5} +- {std:.5} s")
         log.info(f"> > > Total time: {total_time/60:.5} min")

@@ -75,9 +76,15 @@ def __init__(self, stopping_object, log_freq=100):
         super().__init__()
         self.log_freq = log_freq
         self.stopping_object = stopping_object
+        self._current_loss = None
+
+    def on_epoch_begin(self, epoch, logs=None):
+        # TODO This is an unnecessary performance hit, just for testing
+        self._current_loss = self.model.compute_losses()

     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
+        logs = self._current_loss
         print_stats = ((epoch + 1) % self.log_freq) == 0
         # Note that the input logs correspond to the fit before the weights are updated
         self.stopping_object.monitor_chi2(logs, epoch, print_stats=print_stats)
@@ -117,7 +124,7 @@ def __init__(self, datasets, multipliers, update_freq=100):
         self.updateable_weights = []

     def on_train_begin(self, logs=None):
-        """ Save an instance of all relevant layers """
+        """Save an instance of all relevant layers"""
         for layer_name in self.datasets:
             layer = self.model.get_layer(layer_name)
             self.updateable_weights.append(layer.weights)
@@ -133,7 +140,7 @@ def _update_weights(self):
                 w.assign(w * multiplier)

     def on_epoch_end(self, epoch, logs=None):
-        """ Function to be called at the end of every epoch """
+        """Function to be called at the end of every epoch"""
         if (epoch + 1) % self.update_freq == 0:
             self._update_weights()

diff --git a/n3fit/src/n3fit/backends/keras_backend/constraints.py b/n3fit/src/n3fit/backends/keras_backend/constraints.py
index b186cd2638..5b1bd8d413 100644
--- a/n3fit/src/n3fit/backends/keras_backend/constraints.py
+++ b/n3fit/src/n3fit/backends/keras_backend/constraints.py
@@ -3,8 +3,8 @@
 """

 import tensorflow as tf
-from tensorflow.keras.constraints import MinMaxNorm
 from tensorflow.keras import backend as K
+from tensorflow.keras.constraints import MinMaxNorm


 class MinMaxWeight(MinMaxNorm):
@@ -14,15 +14,11 @@ class MinMaxWeight(MinMaxNorm):
     """

     def __init__(self, min_value, max_value, **kwargs):
-        super(MinMaxWeight, self).__init__(
-            min_value=min_value, max_value=max_value, **kwargs
-        )
+        super(MinMaxWeight, self).__init__(min_value=min_value, max_value=max_value, **kwargs)

-    @tf.function
     def __call__(self, w):
         norms = K.sum(w, axis=self.axis, keepdims=True)
         desired = (
-            self.rate * K.clip(norms, self.min_value, self.max_value)
-            + (1 - self.rate) * norms
+            self.rate * K.clip(norms, self.min_value, self.max_value) + (1 - self.rate) * norms
         )
         return w * desired / (K.epsilon() + norms)
diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
index 9fa0ac5835..3b2037c47e 100644
--- a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -99,8 +99,8 @@ def call(self, inputs):
         If the input already contains multiple replica outputs, it is equivalent
         to applying each replica to its corresponding input.
         """
-        if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype:
-            inputs = tf.cast(inputs, dtype=self._compute_dtype_object)
+        # cast always
+        inputs = tf.cast(inputs, dtype=self.compute_dtype)

         outputs = self.matmul(inputs)

diff --git a/n3fit/src/n3fit/backends/keras_backend/operations.py b/n3fit/src/n3fit/backends/keras_backend/operations.py
index fe62dfd24e..cbf0641276 100644
--- a/n3fit/src/n3fit/backends/keras_backend/operations.py
+++ b/n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -22,8 +22,10 @@
     Note that tensor operations can also be applied to layers as the output of a layer is a tensor
     equally operations are automatically converted to layers when used as such.
 """
+
 from typing import Optional

+import keras
 import numpy as np
 import numpy.typing as npt
 import tensorflow as tf
@@ -249,11 +251,15 @@ def concatenate(tensor_list, axis=-1, target_shape=None, name=None):
     Concatenates a list of numbers or tensor into a bigger tensor
     If the target shape is given, the output is reshaped to said shape
     """
-    concatenated_tensor = tf.concat(tensor_list, axis, name=name)
-    if target_shape:
-        return K.reshape(concatenated_tensor, target_shape)
-    else:
+    try:
+        # For tensorflow >= 2.16, Keras >= 3
+        concatenated_tensor = keras.ops.concatenate(tensor_list, axis=axis)
+    except AttributeError:
+        concatenated_tensor = tf.concat(tensor_list, axis=axis)
+
+    if target_shape is None:
         return concatenated_tensor
+    return K.reshape(concatenated_tensor, target_shape)


 # Mathematical operations
diff --git a/n3fit/src/n3fit/hyper_optimization/hyper_scan.py b/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
index 174e921677..f06234ff6a 100644
--- a/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
+++ b/n3fit/src/n3fit/hyper_optimization/hyper_scan.py
@@ -16,6 +16,7 @@
 import logging

 import hyperopt
+from hyperopt.pyll.base import scope
 import numpy as np

 from n3fit.backends import MetaLayer, MetaModel
@@ -36,7 +37,7 @@ def hp_uniform(key, lower_end, higher_end):
     return hyperopt.hp.uniform(key, lower_end, higher_end)


-def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None):
+def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None, make_int=False):
     """Like uniform but admits a step_size"""
     if lower_end is None or higher_end is None:
         return None
@@ -44,7 +45,11 @@ def hp_quniform(key, lower_end, higher_end, step_size=None, steps=None):
         step_size = lower_end
     if steps:
         step_size = (higher_end - lower_end) / steps
-    return hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+
+    ret = hyperopt.hp.quniform(key, lower_end, higher_end, step_size)
+    if make_int:
+        ret = scope.int(ret)
+    return ret


 def hp_loguniform(key, lower_end, higher_end):
@@ -276,7 +281,7 @@ def stopping(self, min_epochs=None, max_epochs=None, min_patience=None, max_pati
         stopping_key = "stopping_patience"

         if min_epochs is not None and max_epochs is not None:
-            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1)
+            epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1, make_int=True)
             self._update_param(epochs_key, epochs)

         if min_patience is not None or max_patience is not None:
@@ -429,7 +434,9 @@ def architecture(
         units = []
         for i in range(n):
             units_label = "nl{0}:-{1}/{0}".format(n, i)
-            units_sampler = hp_quniform(units_label, min_units, max_units, step_size=1)
+            units_sampler = hp_quniform(
+                units_label, min_units, max_units, step_size=1, make_int=True
+            )
             units.append(units_sampler)
         # The number of nodes in the last layer are read from the runcard
         units.append(output_size)
diff --git a/n3fit/src/n3fit/layers/observable.py b/n3fit/src/n3fit/layers/observable.py
index 739d7c775b..3337c37885 100644
--- a/n3fit/src/n3fit/layers/observable.py
+++ b/n3fit/src/n3fit/layers/observable.py
@@ -1,6 +1,8 @@
-from n3fit.backends import MetaLayer
+from abc import ABC, abstractmethod
+
 import numpy as np
-from abc import abstractmethod, ABC
+
+from n3fit.backends import MetaLayer
 from n3fit.backends import operations as op


@@ -68,9 +70,6 @@ def __init__(self, fktable_data, fktable_arr, operation_name, nfl=14, **kwargs):
         self.operation = op.c_to_py_fun(operation_name)
         self.output_dim = self.fktables[0].shape[0]

-    def compute_output_shape(self, input_shape):
-        return (self.output_dim, None)
-
     # Overridables
     @abstractmethod
     def gen_mask(self, basis):
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index 219bbdfc11..8c4c30c59a 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -9,6 +9,7 @@

 """
+
 from dataclasses import dataclass
 from typing import Callable, List

@@ -73,7 +74,9 @@ def _generate_loss(self, mask=None):
         if self.invcovmat is not None:
             if self.rotation:
                 # If we have a matrix diagonal only, padd with 0s and hope it's not too heavy on memory
-                invcovmat_matrix = np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                invcovmat_matrix = (
+                    np.eye(self.invcovmat.shape[-1]) * self.invcovmat[..., np.newaxis]
+                )
                 if self.covmat is not None:
                     covmat_matrix = np.eye(self.covmat.shape[-1]) * self.covmat[..., np.newaxis]
                 else:
@@ -82,11 +85,7 @@ def _generate_loss(self, mask=None):
                 covmat_matrix = self.covmat
                 invcovmat_matrix = self.invcovmat
             loss = losses.LossInvcovmat(
-                invcovmat_matrix,
-                self.data,
-                mask,
-                covmat=covmat_matrix,
-                name=self.name
+                invcovmat_matrix, self.data, mask, covmat=covmat_matrix, name=self.name
             )
         elif self.positivity:
             loss = losses.LossPositivity(name=self.name, c=self.multiplier)
@@ -642,9 +641,10 @@ def compute_unnormalized_pdf(x):

         if photons:
             # add batch and flavor dimensions
-            photon_integrals = op.batchit(op.batchit(photons.integral))
+            ph_tensor = op.numpy_to_tensor(photons.integral)
+            photon_integrals = op.batchit(op.batchit(ph_tensor))
         else:
-            photon_integrals = np.zeros((1, num_replicas, 1))
+            photon_integrals = op.numpy_to_tensor(np.zeros((1, num_replicas, 1)))

         PDFs_normalized = sumrule_layer(
             {
@@ -737,7 +737,7 @@ def layer_generator(i_layer, nodes_out, activation):
             layer = base_layer_selector(
                 layer_type,
                 kernel_initializer=initializers,
-                units=nodes_out,
+                units=int(nodes_out),
                 activation=activation,
                 input_shape=(nodes_in,),
                 basis_size=basis_size,
@@ -755,7 +755,7 @@ def layer_generator(i_layer, nodes_out, activation):
                 layer_type,
                 replica_seeds=replica_seeds,
                 kernel_initializer=MetaLayer.select_initializer(initializer_name, seed=i_layer),
-                units=nodes_out,
+                units=int(nodes_out),
                 activation=activation,
                 is_first_layer=(i_layer == 0),
                 regularizer=reg,
diff --git a/n3fit/src/n3fit/tests/test_hyperopt.py b/n3fit/src/n3fit/tests/test_hyperopt.py
index cecc747452..f53377b083 100644
--- a/n3fit/src/n3fit/tests/test_hyperopt.py
+++ b/n3fit/src/n3fit/tests/test_hyperopt.py
@@ -32,13 +32,21 @@ def load_data(info_file):


 def test_restart_from_pickle(tmp_path):
-    """Ensure that our hyperopt restart works as expected"""
+    """Ensure that after a hyperopt restart, the testing continues
+    from the same point.
+    The test is set up so that it does one trial, then stops, then does a second one,
+    and this is compared with two trials run one after the other.
+
+    The test checks that the starting point of the second trial is the same in both cases
+    """
     # Prepare the run
     quickcard = f"hyper-{QUICKNAME}.yml"
     quickpath = REGRESSION_FOLDER / quickcard
-    # Set up some options
-    n_trials_stop = 2
-    n_trials_total = 4
+
+    # Set the test up so that it does one trial, then stops, then does another one
+    # and then compare with two trials run in one go
+    n_trials_stop = 1
+    n_trials_total = 2
     output_restart = tmp_path / f"run_{n_trials_stop}_trials_and_then_{n_trials_total}_trials"
     output_direct = tmp_path / f"run_{n_trials_total}_trials"

@@ -46,7 +54,7 @@ def test_restart_from_pickle(tmp_path):
     shutil.copy(quickpath, tmp_path)
     # run some trials for the first time
     sp.run(
-        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} " f"-o {output_restart}".split(),
+        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} -o {output_restart}".split(),
         cwd=tmp_path,
         check=True,
     )
@@ -78,4 +86,5 @@ def test_restart_from_pickle(tmp_path):
         assert restart_json[i]['misc'] == direct_json[i]['misc']
         assert restart_json[i]['state'] == direct_json[i]['state']
         assert restart_json[i]['tid'] == direct_json[i]['tid']
-        assert restart_json[i]['result'] == direct_json[i]['result']
+        assert restart_json[i]['misc']['idxs'] == direct_json[i]['misc']['idxs']
+        # Note that it doesn't check the final loss of the second trial
diff --git a/n3fit/src/n3fit/tests/test_multidense.py b/n3fit/src/n3fit/tests/test_multidense.py
index 6c7df89e3a..e2a8ddc412 100644
--- a/n3fit/src/n3fit/tests/test_multidense.py
+++ b/n3fit/src/n3fit/tests/test_multidense.py
@@ -15,20 +15,21 @@ def test_multidense():
                 units=8,
                 replica_seeds=[42, 43],
                 is_first_layer=True,
-                kernel_initializer=GlorotUniform(seed=0),
+                kernel_initializer=GlorotUniform(seed=5),
             ),
             MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=100)),
         ]
     )
-    single_models = [
-        Sequential(
-            [
-                Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
-                Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)),
-            ]
+    single_models = []
+    for r in range(replicas):
+        single_models.append(
+            Sequential(
+                [
+                    Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r + 5)),
+                    Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r + 100)),
+                ]
+            )
         )
-        for r in range(replicas)
-    ]

     gridsize, features = 100, 3
     multi_dense_model.build(input_shape=(None, gridsize, features))
@@ -46,12 +47,17 @@ def test_initializers():
     input_shape = (None, 3, 1)

-    dense_layers = []
+    dense_weights = []
     for r in range(2):
         dense_layer = Dense(units=2, kernel_initializer=GlorotUniform(seed=42 + r))
         dense_layer.build(input_shape=input_shape)
-        dense_layers.append(dense_layer)
-    stacked_weights = tf.stack([dense_layer.weights[0] for dense_layer in dense_layers], axis=0)
+        try:
+            dense_weights.append(dense_layer.weights[0].value.numpy())
+        except AttributeError:
+            # In tensorflow < 2.16, value was a function
+            dense_weights.append(dense_layer.weights[0].value().numpy())
+
+    stacked_weights = np.stack(dense_weights, axis=0)

     multi_dense_layer = MultiDense(
         units=2,
         replica_seeds=[42, 43],
@@ -62,6 +68,5 @@ def test_initializers():
     multi_dense_layer.build(input_shape=input_shape)

     multi_dense_weights = multi_dense_layer.weights[0].numpy()
-    stacked_weights = stacked_weights.numpy()

     np.testing.assert_allclose(multi_dense_weights, stacked_weights)
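Note on the pattern used in operations.py above: the code prefers the Keras 3 `keras.ops` namespace and falls back to the TF symbol when it is not available. A minimal standalone sketch of the same idea, assuming only that tensorflow is importable; `keras.ops` exists only for Keras >= 3, which ships with tensorflow >= 2.16 (the import-time probe below is a variant of the call-time try/except used in the patch):

    import tensorflow as tf

    try:
        from keras import ops as kops  # Keras >= 3 (tensorflow >= 2.16)

        def concat(tensors, axis=-1):
            # dispatch through the backend-agnostic Keras 3 ops namespace
            return kops.concatenate(tensors, axis=axis)

    except ImportError:
        # Keras 2 (tensorflow < 2.16) ships no `keras.ops` module
        def concat(tensors, axis=-1):
            return tf.concat(tensors, axis=axis)

Either probe selects the same operation; the call-time version in the patch keeps a single code path at the cost of entering the try/except on every call.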
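The save_weights override added to MetaModel relies on two Keras 3 changes at once: save_weights no longer accepts a save_format argument (hence the TypeError) and it requires the target filename to end in ".weights.h5". A sketch of the same probe as a free function, assuming `model` is any Keras model and `file` is a pathlib.Path, as it is where n3fit calls this:

    import pathlib
    import shutil

    def save_weights_compat(model, file: pathlib.Path):
        try:
            # tensorflow < 2.16 accepts save_format and any file suffix
            model.save_weights(file, save_format="h5")
        except TypeError:
            # Keras 3 dropped save_format and enforces the .weights.h5 suffix
            new_file = file.with_suffix(".weights.h5")
            model.save_weights(new_file)
            # move the result back so callers keep seeing the old path
            shutil.move(new_file, file)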
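The make_int flag threaded through hp_quniform exists because hyperopt's quniform samples quantized floats (20.0 rather than 20), while layer sizes must be integers ("make sure units are int" above); scope.int wraps the sampled value in an int() node of the hyperopt expression graph. A self-contained illustration, assuming only that hyperopt is installed; the space key and objective are hypothetical:

    import hyperopt
    from hyperopt.pyll.base import scope

    # without scope.int, the objective would receive units as a float such as 20.0
    space = {"units": scope.int(hyperopt.hp.quniform("units", 10, 40, 5))}

    def objective(params):
        assert isinstance(params["units"], int)
        return {"loss": (params["units"] - 25) ** 2, "status": hyperopt.STATUS_OK}

    best = hyperopt.fmin(objective, space, algo=hyperopt.tpe.suggest, max_evals=5)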