Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restart hyperopt #1824

Merged
merged 40 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d9992ae
Added pkl_file attribute and extra FileTrials methods that save/load …
Cmurilochem Oct 24, 2023
062b030
Added --continue option to N3FitApp
Cmurilochem Oct 24, 2023
7dd614b
Added restart_hyperopt attribute to HyperScanner
Cmurilochem Oct 24, 2023
3050923
Added rstate attribute to FileTrials needed to store the last random.…
Cmurilochem Oct 24, 2023
632ff19
Added restart option in hyper_scan_wrapper
Cmurilochem Oct 24, 2023
4f1bdfa
Added docs to hyper_scan_wrapper
Cmurilochem Oct 24, 2023
6db6c30
Added provisory test for hyperopt restart
Cmurilochem Oct 24, 2023
46ccc57
Change in the way seeds are generated for each k-fold
Cmurilochem Oct 26, 2023
6d00ade
Fix in test_hyperopt.py
Cmurilochem Oct 26, 2023
e80895d
Fix in test_hyperopt.py
Cmurilochem Oct 30, 2023
4992ba8
Removed tmp_path argument from test
Cmurilochem Oct 30, 2023
04fd585
Updated test to make it work in CI/CD
Cmurilochem Oct 30, 2023
1490757
Updated tries.json and tries.pkl paths definition without format
Cmurilochem Oct 30, 2023
01842c8
Run isort in test_hyperopt.py
Cmurilochem Oct 30, 2023
3ea6fea
Changed keyword from continue to restart
Cmurilochem Oct 30, 2023
693fde1
Changed step_size to 10 in stopping method
Cmurilochem Oct 30, 2023
53237df
Fix test_hyperopt to use runcard in regressions folder
Cmurilochem Oct 30, 2023
209a1d2
Changed step_size to 1 in stopping method
Cmurilochem Oct 31, 2023
8280524
Removed hyperopt folder from tests; using regression folder
Cmurilochem Oct 31, 2023
d45dfea
Update doc string in hyper_scan_wrapper
Cmurilochem Oct 31, 2023
1c70229
Update doc string n3fit/src/n3fit/hyper_optimization/hyper_scan.py
Cmurilochem Oct 31, 2023
fa2a6dd
Updated np.random.default_rng to be constant
Cmurilochem Oct 31, 2023
5ef7787
Added HYPEROPT_SEED as constant at the top of the hyper_scan.py module
Cmurilochem Oct 31, 2023
3ce533e
Provisory suggestion to generate reproducible seeds for each fold in …
Cmurilochem Oct 31, 2023
aaafc80
Provisory suggestion to generate reproducible seeds for each fold in …
Cmurilochem Oct 31, 2023
c872bcc
Added new integer generator
Cmurilochem Oct 31, 2023
d528391
Added new integer generator
Cmurilochem Oct 31, 2023
5f2f6aa
different nnseed per fold
RoyStegeman Nov 1, 2023
08af2dd
Updated filetrials.py; fix in rstate docstring
Cmurilochem Nov 6, 2023
1f2445c
Updated filetrials.py; fix in rstate docstring
Cmurilochem Nov 6, 2023
9dd531c
Update n3fit/src/n3fit/hyper_optimization/filetrials.py
Cmurilochem Nov 6, 2023
5ce4c02
Updated filetrials.py; fix in from_pkl docstring
Cmurilochem Nov 6, 2023
2e2b9f9
Updated hyper_scan.py; fix in HyperScanner
Cmurilochem Nov 6, 2023
7d3c219
Fix in test_hyperopt; added output_restart and output_direct as sub-d…
Cmurilochem Nov 6, 2023
200c707
Fix in from_pkl exception
Cmurilochem Nov 6, 2023
ba84878
Removed static typing and added more descriptive docstring to rstate a…
Cmurilochem Nov 6, 2023
d720280
Added documentation on hyperopt restarts
Cmurilochem Nov 6, 2023
260716f
Fix in reference docs
Cmurilochem Nov 6, 2023
a00cc9f
Fix in reference to pickle
Cmurilochem Nov 6, 2023
ec4d507
Fix in hyperopt.rst docs
Cmurilochem Nov 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 61 additions & 6 deletions n3fit/src/n3fit/hyper_optimization/filetrials.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""
Custom hyperopt trial object for persistent file storage
in the form of a json file within the nnfit folder
in the form of json and pickle files within the nnfit folder
"""
import json
import logging
from validphys.hyperoptplot import HyperoptTrial
import pickle

from hyperopt import Trials, space_eval

from validphys.hyperoptplot import HyperoptTrial

log = logging.getLogger(__name__)

# Note: the plan would be to do a PR in hyperopt's main repository
Expand Down Expand Up @@ -60,10 +63,42 @@ class FileTrials(Trials):

def __init__(self, replica_path, parameters=None, **kwargs):
    """Create a ``FileTrials`` object tied to a replica output folder.

    Parameters
    ----------
    replica_path: pathlib.Path
        folder in which the ``tries.json`` and ``tries.pkl`` files are written
        (must support the ``/`` path-join operator)
    parameters: dict
        hyperparameter-space dictionary, used when space-evaluating trials
    **kwargs:
        forwarded verbatim to the parent ``Trials`` constructor

    Note: the previous duplicate assignment of ``_json_file`` via
    ``str.format`` was dead code (immediately overwritten) and was removed.
    """
    # Flag consumed when flushing trials; set to True once a new trial doc
    # is created — presumably checked by refresh(), TODO confirm.
    self._store_trial = False
    self._json_file = replica_path / "tries.json"
    # Pickle file used to persist the full FileTrials state for restarts
    self.pkl_file = replica_path / "tries.pkl"
    self._parameters = parameters
    # numpy random Generator, set externally via the `rstate` property
    self._rstate = None
    super().__init__(**kwargs)

@property
def rstate(self):
    """The stored ``numpy.random.Generator`` (or ``None`` if never set).

    Keeping the generator on the trials object makes hyperopt restarts
    reproducible in hyperparameter space: the same generator object can
    later be handed to ``hyperopt.fmin`` as its ``rstate`` argument.
    """
    return self._rstate

@rstate.setter
def rstate(self, generator):
    """Attach a ``numpy.random.Generator`` to this trials object.

    Parameters
    ----------
    generator: numpy.random.Generator
        the random generator whose state should drive the hyperopt sampling

    Example
    -------
    >>> import numpy as np
    >>> from n3fit.hyper_optimization.filetrials import FileTrials
    >>>
    >>> trials = FileTrials(replica_path_set, parameters=parameters)
    >>> trials.rstate = np.random.default_rng(42)
    """
    self._rstate = generator

def refresh(self):
"""
This is the "flushing" method which is called at the end of every trial to
Expand All @@ -78,9 +113,7 @@ def refresh(self):
local_trials = []
for idx, t in enumerate(self._dynamic_trials):
local_trials.append(t)
local_trials[idx]["misc"]["space_vals"] = space_eval_trial(
self._parameters, t
)
local_trials[idx]["misc"]["space_vals"] = space_eval_trial(self._parameters, t)

all_to_str = json.dumps(local_trials, default=str)
with open(self._json_file, "w") as f:
Expand All @@ -95,3 +128,25 @@ def new_trial_ids(self, n):
def new_trial_docs(self, tids, specs, results, miscs):
    # A genuinely new trial is being created: raise the flag so the next
    # flush knows there is fresh state to persist — presumably consumed by
    # refresh(), TODO confirm against the full class body.
    self._store_trial = True
    return super().new_trial_docs(tids, specs, results, miscs)

def to_pkl(self):
    """Persist this ``FileTrials`` instance to its pickle file.

    The object is serialized in full to ``self.pkl_file`` so that a later
    run can resume from the saved state via :meth:`from_pkl`.
    """
    with open(self.pkl_file, "wb") as handle:
        pickle.dump(self, handle)

@classmethod
def from_pkl(cls, pickle_filepath):
    """Reconstruct a ``FileTrials`` instance from a pickle file.

    When a ``tries.pkl`` file from a previous run exists, this alternate
    constructor loads it so that a hyperopt scan can be restarted from the
    saved state.

    Raises
    ------
    FileNotFoundError
        if ``pickle_filepath`` does not exist, with a message pointing at
        the expected location.
    """
    # NOTE: pickle deserialization can execute arbitrary code — only load
    # tries.pkl files produced by a trusted previous run.
    try:
        with open(pickle_filepath, "rb") as stream:
            return pickle.load(stream)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            "Failed to open 'tries.pkl' pickle file for restarting. "
            f"Please ensure it is located in: {pickle_filepath}"
        ) from err
46 changes: 30 additions & 16 deletions n3fit/src/n3fit/hyper_optimization/hyper_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@
- a function
- a dictionary of spaces of parameters
you can do so by simply modifying the wrappers to point somewhere else
(and, of course the function in the fitting action that calls the miniimization).
(and, of course the function in the fitting action that calls the minimization).
"""
import copy
import logging

import hyperopt
import numpy as np
from n3fit.backends import MetaModel, MetaLayer
import n3fit.hyper_optimization.filetrials as filetrials
import logging

from n3fit.backends import MetaLayer, MetaModel
from n3fit.hyper_optimization.filetrials import FileTrials

log = logging.getLogger(__name__)

HYPEROPT_SEED = 42


# These are just wrapper around some hyperopt's sampling expresions defined in here
# https://github.com/hyperopt/hyperopt/wiki/FMin#21-parameter-expressions
# with a bit of extra documentation for the ones that are not obvious
Expand Down Expand Up @@ -88,12 +93,13 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
and performs ``max_evals`` evaluations of the hyperparametrizable function of ``model_trainer``.

A ``tries.json`` file will be saved in the ``replica_path_set`` folder with the information
of all trials.
of all trials. An additional ``tries.pkl`` file will also be generated in the same folder
that stores the previous states of `FileTrials`, this file can be used for restarting purposes.

Parameters
-----------
replica_path_set: path
folder where to create the json ``tries.json`` file
folder where to create the ``tries.json`` and ``tries.pkl`` files
model_trainer: :py:class:`n3fit.ModelTrainer.ModelTrainer`
a ``ModelTrainer`` object with the ``hyperparametrizable`` method
hyperscanner: :py:class:`n3fit.hyper_optimization.hyper_scan.HyperScanner`
Expand All @@ -109,7 +115,15 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
# Tell the trainer we are doing hpyeropt
model_trainer.set_hyperopt(True, keys=hyperscanner.hyper_keys, status_ok=hyperopt.STATUS_OK)
# Generate the trials object
trials = filetrials.FileTrials(replica_path_set, parameters=hyperscanner.as_dict())
trials = FileTrials(replica_path_set, parameters=hyperscanner.as_dict())
# Initialize seed for hyperopt
trials.rstate = np.random.default_rng(HYPEROPT_SEED)

# For restarts, reset the state of `FileTrials` saved in the pickle file
if hyperscanner.restart_hyperopt:
pickle_file_to_load = f"{replica_path_set}/tries.pkl"
log.info("Restarting hyperopt run using the pickle file %s", pickle_file_to_load)
trials = FileTrials.from_pkl(pickle_file_to_load)

# Perform the scan
best = hyperopt.fmin(
Expand All @@ -119,6 +133,8 @@ def hyper_scan_wrapper(replica_path_set, model_trainer, hyperscanner, max_evals=
max_evals=max_evals,
show_progressbar=False,
trials=trials,
rstate=trials.rstate,
trials_save_file=trials.pkl_file,
)
return hyperscanner.space_eval(best)

Expand Down Expand Up @@ -174,6 +190,10 @@ def __init__(self, parameters, sampling_dict, steps=5):
self.parameters = copy.deepcopy(parameters)
self.steps = steps

# adding extra options for restarting
restart_config = sampling_dict.get("restart")
self.restart_hyperopt = True if restart_config else False

self.hyper_keys = set([])

if "parameters" in sampling_dict:
Expand Down Expand Up @@ -256,8 +276,7 @@ def stopping(self, min_epochs=None, max_epochs=None, min_patience=None, max_pati
stopping_key = "stopping_patience"

if min_epochs is not None and max_epochs is not None:
epochs = hp_quniform(epochs_key, min_epochs, max_epochs,
step_size=1000)
epochs = hp_quniform(epochs_key, min_epochs, max_epochs, step_size=1)
self._update_param(epochs_key, epochs)

if min_patience is not None or max_patience is not None:
Expand Down Expand Up @@ -333,11 +352,7 @@ def optimizer(self, optimizers):
self._update_param(opt_key, opt_val)

def positivity(
self,
min_multiplier=None,
max_multiplier=None,
min_initial=None,
max_initial=None,
self, min_multiplier=None, max_multiplier=None, min_initial=None, max_initial=None
):
"""
Modifies the following entries of the `parameters` dictionary:
Expand Down Expand Up @@ -414,8 +429,7 @@ def architecture(
units = []
for i in range(n):
units_label = "nl{0}:-{1}/{0}".format(n, i)
units_sampler = hp_quniform(units_label, min_units, max_units,
step_size=1)
units_sampler = hp_quniform(units_label, min_units, max_units, step_size=1)
units.append(units_sampler)
# The number of nodes in the last layer are read from the runcard
units.append(output_size)
Expand Down
24 changes: 9 additions & 15 deletions n3fit/src/n3fit/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,7 @@ def __init__(
"folds": [],
"posdatasets": [],
}
self.experimental = {
"output": [],
"expdata": [],
"ndata": 0,
"model": None,
"folds": [],
}
self.experimental = {"output": [], "expdata": [], "ndata": 0, "model": None, "folds": []}

self._fill_the_dictionaries()

Expand Down Expand Up @@ -483,11 +477,7 @@ def _model_generation(self, xinput, pdf_models, partition, partition_idx):
except ValueError:
pass

models = {
"training": training,
"validation": validation,
"experimental": experimental,
}
models = {"training": training, "validation": validation, "experimental": experimental}

return models

Expand Down Expand Up @@ -850,7 +840,7 @@ def hyperparametrizable(self, params):
# Initialize all photon classes for the different replicas:
if self.lux_params:
photons = Photon(
theoryid=self.theoryid, lux_params=self.lux_params, replicas=self.replicas,
theoryid=self.theoryid, lux_params=self.lux_params, replicas=self.replicas
)
else:
photons = None
Expand All @@ -860,7 +850,11 @@ def hyperparametrizable(self, params):
# and the seed needs to be updated accordingly
seeds = self._nn_seeds
if k > 0:
seeds = [np.random.randint(0, pow(2, 31)) for _ in seeds]
# generate random integers for each k-fold from the input `nnseeds`
# we generate new seeds to avoid the integer overflow that may
# occur when doing k*nnseeds
rngs = [np.random.default_rng(seed=seed) for seed in seeds]
seeds = [generator.integers(1, pow(2, 30)) * k for generator in rngs]

# Generate the pdf model
pdf_models = self._generate_pdf(
Expand Down Expand Up @@ -922,7 +916,7 @@ def hyperparametrizable(self, params):
for model in models.values():
model.compile(**params["optimizer"])

passed = self._train_and_fit(models["training"], stopping_object, epochs=epochs,)
passed = self._train_and_fit(models["training"], stopping_object, epochs=epochs)

if self.mode_hyperopt:
# If doing a hyperparameter scan we need to keep track of the loss function
Expand Down
4 changes: 4 additions & 0 deletions n3fit/src/n3fit/scripts/n3fit_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ def produce_hyperscanner(self, parameters, hyperscan_config=None, hyperopt=None)

if hyperscan_config is None or hyperopt is None:
return None
if hyperopt and self.environment.restart:
hyperscan_config.update({'restart': 'true'})
return HyperScanner(parameters, hyperscan_config)


Expand All @@ -258,6 +260,7 @@ def check_positive(value):
return ivalue

parser.add_argument("--hyperopt", help="Enable hyperopt scan", default=None, type=int)
parser.add_argument("--restart", help="Enable hyperopt restarts", action="store_true")
parser.add_argument("replica", help="MC replica number", type=check_positive)
parser.add_argument(
"-r",
Expand All @@ -283,6 +286,7 @@ def run(self):
replicas = [replica]
self.environment.replicas = NSList(replicas, nskey="replica")
self.environment.hyperopt = self.args["hyperopt"]
self.environment.restart = self.args["restart"]
super().run()
except N3FitError as e:
log.error(f"Error in n3fit:\n{e}")
Expand Down
70 changes: 69 additions & 1 deletion n3fit/src/n3fit/tests/test_hyperopt.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,81 @@
"""
Test hyperoptimization features
"""
import json
import pathlib
import shutil
import subprocess as sp

from numpy.testing import assert_approx_equal

from n3fit.hyper_optimization import rewards


def test_rewards():
    """Check each hyperopt reward function against its reference value."""
    losses = [0.0, 1.0, 2.0]
    # (reward function, expected value) reference table
    reference = [
        (rewards.average, 1.0),
        (rewards.best_worst, 2.0),
        (rewards.std, 0.816496580927726),
    ]
    for reward_fn, expected in reference:
        assert_approx_equal(reward_fn(losses), expected)


# Folder (next to this test file) holding the regression runcards
REGRESSION_FOLDER = pathlib.Path(__file__).with_name("regressions")
# Base name of the runcard; the hyperopt test uses hyper-{QUICKNAME}.yml
QUICKNAME = "quickcard"
# Executable under test
EXE = "n3fit"
# MC replica number passed on the command line
REPLICA = "1"


def load_data(info_file):
    """Parse a JSON file produced by a fit and return its contents.

    Parameters
    ----------
    info_file: str or os.PathLike
        path to the JSON file (e.g. the ``tries.json`` of a hyperopt run)
    """
    with open(info_file, "r", encoding='utf-8') as stream:
        return json.load(stream)


def test_restart_from_pickle(tmp_path):
    """Ensure that our hyperopt restart works as expected.

    Runs the same hyperopt scan twice: once split into two legs (stop after
    ``n_trials_stop`` trials, then restart from the pickle up to
    ``n_trials_total``) and once in a single uninterrupted leg, then checks
    that both produce the same trial history in ``tries.json``.
    """
    # Prepare the run
    quickcard = f"hyper-{QUICKNAME}.yml"
    quickpath = REGRESSION_FOLDER / quickcard
    # Set up some options
    n_trials_stop = 2
    n_trials_total = 4
    output_restart = tmp_path / f"run_{n_trials_stop}_trials_and_then_{n_trials_total}_trials"
    output_direct = tmp_path / f"run_{n_trials_total}_trials"

    # cp runcard to tmp folder
    shutil.copy(quickpath, tmp_path)
    # run some trials for the first time (first leg of the restarted run)
    sp.run(
        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_stop} " f"-o {output_restart}".split(),
        cwd=tmp_path,
        check=True,
    )
    # restart and calculate more trials (second leg, resumes from tries.pkl)
    sp.run(
        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_total} "
        f"-o {output_restart} --restart".split(),
        cwd=tmp_path,
        check=True,
    )
    # start again and calculate all trials at once (reference run)
    sp.run(
        f"{EXE} {quickpath} {REPLICA} --hyperopt {n_trials_total} " f"-o {output_direct}".split(),
        cwd=tmp_path,
        check=True,
    )

    # read up generated json files
    restart_json_path = f"{output_restart}/nnfit/replica_{REPLICA}/tries.json"
    restart_json = load_data(restart_json_path)
    direct_json_path = f"{output_direct}/nnfit/replica_{REPLICA}/tries.json"
    direct_json = load_data(direct_json_path)

    # minimum check: the generated lists of nested dictionaries have the same length
    assert len(restart_json) == len(direct_json)

    for i in range(n_trials_total):
        # check that the files share exactly the same hyperopt history
        assert restart_json[i]['misc'] == direct_json[i]['misc']
        assert restart_json[i]['state'] == direct_json[i]['state']
        assert restart_json[i]['tid'] == direct_json[i]['tid']
        assert restart_json[i]['result'] == direct_json[i]['result']