diff --git a/TODO b/TODO index 5f390906..47bf9681 100644 --- a/TODO +++ b/TODO @@ -199,6 +199,7 @@ optimizer, but just keep a record of the path. Then load it later. (2) Can be combined save model and save optimizer as one function call and where to put it. - Done - +27. 08/18/2019. + Update analyzer/fisher.py to be consistent with the latest calculator. + Done diff --git a/examples/eg_Fisher_info_kim_SW.py b/examples/eg_Fisher_info_kim_SW.py new file mode 100644 index 00000000..9c18dfca --- /dev/null +++ b/examples/eg_Fisher_info_kim_SW.py @@ -0,0 +1,35 @@ +""" +Fisher information for the SW potential. +""" + + +from kliff.models import KIM +from kliff.calculators import Calculator +from kliff.dataset import Dataset +from kliff.analyzers import Fisher + + +########################################################################################## +# Select the parameters that will be used to compute the Fisher information. Only +# parameters specified below will be use, others will be kept fixed. The size of the +# Fisher information matrix will be equal to the total size of the parameters specified +# here. +model = KIM(model_name='SW_StillingerWeber_1985_Si__MO_405512056662_005') +model.set_fitting_params( + A=[['default']], B=[['default']], sigma=[['default']], gamma=[['default']] +) + +# dataset +dataset_name = 'tmp_tset' +tset = Dataset() +tset.read(dataset_name) +configs = tset.get_configs() + +# calculator +calc = Calculator(model) +calc.create(configs) + +########################################################################################## +# Fisher information analyzer. +analyzer = Fisher(calc) +analyzer.run() diff --git a/examples/eg_RMSE_analysis_kim_SW.py b/examples/eg_RMSE_analysis_kim_SW.py index 28bbd82a..846bb37b 100644 --- a/examples/eg_RMSE_analysis_kim_SW.py +++ b/examples/eg_RMSE_analysis_kim_SW.py @@ -1,7 +1,7 @@ from kliff.models import KIM from kliff.calculators import Calculator from kliff.dataset import Dataset -from kliff.analyzers import energy_forces_RMSE +from kliff.analyzers import EnergyForcesRMSE model = KIM(model_name='SW_StillingerWeber_1985_Si__MO_405512056662_005') @@ -15,5 +15,5 @@ calc = Calculator(model) calc.create(configs) -analyzer = energy_forces_RMSE(calc) +analyzer = EnergyForcesRMSE(calc) analyzer.run(verbose=2, sort='energy') diff --git a/examples/eg_RMSE_analysis_nn.py b/examples/eg_RMSE_analysis_nn.py index 43ec664b..b0f13d51 100644 --- a/examples/eg_RMSE_analysis_nn.py +++ b/examples/eg_RMSE_analysis_nn.py @@ -3,7 +3,7 @@ from kliff import nn from kliff.dataset import Dataset from kliff.calculators import CalculatorTorch -from kliff.analyzers import energy_forces_RMSE +from kliff.analyzers import EnergyForcesRMSE # model descriptor = SymmetryFunction( @@ -36,5 +36,5 @@ calc.create(configs, reuse=True) # analyzer -analyzer = energy_forces_RMSE(calc) +analyzer = EnergyForcesRMSE(calc) analyzer.run(verbose=2, sort='energy') diff --git a/kliff/analyzers/__init__.py b/kliff/analyzers/__init__.py index 59b3b956..cae73556 100644 --- a/kliff/analyzers/__init__.py +++ b/kliff/analyzers/__init__.py @@ -1,3 +1,4 @@ -from .rmse import energy_forces_RMSE +from .rmse import EnergyForcesRMSE +from .fisher import Fisher -__all__ = ['energy_forces_RMSE'] +__all__ = ['EnergyForcesRMSE', 'Fisher'] diff --git a/kliff/analyzers/fisher.py b/kliff/analyzers/fisher.py index f1c18131..d5194d36 100644 --- a/kliff/analyzers/fisher.py +++ b/kliff/analyzers/fisher.py @@ -1,106 +1,180 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np import copy -import sys +import numpy as np +from ..log import log_entry +from ..utils import split_string + +import kliff + + +logger = kliff.logger.get_logger(__name__) class Fisher: - """Fisher information matrix. + r"""Fisher information matrix. - Compute the Fisher information according to $I_{ij} = \sum_m df_m/dp_i * df_m/dp_j$, - where f_m are the forces on atoms in configuration m, p_i is the ith model parameter. - Derivatives are computed numerically using Ridders' algorithm. + Compute the Fisher information according to - Parameters - ---------- + ..math:: + I_{ij} = \sum_m \frac{\partial \bm f_m}{\partial \theta_i} + \cdot \frac{\partial \bm f_m}{\partial \theta_j} - KIMobjs: list of KIMcalculator objects + where :math:`f_m` are the forces on atoms in configuration :math:`m`, :math:`\theta_i` + is the ith model parameter. + Derivatives are computed numerically using Ridders' algorithm: + https://en.wikipedia.org/wiki/Ridders%27_method - params: ModelParameters object + Parameters + ---------- + calculator: + A calculator object. """ - def __init__(self, params, calculator): - self.params = params + def __init__(self, calculator): self.calculator = calculator self.F = None - self.F_std = None + self.F_stdev = None self.delta_params = None - def compute(self): - """Comptue the Fisher information matrix and the standard deviation. + def run(self, verbose=1): + """Compute the Fisher information matrix and the standard deviation. + + Parameters + ---------- + verbose: int + If ``0``, do not write out to file; if ``1``, write to a file named + ``analysis_Fisher_info_matrix.txt``. Returns ------- + I: 2D array, shape(N, N) + Fisher information matrix, where N is the number of parameters. - F: 2D array, shape(N, N), where N is the number of parameters - Fisher informaiton matrix (FIM) - - F_std: 2D array, shape(N, N), where N is the number of parameters - standard deviation of FIM + I_stdev: 2D array, shape(N, N) + Standard deviation of Fisher information matrix, where N is the number of + parameters. """ - F_all = [] - kim_in_out_data = self.calculator.get_kim_input_and_output() - for in_out in kim_in_out_data: - dfdp = self._get_derivative_one_conf(in_out) - F_all.append(np.dot(dfdp, dfdp.T)) - self.F = np.mean(F_all, axis=0) - self.F_std = np.std(F_all, axis=0) - return self.F, self.F_std - - def _get_derivative_one_conf(self, in_out): - """Compute the derivative dfm/dpi for one atom configuration. + + msg = 'Start computing Fisher information matrix.' + log_entry(logger, msg, level='info') + + I_all = [] + + cas = self.calculator.get_compute_arguments() + for i, ca in enumerate(cas): + if i % 100 == 0: + msg = 'Processing configuration {}.'.format(i) + log_entry(logger, msg, level='info') + dfdp = self._compute_jacobian_one_config(ca) + I_all.append(np.dot(dfdp.T, dfdp)) + I = np.mean(I_all, axis=0) + I_stdev = np.std(I_all, axis=0) + + self._write_result(I, I_stdev, verbose) + msg = 'Finish computing Fisher information matrix.' + log_entry(logger, msg, level='info') + + return I, I_stdev + + def _write_result(self, I, stdev, verbose, path='analysis_Fisher_info_matrix.txt'): + + params = self.calculator.get_opt_params() + nparams = len(params) + names = [] + values = [] + component_idx = [] + for i in range(len(params)): + out = self.calculator.model.get_opt_param_name_value_and_indices(i) + n, v, p_idx, c_idx = out + names.append(n) + values.append(v) + component_idx.append(c_idx) + + # header + header = '#' * 80 + '\n# Fisher information matrix.\n#\n' + msg = ( + 'The size of the parameter list is {0}, and thus the Fisher information ' + 'matrix is a {0} by {0} matrix. The rows (columns) are associated with the ' + 'parameters in the following order:'.format(nparams) + ) + header += split_string(msg, length=80, starter='#') + header += '#\n' + header += ( + 'row (column) index param name param value param component index\n' + ) + for i, (n, v, c) in enumerate(zip(names, values, component_idx)): + header += '{} {} {:23.15e} {}\n'.format(i, n, v, c) + header += '#' * 80 + '\n' + print(header) + + # write to file + if verbose > 0: + with open(path, 'w') as fout: + + fout.write(header) + + fout.write( + '\n# Fisher information matrix, shape({0}, {0})\n'.format(nparams) + ) + for line in I: + for v in line: + fout.write('{:23.15e} '.format(v)) + fout.write('\n') + + fout.write( + '\n# Standard deviation in Fisher information matrix, ' + 'shape({0}, {0})\n'.format(nparams) + ) + for line in stdev: + for v in line: + fout.write('{:23.15e} '.format(v)) + fout.write('\n') + + def _compute_jacobian_one_config(self, ca): + """Compute the Jacobian of forces w.r.t. parameters for one configuration. Parameters ---------- - - in_out: Configuration object + ca: object + `compute argument` associated with one configuration. """ + try: import numdifftools as nd except ImportError as e: raise ImportError( - str(e) + '.\nFisher information computation needs ' - '"numdifftools". Please install first.' + +'{}\nFisher information analyzer needs "numdifftools". Please install ' + 'it first.'.format(str(e)) ) - derivs = [] - ori_param_vals = self.params.get_x0() - for i, p in enumerate(ori_param_vals): - values = copy.deepcopy(ori_param_vals) - Jfunc = nd.Jacobian(self._get_prediction) - df = Jfunc(p, i, values, in_out) - derivs.append(df.reshape((-1,))) - # restore param values back - self.params.update_params(ori_param_vals) + # compute Jacobian of forces w.r.t. parameters + original_params = self.calculator.get_opt_params() + Jfunc = nd.Jacobian(self._compute_forces_one_config) + j = Jfunc(copy.deepcopy(original_params), ca) + + # restore params back + self.calculator.update_opt_params(original_params) - return np.array(derivs) + return j - def _get_prediction(self, x, idx, values, in_out): - """ Compute predictions using specific parameter. + def _compute_forces_one_config(self, params, ca): + """ Compute forces using a specific set of model parameters. Parameters ---------- - - values: list of float + params: list the parameter values - idx: int - the index of 'x' in the value list - - x: float - the specific parameter value at slot 'idx' + ca: object + `compute argument` associated with one configuration Return ------ - forces: list of floats - the forces on atoms in this configuration + forces: 1D array + the forces on atoms in this configuration """ - values[idx] = x - self.params.update_params(values) - self.calculator.update_params(self.params) - self.calculator.compute(in_out) - forces = self.calculator.get_forces(in_out) + self.calculator.update_opt_params(params) + self.calculator.compute(ca) + forces = self.calculator.get_forces(ca) forces = np.reshape(forces, (-1,)) + return forces diff --git a/kliff/analyzers/rmse.py b/kliff/analyzers/rmse.py index d91e4870..8515e13c 100644 --- a/kliff/analyzers/rmse.py +++ b/kliff/analyzers/rmse.py @@ -12,7 +12,7 @@ logger = kliff.logger.get_logger(__name__) -class energy_forces_RMSE: +class EnergyForcesRMSE: r"""Analyzer to compute the root-mean-square error (RMSE) for energy and forces. The `energy difference norm` for a configuration is defined as: @@ -52,7 +52,7 @@ def __init__(self, calculator, energy=True, forces=True): self.compute_energy = energy self.compute_forces = forces - def run(self, normalize=True, verbose=1, sort=None, path=None): + def run(self, normalize=True, sort=None, path=None, verbose=1): r"""Run the RMSE analyzer. Parameters @@ -61,15 +61,6 @@ def run(self, normalize=True, verbose=1, sort=None, path=None): Whether to normalize the energy (forces) by the number of atoms in a configuration. - verbose: int (optional) - Verbose level of the output info. Available values are: 0, 1, 2. - If ``verbose=0``, only output the energy and forces RMSEs for the dataset. - If ``verbose==1``, output the norms of the energy and forces for each - configuration additionally. - If ``verbose==2``, output the difference of the energy and forces for each - atom, and the information is written to extended XYZ files with the location - specified by ``path``. - sort: str (optional) Sort per configuration information according to `energy` or `forces`. If `None`, no sort. This works only when per configuration information is @@ -80,6 +71,15 @@ def run(self, normalize=True, verbose=1, sort=None, path=None): the file specified by `path`. Note, if ``verbose==3``, the difference of energy and forces will be written to a directory named `energy_forces_RMSE-difference`. + + verbose: int (optional) + Verbose level of the output info. Available values are: 0, 1, 2. + If ``verbose=0``, only output the energy and forces RMSEs for the dataset. + If ``verbose==1``, output the norms of the energy and forces for each + configuration additionally. + If ``verbose==2``, output the difference of the energy and forces for each + atom, and the information is written to extended XYZ files with the location + specified by ``path``. """ msg = 'Start analyzing energy and forces RMSE.' @@ -99,7 +99,7 @@ def run(self, normalize=True, verbose=1, sort=None, path=None): if i % 100 == 0: msg = 'Processing configuration {}.'.format(i) log_entry(logger, msg, level='info') - prefix = 'energy_forces_RMSE-difference' + prefix = 'analysis_energy_forces_RMSE-difference' enorm, fnorm = self._compute_single_config( ca, normalize, verbose, common, prefix ) diff --git a/kliff/models/kim.py b/kliff/models/kim.py index 20b4bf2e..6201fe6e 100644 --- a/kliff/models/kim.py +++ b/kliff/models/kim.py @@ -355,8 +355,8 @@ def update_model_params(self): # only update optimizing components num_params = self.get_number_of_opt_params() for i in range(num_params): - v, p, c = self.get_opt_param_value_and_indices(i) - self.kim_model.set_parameter(p, c, v) + _, value, p_idx, c_idx = self.get_opt_param_name_value_and_indices(i) + self.kim_model.set_parameter(p_idx, c_idx, value) # refresh model self.kim_model.clear_then_refresh() diff --git a/kliff/models/model.py b/kliff/models/model.py index 9f638c0e..64f4945f 100644 --- a/kliff/models/model.py +++ b/kliff/models/model.py @@ -343,8 +343,8 @@ def get_number_of_opt_params(self): def get_opt_params(self): return self.fitting_params.get_opt_params() - def get_opt_param_value_and_indices(self, k): - return self.fitting_params.get_opt_param_value_and_indices(k) + def get_opt_param_name_value_and_indices(self, k): + return self.fitting_params.get_opt_param_name_value_and_indices(k) def has_opt_params_bounds(self): return self.fitting_params.has_opt_params_bounds() @@ -353,7 +353,7 @@ def get_opt_params_bounds(self): return self.fitting_params.get_opt_params_bounds() def update_fitting_params(self, opt_params): - r"""Update from optimzier to fitting params.""" + r"""Update from optimizer to fitting params.""" self.fitting_params.update_params(opt_params) # TODO if parameters relation set, remove the parameters from fitting params diff --git a/kliff/models/parameter.py b/kliff/models/parameter.py index db2c5857..e57cb4d5 100644 --- a/kliff/models/parameter.py +++ b/kliff/models/parameter.py @@ -6,6 +6,7 @@ from collections.abc import Iterable import numpy as np import kliff +from ..error import InputError logger = kliff.logger.get_logger(__name__) @@ -360,15 +361,15 @@ def get_opt_params(self): raise ParameterError('No parameters specified to optimize.') return np.asarray(opt_x0) - def get_opt_param_value_and_indices(self, k): - r"""Get the `value`, `parameter_index`, and `component_index` of an optimizing - parameter given the slot `k`. + def get_opt_param_name_value_and_indices(self, k): + r"""Get the `name`, `value`, `parameter_index`, and `component_index` of an + optimizing parameter given the slot `k`. """ name = self._index[k].name p_idx = self._index[k].p_idx c_idx = self._index[k].c_idx value = self.params[name]['value'][c_idx] - return value, p_idx, c_idx + return name, value, p_idx, c_idx def get_opt_params_bounds(self): r"""Get the lower and upper parameter bounds.""" diff --git a/kliff/utils.py b/kliff/utils.py index 4316fb58..633892bc 100644 --- a/kliff/utils.py +++ b/kliff/utils.py @@ -52,4 +52,4 @@ def split_string(string, length=80, starter=None): sub_string.append(sub) string = string[end:] - return '\n'.join(sub_string) + return '\n'.join(sub_string) + '\n'