From d35632b6e214dd8db1f63e6522545177522df2a9 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Mon, 29 Jul 2024 23:46:54 -0700 Subject: [PATCH 01/24] Almost entirely passing --- espei/error_functions/activity_error.py | 22 ++--- .../equilibrium_thermochemical_error.py | 29 +++--- .../non_equilibrium_thermochemical_error.py | 93 +++++++++++++++---- espei/error_functions/zpf_error.py | 92 +++++++++--------- espei/paramselect.py | 2 +- espei/plot.py | 3 +- espei/shadow_functions.py | 33 ++++--- tests/test_error_functions.py | 29 +++--- tests/test_parameter_generation.py | 2 +- 9 files changed, 176 insertions(+), 129 deletions(-) diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py index 0db807b8..b8df3810 100644 --- a/espei/error_functions/activity_error.py +++ b/espei/error_functions/activity_error.py @@ -14,10 +14,11 @@ import numpy as np import numpy.typing as npt import tinydb -from pycalphad import Database, equilibrium, variables as v +from pycalphad import Database, variables as v from pycalphad.plot.eqplot import _map_coord_to_variable from pycalphad.core.utils import filter_phases, unpack_components from scipy.stats import norm +from pycalphad import Workspace from espei.core_utils import ravel_conditions from espei.error_functions.residual_base import ResidualFunction, residual_function_registry @@ -28,7 +29,7 @@ _log = logging.getLogger(__name__) -def target_chempots_from_activity(component, target_activity, temperatures, reference_result): +def target_chempots_from_activity(component, target_activity, temperatures, wks_ref): """ Return an array of experimental chemical potentials for the component @@ -50,8 +51,9 @@ def target_chempots_from_activity(component, target_activity, temperatures, refe """ # acr_i = exp((mu_i - mu_i^{ref})/(RT)) # so mu_i = R*T*ln(acr_i) + mu_i^{ref} - ref_chempot = reference_result["MU"].sel(component=component).values.flatten() - return v.R * temperatures * np.log(target_activity) + ref_chempot + ref_chempot = wks_ref.get(v.MU(component)) + exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot + return exp_chem_pots # TODO: roll this function into ActivityResidual @@ -88,9 +90,7 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, data_comps = ds['components'] data_phases = filter_phases(dbf, unpack_components(dbf, data_comps), candidate_phases=phases) ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()} - ref_result = equilibrium(dbf, data_comps, ref['phases'], ref_conditions, - model=phase_models, parameters=parameters, - callables=callables) + wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters) # calculate current chemical potentials # get the conditions @@ -113,15 +113,13 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, # assume now that the ravelled conditions all have the same size conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))] for conds in conditions_list: - sample_eq_res = equilibrium(dbf, data_comps, data_phases, conds, - model=phase_models, parameters=parameters, - callables=callables) - dataset_computed_chempots.append(float(sample_eq_res.MU.sel(component=acr_component).values.flatten()[0])) + wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds, parameters=parameters) + dataset_computed_chempots.append(wks_sample.get(v.MU(acr_component))) dataset_weights.append(std_dev / data_weight / ds.get("weight", 1.0)) # calculate target chempots dataset_activities = np.array(ds['values']).flatten() - dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], ref_result) + dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], wks_ref) dataset_residuals = (np.asarray(dataset_computed_chempots) - np.asarray(dataset_target_chempots, dtype=float)).tolist() _log.debug('Data: %s, chemical potential difference: %s, reference: %s', dataset_activities, dataset_residuals, ds["reference"]) residuals.extend(dataset_residuals) diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index dc2b988d..b7491ac6 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -11,12 +11,11 @@ import tinydb from tinydb import where from scipy.stats import norm -from pycalphad.plot.eqplot import _map_coord_to_variable from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.core.equilibrium import _eqcalculate -from pycalphad.codegen.callables import build_phase_records from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_components, unpack_condition from pycalphad.core.phase_rec import PhaseRecord +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory +from pycalphad import Workspace, as_property from espei.error_functions.residual_base import ResidualFunction, residual_function_registry from espei.phase_models import PhaseModelSpecification @@ -88,6 +87,7 @@ def build_eqpropdata(data: tinydb.database.Document, reference = data.get('reference', '') # Models are now modified in response to the data from this data + # TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below if 'reference_states' in data: property_output = output[:-1] if output.endswith('R') else output # unreferenced model property so we can tell shift_reference_state what to build. reference_states = [] @@ -100,7 +100,7 @@ def build_eqpropdata(data: tinydb.database.Document, pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')]) comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')]) - phase_records = build_phase_records(dbf, species, data_phases, {**pot_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True) + phase_records = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters) # Now we need to unravel the composition conditions # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the @@ -210,25 +210,20 @@ def calc_prop_differences(eqpropdata: EqPropData, phase_records = eqpropdata.phase_records update_phase_record_parameters(phase_records, parameters) params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters)) - output = eqpropdata.output + output = as_property(eqpropdata.output) weights = np.array(eqpropdata.weight, dtype=np.float64) samples = np.array(eqpropdata.samples, dtype=np.float64) + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_records, parameters=params_dict) calculated_data = [] for comp_conds in eqpropdata.composition_conds: cond_dict = OrderedDict(**pot_conds, **comp_conds) - # str_statevar_dict must be sorted, assumes that pot_conds are. - str_statevar_dict = OrderedDict([(str(key), vals) for key, vals in pot_conds.items()]) - grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True) - multi_eqdata = _equilibrium(phase_records, cond_dict, grid) - # TODO: could be kind of slow. Callables (which are cachable) must be built. - propdata = _eqcalculate(dbf, species, phases, cond_dict, output, data=multi_eqdata, per_phase=False, callables=None, parameters=params_dict, model=models) - - if 'vertex' in propdata.data_vars[output][0]: - raise ValueError(f"Property {output} cannot be used to calculate equilibrium thermochemical error because each phase has a unique value for this property.") - - vals = getattr(propdata, output).flatten().tolist() - calculated_data.extend(vals) + wks.conditions = cond_dict + wks.parameters = params_dict # these reset models and phase_records through depends_on -> lose Model.shift_reference_state, etc. + wks.models = models + wks.phase_record_factory = phase_records + vals = wks.get(output) + calculated_data.extend(np.atleast_1d(vals).tolist()) calculated_data = np.array(calculated_data, dtype=np.float64) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index c9c05db9..375d4674 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -12,16 +12,17 @@ import numpy.typing as npt from symengine import Symbol from tinydb import where - +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.codegen.callables import build_phase_records from pycalphad.core.utils import unpack_components, get_pure_elements, filter_phases +from pycalphad import Workspace +from pycalphad.property_framework import IsolatedPhase +from pycalphad.property_framework.metaproperties import find_first_compset from espei.datasets import Dataset from espei.core_utils import ravel_conditions, get_prop_data, filter_temperatures from espei.parameter_selection.redlich_kister import calc_interaction_product from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import calculate_, update_phase_record_parameters from espei.sublattice_tools import canonicalize, recursive_tuplify, tuplify from espei.typing import SymbolName from espei.utils import database_symbols_to_fit, PickleableTinyDB @@ -286,7 +287,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic model_cls = model.get(phase_name, Model) mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit) if prop.endswith('_FORM'): - output = ''.join(prop.split('_')[:-1])+'R' + output = ''.join(prop.split('_')[:-1]) mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion}) else: output = prop @@ -298,42 +299,90 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic mod.endmember_reference_model.models[contrib] = symengine.S.Zero except AttributeError: mod.reference_model.models[contrib] = symengine.S.Zero + model_dict = {phase_name: mod} species = sorted(unpack_components(dbf, comps), key=str) data_dict['species'] = species - model_dict = {phase_name: mod} statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)} statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0]))) + phase_records = PhaseRecordFactory(dbf, species, statevar_dict, model_dict, + parameters={s: 0 for s in symbols_to_fit}) str_statevar_dict = OrderedDict((str(k), vals) for k, vals in statevar_dict.items()) - phase_records = build_phase_records(dbf, species, [phase_name], statevar_dict, model_dict, - output=output, parameters={s: 0 for s in symbols_to_fit}, - build_gradients=False, build_hessians=False) data_dict['str_statevar_dict'] = str_statevar_dict data_dict['phase_records'] = phase_records data_dict['calculate_dict'] = calculate_dict data_dict['model'] = model_dict data_dict['output'] = output data_dict['weights'] = np.array(property_std_deviation[prop.split('_')[0]])/np.array(calculate_dict.pop('weights')) + data_dict['constituents'] = constituents all_data_dicts.append(data_dict) return all_data_dicts -def compute_fixed_configuration_property_differences(calc_data: FixedConfigurationCalculationData, parameters): +def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters): phase_name = calc_data['phase_name'] output = calc_data['output'] phase_records = calc_data['phase_records'] sample_values = calc_data['calculate_dict']['values'] - update_phase_record_parameters(phase_records, parameters) - results = calculate_(calc_data['species'], [phase_name], - calc_data['str_statevar_dict'], calc_data['model'], - phase_records, output=output, broadcast=False, - points=calc_data['calculate_dict']['points'])[output] - differences = results - sample_values + constituent_list = [] + sublattice_list = [] + counter = 0 + for sublattice in calc_data['constituents']: + for const in sublattice: + sublattice_list.append(counter) + constituent_list.append(const) + counter = counter + 1 + + differences = [] + for index in range(len(sample_values)): + cond_dict = {} + for key in calc_data['str_statevar_dict'].keys(): + cond_dict.update({key: calc_data['str_statevar_dict'][key][index]}) + + # here we build the internal DOF as a dictionary to use as conditions + dof = {} + for site_frac in range(len(constituent_list)): + comp = constituent_list[site_frac] + occupancy = calc_data['calculate_dict']['points'][index,site_frac] + sublattice = sublattice_list[site_frac] + dof.update({v.Y(phase_name,sublattice,comp): occupancy}) + + # TODO: convergence seems like it can get messed up if including DOF as conditions. + # We temporarily disable that, but be aware that this will allow the minimizer to minimizer internal DOF, so it will only truly work for phases with (# internal DOF) = (# composition conditions) + # wks = Workspace(database=dbf, components=calc_data['species'], phases=phase_name, models=calc_data["model"], conditions={**cond_dict, **dof}) + # TODO: If tests are passing with this, that's bad. We need a phase with more internal DOF (think Laves 2SL) + # -- I believe the test_equilibrium_thermochemcial_error_species fails due to this, but more investigation needed + # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species + active_pure_elements = [list(x.constituents.keys()) for x in calc_data["species"]] + active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) + ind_comps = len(active_pure_elements) - 1 + for comp in active_pure_elements: + if v.Species(comp) != v.Species('VA') and ind_comps > 0: + cond_dict[v.X(comp)] = float(calc_data["model"][phase_name].moles(comp).xreplace(dof)) + ind_comps = ind_comps - 1 + # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why. + # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered. + # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. + wks = Workspace(database=dbf, components=calc_data['species'], phases=[phase_name], conditions={**cond_dict}, models=calc_data["model"], phase_record_factory=phase_records, parameters=parameters) + wks.phase_record_factory.models = calc_data["model"] # if commented: 2 failed, 1 passed. uncommented 1 failed, 2 passed + # Sometimes in miscibility gaps the solver has trouble converging for + # IsolatedPhase if the first compset it finds is at the edge of + # composition space. Here, since we know the degrees of freedom, we + # initialize the compset for isolated phase correctly in the first + # place. This is kind of a hack until I figure out full DOF support. + compset = find_first_compset(phase_name, wks) + new_sitefracs = np.array([sf for _, sf in sorted(dof.items(), key=lambda y: (y[0].phase_name, y[0].sublattice_index, y[0].species.name))]) + new_sitefracs[new_sitefracs < 1e-14] = 1e-14 # fix zeros, otherwise we get ZeroDivisionErrors in the minimizer. + new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)]) # not actually changed + compset.update(new_sitefracs, compset.NP, new_statevars) + results = wks.get(IsolatedPhase(compset, wks=wks)(output)) + sample_differences = results - sample_values[index] + differences.append(sample_differences) return differences -def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], parameters=None): +def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: List[FixedConfigurationCalculationData], dbf, parameters=None): """ Calculate the weighted single phase error in the Database @@ -351,13 +400,14 @@ def calculate_non_equilibrium_thermochemical_probability(thermochemical_data: Li """ if parameters is None: - parameters = np.array([]) + parameters = {} prob_error = 0.0 for data in thermochemical_data: phase_name = data['phase_name'] sample_values = data['calculate_dict']['values'] - differences = compute_fixed_configuration_property_differences(data, parameters) + differences = compute_fixed_configuration_property_differences(dbf, data, parameters) + differences = np.array(differences) probabilities = norm.logpdf(differences, loc=0, scale=data['weights']) prob_sum = np.sum(probabilities) _log.debug("%s(%s) - probability sum: %0.2f, data: %s, differences: %s, probabilities: %s, references: %s", data['prop'], phase_name, prob_sum, sample_values, differences, probabilities, data['calculate_dict']['references']) @@ -391,12 +441,14 @@ def __init__( if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit) + self._symbols_to_fit = symbols_to_fit + self.dbf = database def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[float]]: residuals = [] weights = [] for data in self.thermochemical_data: - dataset_residuals = compute_fixed_configuration_property_differences(data, parameters).tolist() + dataset_residuals = compute_fixed_configuration_property_differences(self.dbf, data, dict(zip(self._symbols_to_fit, parameters))) residuals.extend(dataset_residuals) dataset_weights = np.asarray(data["weights"], dtype=float).flatten().tolist() if len(dataset_weights) != len(dataset_residuals): @@ -407,7 +459,8 @@ def get_residuals(self, parameters: npt.ArrayLike) -> Tuple[List[float], List[fl return residuals, weights def get_likelihood(self, parameters) -> float: - likelihood = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, parameters) + parameters = {param_name: param for param_name, param in zip(self._symbols_to_fit, parameters.tolist())} + likelihood = calculate_non_equilibrium_thermochemical_probability(self.thermochemical_data, self.dbf, parameters) return likelihood diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index c1b0f9f3..0617e075 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -21,17 +21,16 @@ import numpy as np from numpy.typing import ArrayLike -from pycalphad import Database, Model, variables as v -from pycalphad.codegen.callables import build_phase_records +from pycalphad import Database, Model, Workspace, variables as v +from pycalphad.property_framework import IsolatedPhase from pycalphad.core.utils import instantiate_models, filter_phases, unpack_components from pycalphad.core.phase_rec import PhaseRecord -from pycalphad.core.calculate import _sample_phase_constitution -from pycalphad.core.utils import point_sample +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from scipy.stats import norm import tinydb from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters, constrained_equilibrium +from espei.shadow_functions import calculate_, update_phase_record_parameters from espei.typing import SymbolName from espei.utils import PickleableTinyDB, database_symbols_to_fit from .residual_base import ResidualFunction, residual_function_registry @@ -162,18 +161,22 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat desired_data = datasets.search((tinydb.where('output') == 'ZPF') & (tinydb.where('components').test(lambda x: set(x).issubset(comps))) & (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0))) + wks = Workspace(dbf, comps, phases) zpf_data = [] # 1:1 correspondence with each dataset for data in desired_data: + current_wks = wks.copy() data_comps = list(set(data['components']).union({'VA'})) - species = sorted(unpack_components(dbf, data_comps), key=str) - data_phases = filter_phases(dbf, species, candidate_phases=phases) - models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters) - # assumed N, P, T state variables - phase_recs = build_phase_records(dbf, species, data_phases, {v.N, v.P, v.T}, models, parameters=parameters, build_gradients=True, build_hessians=True) - all_phase_points = {phase_name: _sample_phase_constitution(models[phase_name], point_sample, True, 50) for phase_name in data_phases} all_regions = data['values'] conditions = data['conditions'] + current_wks.components = data_comps + current_wks.conditions = conditions + current_wks.phases = phases + models = instantiate_models(dbf, current_wks.components, current_wks.phases, model=model, parameters=parameters) + current_wks.models = models + models = current_wks.models.unwrap() + statevars = {v.N, v.P, v.T} # TODO: hardcoded + current_wks.phase_record_factory = PhaseRecordFactory(dbf, current_wks.components, statevars, models, parameters=parameters) phase_regions = [] # Each phase_region is one set of phases in equilibrium (on a tie-line), # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]] @@ -190,7 +193,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat if phase_name.upper() == '__HYPERPLANE__': if np.any(np.isnan(composition)): # TODO: make this a part of the dataset checker raise ValueError(f"__HYPERPLANE__ vertex ({vertex}) must have all independent compositions defined to make a well-defined hyperplane (from dataset: {data})") - vtx = RegionVertex(phase_name, composition, comp_conds, None, phase_recs, disordered_flag, False) + vtx = RegionVertex(phase_name, composition, comp_conds, None, current_wks.phase_record_factory, disordered_flag, False) hyperplane_vertices.append(vtx) continue # Construct single-phase points satisfying the conditions for each phase in the region @@ -204,28 +207,33 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat phase_points = None else: has_missing_comp_cond = False - # Only sample points that have an average mass residual within tol - tol = 0.02 - phase_points = _subsample_phase_points(phase_recs[phase_name], all_phase_points[phase_name], composition, tol) + vertex_wks = current_wks.copy() + vertex_wks.phases = [phase_name] + vertex_wks.conditions = {**pot_conds, **comp_conds} + compset = next(vertex_wks.enumerate_composition_sets())[1][0] + # TODO: phase points can probably be removed, they should not be needed now that IsolatedPhase exists + phase_points = np.array([compset.dof[len(compset.phase_record.state_variables):]]) assert phase_points.shape[0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}" - vtx = RegionVertex(phase_name, composition, comp_conds, phase_points, phase_recs, disordered_flag, has_missing_comp_cond) + vtx = RegionVertex(phase_name, composition, comp_conds, phase_points, current_wks.phase_record_factory, disordered_flag, has_missing_comp_cond) vertices.append(vtx) if len(hyperplane_vertices) == 0: # Define the hyperplane at the vertices of the ZPF points hyperplane_vertices = vertices - region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, species, data_phases, models) + region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, models) phase_regions.append(region) data_dict = { 'weight': data.get('weight', 1.0), 'phase_regions': phase_regions, - 'dataset_reference': data['reference'] + 'dataset_reference': data['reference'], + 'dbf': dbf, # TODO: not ideal to ship databases across the wire, but we can accept it for now. + 'parameter_dict': parameters } zpf_data.append(data_dict) return zpf_data -def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, approximate_equilibrium: bool = False) -> np.ndarray: +def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np.ndarray, approximate_equilibrium: bool = False) -> np.ndarray: """ Calculate the chemical potentials for the target hyperplane, one vertex at a time @@ -238,15 +246,12 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro are taken as the target hyperplane for the given equilibria. """ - if approximate_equilibrium: - _equilibrium = no_op_equilibrium_ - else: - _equilibrium = equilibrium_ target_hyperplane_chempots = [] - target_hyperplane_phases = [] species = phase_region.species phases = phase_region.phases models = phase_region.models + param_keys = list(models.values())[0]._parameters_arg + parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters)) for vertex in phase_region.hyperplane_vertices: phase_records = vertex.phase_records update_phase_record_parameters(phase_records, parameters) @@ -257,16 +262,14 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro else: # Extract chemical potential hyperplane from multi-phase calculation # Note that we consider all phases in the system, not just ones in this tie region - str_statevar_dict = OrderedDict([(str(key), cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) - grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True) - multi_eqdata = _equilibrium(phase_records, cond_dict, grid) - target_hyperplane_phases.append(multi_eqdata.Phase.squeeze()) - # Does there exist only a single phase in the result with zero internal degrees of freedom? - # We should exclude those chemical potentials from the average because they are meaningless. - num_phases = np.sum(multi_eqdata.Phase.squeeze() != '') - Y_values = multi_eqdata.Y.squeeze() + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_records, conditions=cond_dict, parameters=parameters_dict) + # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species + active_pure_elements = [list(x.constituents.keys()) for x in species] + active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) + MU_values = [wks.get(v.MU(comp)) for comp in active_pure_elements] + num_phases = np.sum(wks.eq.Phase.squeeze() != '') + Y_values = wks.eq.Y.squeeze() no_internal_dof = np.all((np.isclose(Y_values, 1.)) | np.isnan(Y_values)) - MU_values = multi_eqdata.MU.squeeze() if (num_phases == 1) and no_internal_dof: target_hyperplane_chempots.append(np.full_like(MU_values, np.nan)) else: @@ -276,12 +279,14 @@ def estimate_hyperplane(phase_region: PhaseRegion, parameters: np.ndarray, appro def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, - phase_region: PhaseRegion, vertex: RegionVertex, - parameters: np.ndarray, approximate_equilibrium: bool = False) -> float: + phase_region: PhaseRegion, dbf: Database, parameter_dict, vertex: RegionVertex, + parameters: np.ndarray, approximate_equilibrium: bool = False) -> Tuple[float,List[float]]: """Calculate the integrated driving force between the current hyperplane and target hyperplane. """ species = phase_region.species models = phase_region.models + param_keys = list(models.values())[0]._parameters_arg + parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters)) current_phase = vertex.phase_name cond_dict = {**phase_region.potential_conds, **vertex.comp_conds} str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) @@ -322,14 +327,9 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, driving_force = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM driving_force = float(np.squeeze(driving_force)) else: - # Extract energies from single-phase calculations - grid = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, points=phase_points, pdens=50, fake_points=True) - # TODO: consider enabling approximate for this? - converged, energy = constrained_equilibrium(phase_records, cond_dict, grid) - if not converged: - _log.debug('Calculation failure: constrained equilibrium not converged for %s, conditions: %s, parameters %s', current_phase, cond_dict, parameters) - return np.inf - driving_force = float(np.dot(target_hyperplane_chempots, vertex.composition) - float(energy)) + wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_records, conditions=cond_dict, parameters=parameters_dict) + constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM')) + driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy return driving_force @@ -380,7 +380,7 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]], for phase_region in data['phase_regions']: # 1. Calculate the average multiphase hyperplane eq_str = phase_region.eq_str() - target_hyperplane = estimate_hyperplane(phase_region, parameters, approximate_equilibrium=approximate_equilibrium) + target_hyperplane = estimate_hyperplane(phase_region, data['dbf'], parameters, approximate_equilibrium=approximate_equilibrium) if np.any(np.isnan(target_hyperplane)): _log.debug('NaN target hyperplane. Equilibria: (%s), driving force: 0.0, reference: %s.', eq_str, dataset_ref) data_driving_forces.extend([0]*len(phase_region.vertices)) @@ -388,7 +388,7 @@ def calculate_zpf_driving_forces(zpf_data: Sequence[Dict[str, Any]], continue # 2. Calculate the driving force to that hyperplane for each vertex for vertex in phase_region.vertices: - driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, vertex, parameters, + driving_force = driving_force_to_hyperplane(target_hyperplane, phase_region, data['dbf'], data['parameter_dict'], vertex, parameters, approximate_equilibrium=approximate_equilibrium, ) if np.isinf(driving_force) and short_circuit: @@ -421,7 +421,7 @@ def calculate_zpf_error(zpf_data: Sequence[Dict[str, Any]], return 0.0 driving_forces, weights = calculate_zpf_driving_forces(zpf_data, parameters, approximate_equilibrium, short_circuit=True) # Driving forces and weights are 2D ragged arrays with the shape (len(zpf_data), len(zpf_data['values'])) - driving_forces = np.concatenate(driving_forces) + driving_forces = np.concatenate(driving_forces).T weights = np.concatenate(weights) if np.any(np.logical_or(np.isinf(driving_forces), np.isnan(driving_forces))): return -np.inf diff --git a/espei/paramselect.py b/espei/paramselect.py index 27a85f3e..758fa98c 100644 --- a/espei/paramselect.py +++ b/espei/paramselect.py @@ -108,7 +108,7 @@ def _build_feature_matrix(sample_condition_dicts: List[Dict[Symbol, float]], sym M = len(sample_condition_dicts) N = len(symbolic_coefficients) feature_matrix = np.empty((M, N)) - coeffs = ImmutableDenseMatrix(symbolic_coefficients) + coeffs = ImmutableDenseMatrix([symbolic_coefficients]) # see https://github.com/symengine/symengine.py/issues/485 for i in range(M): # Profiling-guided optimization # At the time, we got a 3x performance speedup compared to calling subs diff --git a/espei/plot.py b/espei/plot.py index ca329ba8..a2357ebe 100644 --- a/espei/plot.py +++ b/espei/plot.py @@ -504,7 +504,6 @@ def _get_interaction_predicted_values(dbf, comps, phase_name, configuration, out grid = np.linspace(0, 1, num=100) point_matrix = grid[None].T * second_endpoint + (1 - grid)[None].T * first_endpoint # TODO: Real temperature support - point_matrix = point_matrix[None, None] predicted_values = calculate( dbf, comps, [phase_name], output=output, T=298.15, P=101325, points=point_matrix, model=mod)[output].values.flatten() @@ -715,7 +714,7 @@ def plot_endmember(dbf, comps, phase_name, configuration, output, datasets=None, else: mod = Model(dbf, comps, phase_name) prop = output - endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None] + endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction)) # Set up the domain of the calculation species = unpack_components(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those diff --git a/espei/shadow_functions.py b/espei/shadow_functions.py index 75179d9e..9aa34e86 100644 --- a/espei/shadow_functions.py +++ b/espei/shadow_functions.py @@ -8,24 +8,21 @@ from numpy.typing import ArrayLike import numpy as np from pycalphad import Model, variables as v +from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad.core.phase_rec import PhaseRecord from pycalphad.core.composition_set import CompositionSet from pycalphad.core.starting_point import starting_point from pycalphad.core.eqsolver import _solve_eq_at_conditions -from pycalphad.core.equilibrium import _adjust_conditions +from pycalphad.core.workspace import _adjust_conditions from pycalphad.core.utils import get_state_variables, unpack_kwarg, point_sample from pycalphad.core.light_dataset import LightDataset from pycalphad.core.calculate import _sample_phase_constitution, _compute_phase_values from pycalphad.core.solver import Solver -def update_phase_record_parameters(phase_records: Dict[str, PhaseRecord], parameters: ArrayLike) -> None: +def update_phase_record_parameters(phase_record_factory: PhaseRecordFactory, parameters: ArrayLike) -> None: if parameters.size > 0: - for phase_name, phase_record in phase_records.items(): - # very important that these are floats, otherwise parameters can end up - # with garbage data. `np.asarray` does not create a copy if the type is - # correct - phase_record.parameters[:] = np.asarray(parameters, dtype=np.float64) + phase_record_factory.param_values[:] = np.asarray(parameters, dtype=np.float64) def _single_phase_start_point(conditions, state_variables, phase_records, grid): """Return a single CompositionSet object to use in a point calculation @@ -55,7 +52,7 @@ def _single_phase_start_point(conditions, state_variables, phase_records, grid): Y = grid.Y[..., idx_min, :].squeeze()[:prx.phase_dof] # Get current state variables # TODO: can we assume sorting - state_vars = np.array([conditions[sv][0] for sv in sorted(state_variables, key=str)]) + state_vars = np.array([conditions[sv][0].to(sv.implementation_units).magnitude for sv in sorted(state_variables, key=str)]) compset = CompositionSet(prx) compset.update(Y, 1.0, state_vars) return compset @@ -74,6 +71,8 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], points_dict = unpack_kwarg(points, default_arg=None) pdens_dict = unpack_kwarg(pdens, default_arg=50) nonvacant_components = [x for x in sorted(species) if x.number_of_atoms > 0] + cur_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility + str_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility maximum_internal_dof = max(prx.phase_dof for prx in phase_records.values()) all_phase_data = [] for phase_name in sorted(phases): @@ -81,11 +80,10 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], phase_record = phase_records[phase_name] points = points_dict[phase_name] if points is None: - points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name]) + points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name], cur_phase_local_conditions) points = np.atleast_2d(points) - fp = fake_points and (phase_name == sorted(phases)[0]) - phase_ds = _compute_phase_values(nonvacant_components, str_statevar_dict, + phase_ds = _compute_phase_values(nonvacant_components, str_statevar_dict, str_phase_local_conditions, points, phase_record, output, maximum_internal_dof, broadcast=broadcast, largest_energy=float(1e10), fake_points=fp, @@ -125,12 +123,12 @@ def constrained_equilibrium(phase_records: Dict[str, PhaseRecord], statevars = get_state_variables(conds=conditions) conditions = _adjust_conditions(conditions) # Assume that all conditions keys are lists with exactly one element (point calculation) - str_conds = OrderedDict([(str(ky), conditions[ky][0]) for ky in sorted(conditions.keys(), key=str)]) + unitless_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) compset = _single_phase_start_point(conditions, statevars, phase_records, grid) solution_compsets = [compset] solver = Solver() # modifies `solution_compsets` and `compset` in place - solver_result = solver.solve(solution_compsets, str_conds) + solver_result = solver.solve(solution_compsets, unitless_conds) energy = compset.NP * compset.energy return solver_result.converged, energy @@ -142,9 +140,9 @@ def equilibrium_(phase_records: Dict[str, PhaseRecord], """ statevars = sorted(get_state_variables(conds=conditions), key=str) conditions = _adjust_conditions(conditions) - str_conds = OrderedDict([(str(ky), conditions[ky]) for ky in sorted(conditions.keys(), key=str)]) - start_point = starting_point(conditions, statevars, phase_records, grid) - return _solve_eq_at_conditions(start_point, phase_records, grid, str_conds, statevars, False) + stripped_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) + start_point = starting_point(stripped_conds, statevars, phase_records, grid) + return _solve_eq_at_conditions(start_point, phase_records, grid, conditions, statevars, False) def no_op_equilibrium_(phase_records: Dict[str, PhaseRecord], @@ -164,4 +162,5 @@ def no_op_equilibrium_(phase_records: Dict[str, PhaseRecord], """ statevars = get_state_variables(conds=conditions) conditions = _adjust_conditions(conditions) - return starting_point(conditions, statevars, phase_records, grid) + stripped_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) + return starting_point(stripped_conds, statevars, phase_records, grid) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 6fa2bc78..7e98aee5 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -12,7 +12,7 @@ import scipy.stats from tinydb import where -from pycalphad import Database, Model, variables as v +from pycalphad import Database from espei.paramselect import generate_parameters from espei.error_functions import * @@ -82,7 +82,7 @@ def test_get_thermochemical_data_filters_invalid_sublattice_configurations(datas print('thermochemical data:', thermochemical_data) assert thermochemical_data[0]["calculate_dict"]["values"].shape == (2,) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, -14.28729) @@ -110,7 +110,7 @@ def test_get_thermochemical_data_filters_configurations_when_all_configurations_ print('thermochemical data:', thermochemical_data) assert thermochemical_data[0]["calculate_dict"]["values"].shape == (0,) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, 0) @@ -122,7 +122,7 @@ def test_non_equilibrium_thermochemical_error_with_multiple_X_points(datasets_db phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, -4061.119001241541, rtol=1e-6) @@ -135,7 +135,7 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_points(datasets_db phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error,-14.287293263253728, rtol=1e-6) @@ -147,7 +147,10 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_X_points(datasets_ phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + symbols_to_fit = database_symbols_to_fit(dbf) + initial_guess = np.array([unpack_piecewise(dbf.symbols[s]) for s in symbols_to_fit]) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf, parameters=dict(zip(symbols_to_fit, initial_guess))) + assert np.isclose(float(error), -3282497.2380024833, rtol=1e-6) def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess_only(datasets_db): @@ -198,7 +201,7 @@ def test_non_equilibrium_thermochemical_error_for_mixing_entropy_error_is_excess zero_error_prob = scipy.stats.norm(loc=0, scale=0.2).logpdf(0.0) # SM weight = 0.2 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) @@ -249,7 +252,7 @@ def test_non_equilibrium_thermochemical_error_for_of_enthalpy_mixing(datasets_db zero_error_prob = scipy.stats.norm(loc=0, scale=500.0).logpdf(0.0) # HM weight = 500 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) @@ -264,16 +267,16 @@ def test_subsystem_non_equilibrium_thermochemcial_probability(datasets_db): # Truth thermochemical_data = get_thermochemical_data(dbf_bin, ['CR', 'NI', 'VA'], phases, datasets_db) - bin_prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + bin_prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_bin) # Getting binary subsystem data explictly (from binary input) thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'NI', 'VA'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern) assert np.isclose(prob, bin_prob) # Getting binary subsystem from ternary input thermochemical_data = get_thermochemical_data(dbf_tern, ['CR', 'FE', 'NI', 'VA'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf_tern) assert np.isclose(prob, bin_prob) @@ -404,7 +407,7 @@ def test_non_equilibrium_thermochemcial_species(datasets_db): phases = ['LIQUID'] thermochemical_data = get_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db) - prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + prob = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) # Near zero error and non-zero error assert np.isclose(prob, (-7.13354663 + -22.43585011)) @@ -483,7 +486,7 @@ def test_equilibrium_thermochemical_error_computes_correct_probability(datasets_ def test_driving_force_miscibility_gap(datasets_db): datasets_db.insert(A_B_DATASET_ALPHA) dbf = Database(A_B_REGULAR_SOLUTION_TDB) - parameters = {"L_ALPHA": None} + parameters = {"L_ALPHA": 0} zpf_data = get_zpf_data(dbf, ["A", "B"], ["ALPHA"], datasets_db, parameters) # probability for zero error error with ZPF weight = 1000.0 diff --git a/tests/test_parameter_generation.py b/tests/test_parameter_generation.py index 224dcf32..3bf803fe 100644 --- a/tests/test_parameter_generation.py +++ b/tests/test_parameter_generation.py @@ -143,7 +143,7 @@ def test_mixing_energies_are_fit(datasets_db): zero_error_prob = scipy.stats.norm(loc=0, scale=500.0).logpdf(0.0) # HM weight = 500 # Explicitly pass parameters={} to not try fitting anything thermochemical_data = get_thermochemical_data(dbf, sorted(read_dbf.elements), list(read_dbf.phases.keys()), datasets_db, symbols_to_fit=[]) - error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data) + error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, zero_error_prob, atol=1e-6) def test_duplicate_parameters_are_not_added_with_input_database(datasets_db): From 226deee0f2affea11f74fad7b5e627c8dadc3213 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Tue, 30 Jul 2024 00:04:48 -0700 Subject: [PATCH 02/24] non_equilibrium_thermochemical_error cleanup --- .../non_equilibrium_thermochemical_error.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 375d4674..2297c5be 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -320,10 +320,13 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters): + species = calc_data['species'] phase_name = calc_data['phase_name'] + models = calc_data['model'] # Dict[PhaseName: Model] output = calc_data['output'] phase_records = calc_data['phase_records'] sample_values = calc_data['calculate_dict']['values'] + str_statevar_dict = calc_data['str_statevar_dict'] constituent_list = [] sublattice_list = [] @@ -337,8 +340,8 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig differences = [] for index in range(len(sample_values)): cond_dict = {} - for key in calc_data['str_statevar_dict'].keys(): - cond_dict.update({key: calc_data['str_statevar_dict'][key][index]}) + for sv_key, sv_val in str_statevar_dict.items(): + cond_dict.update({sv_key: sv_val[index]}) # here we build the internal DOF as a dictionary to use as conditions dof = {} @@ -350,22 +353,22 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig # TODO: convergence seems like it can get messed up if including DOF as conditions. # We temporarily disable that, but be aware that this will allow the minimizer to minimizer internal DOF, so it will only truly work for phases with (# internal DOF) = (# composition conditions) - # wks = Workspace(database=dbf, components=calc_data['species'], phases=phase_name, models=calc_data["model"], conditions={**cond_dict, **dof}) + # wks = Workspace(database=dbf, components=species, phases=phase_name, models=models, conditions={**cond_dict, **dof}) # TODO: If tests are passing with this, that's bad. We need a phase with more internal DOF (think Laves 2SL) # -- I believe the test_equilibrium_thermochemcial_error_species fails due to this, but more investigation needed # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species - active_pure_elements = [list(x.constituents.keys()) for x in calc_data["species"]] + active_pure_elements = [list(x.constituents.keys()) for x in species] active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) ind_comps = len(active_pure_elements) - 1 for comp in active_pure_elements: if v.Species(comp) != v.Species('VA') and ind_comps > 0: - cond_dict[v.X(comp)] = float(calc_data["model"][phase_name].moles(comp).xreplace(dof)) + cond_dict[v.X(comp)] = float(models[phase_name].moles(comp).xreplace(dof)) ind_comps = ind_comps - 1 # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why. # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered. # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. - wks = Workspace(database=dbf, components=calc_data['species'], phases=[phase_name], conditions={**cond_dict}, models=calc_data["model"], phase_record_factory=phase_records, parameters=parameters) - wks.phase_record_factory.models = calc_data["model"] # if commented: 2 failed, 1 passed. uncommented 1 failed, 2 passed + wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_records, parameters=parameters) + wks.phase_record_factory.models = models # Sometimes in miscibility gaps the solver has trouble converging for # IsolatedPhase if the first compset it finds is at the edge of # composition space. Here, since we know the degrees of freedom, we From 1803fb78d855d34a1f9eff8a9892d7445ae1f287 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 11:50:25 -0700 Subject: [PATCH 03/24] Delete unusued shadow_functions --- .../equilibrium_thermochemical_error.py | 7 +- espei/shadow_functions.py | 82 ------------------- 2 files changed, 1 insertion(+), 88 deletions(-) diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index b7491ac6..bcaa172c 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -19,7 +19,7 @@ from espei.error_functions.residual_base import ResidualFunction, residual_function_registry from espei.phase_models import PhaseModelSpecification -from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters +from espei.shadow_functions import update_phase_record_parameters from espei.typing import SymbolName from espei.utils import PickleableTinyDB, database_symbols_to_fit @@ -197,11 +197,6 @@ def calc_prop_differences(eqpropdata: EqPropData, * weights for this dataset """ - if approximate_equilibrium: - _equilibrium = no_op_equilibrium_ - else: - _equilibrium = equilibrium_ - dbf = eqpropdata.dbf species = eqpropdata.species phases = eqpropdata.phases diff --git a/espei/shadow_functions.py b/espei/shadow_functions.py index 9aa34e86..4046ff64 100644 --- a/espei/shadow_functions.py +++ b/espei/shadow_functions.py @@ -24,39 +24,6 @@ def update_phase_record_parameters(phase_record_factory: PhaseRecordFactory, par if parameters.size > 0: phase_record_factory.param_values[:] = np.asarray(parameters, dtype=np.float64) -def _single_phase_start_point(conditions, state_variables, phase_records, grid): - """Return a single CompositionSet object to use in a point calculation - - Assumes the grid has includes only candidate phases. The starting point will be - generated by taking the minimum energy, regardless of whether mass balance is - satisfied. - - Parameters - ---------- - conditions : Dict[v.StateVariable, ArrayLike] - Conditions in this region. Assumes state variable conditions have size == 1. - state_variables : List[v.StateVariable] - Active state variables in the calculation - phase_records : Dict[str, PhaseRecord] - Phase records, can have more than just the phase of interest - grid : LightDataset - Sampled grid of points - - """ - # assumes state variables in the conditions have size == 1 - idx_min = grid.GM.argmin() - # Assumes ordering of dimensions is [state variables, points, data_variable...] - # Get phase record - phase_name = str(grid.Phase[..., idx_min].squeeze()) - prx = phase_records[phase_name] - Y = grid.Y[..., idx_min, :].squeeze()[:prx.phase_dof] - # Get current state variables - # TODO: can we assume sorting - state_vars = np.array([conditions[sv][0].to(sv.implementation_units).magnitude for sv in sorted(state_variables, key=str)]) - compset = CompositionSet(prx) - compset.update(Y, 1.0, state_vars) - return compset - def calculate_(species: Sequence[v.Species], phases: Sequence[str], str_statevar_dict: Dict[str, np.ndarray], models: Dict[str, Model], @@ -115,52 +82,3 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], final_ds = all_phase_data[0] final_ds.attrs['phase_indices'] = islice_by_phase return final_ds - - -def constrained_equilibrium(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], grid: LightDataset): - """Perform an equilibrium calculation with just a single composition set that is constrained to the global composition condition""" - statevars = get_state_variables(conds=conditions) - conditions = _adjust_conditions(conditions) - # Assume that all conditions keys are lists with exactly one element (point calculation) - unitless_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) - compset = _single_phase_start_point(conditions, statevars, phase_records, grid) - solution_compsets = [compset] - solver = Solver() - # modifies `solution_compsets` and `compset` in place - solver_result = solver.solve(solution_compsets, unitless_conds) - energy = compset.NP * compset.energy - return solver_result.converged, energy - -def equilibrium_(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], grid: LightDataset - ) -> LightDataset: - """ - Perform a fast equilibrium calculation with virtually no overhead. - """ - statevars = sorted(get_state_variables(conds=conditions), key=str) - conditions = _adjust_conditions(conditions) - stripped_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) - start_point = starting_point(stripped_conds, statevars, phase_records, grid) - return _solve_eq_at_conditions(start_point, phase_records, grid, conditions, statevars, False) - - -def no_op_equilibrium_(phase_records: Dict[str, PhaseRecord], - conditions: Dict[v.StateVariable, np.ndarray], - grid: LightDataset, - ) -> LightDataset: - """ - Perform a fast "equilibrium" calculation with virtually no overhead that - doesn't refine the solution or do global minimization, but just returns - the starting point. - - Notes - ----- - Uses a placeholder first argument for the same signature as - ``_equilibrium``, but ``species`` are not needed. - - """ - statevars = get_state_variables(conds=conditions) - conditions = _adjust_conditions(conditions) - stripped_conds = OrderedDict([(ky, conditions[ky].to(ky.implementation_units).magnitude) for ky in sorted(conditions.keys(), key=str)]) - return starting_point(stripped_conds, statevars, phase_records, grid) From 4926a6cdbdbd9acd8d9fe3e13e2ea2ea73893457 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 11:57:35 -0700 Subject: [PATCH 04/24] Remove usages of approximate equilibrium in test_error_functions - it doesn't do anything --- tests/test_error_functions.py | 44 +++++++++++------------------------ 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 7e98aee5..751c78a8 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -354,11 +354,8 @@ def test_zpf_error_species(datasets_db): zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0) zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) - exact_likelihood = calculate_zpf_error(zpf_data, approximate_equilibrium=False) - assert np.isclose(exact_likelihood, zero_error_probability) - approx_likelihood = calculate_zpf_error(zpf_data, approximate_equilibrium=True) - # accept higher tolerance for approximate - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-4) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability) def test_zpf_error_equilibrium_failure(datasets_db): @@ -374,10 +371,8 @@ def test_zpf_error_equilibrium_failure(datasets_db): zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) with mock.patch('espei.error_functions.zpf_error.estimate_hyperplane', return_value=np.array([np.nan, np.nan])): - exact_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(exact_likelihood, zero_error_probability, rtol=1e-6) - approx_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-6) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability, rtol=1e-6) def test_zpf_error_works_for_stoichiometric_cmpd_tielines(datasets_db): @@ -392,10 +387,8 @@ def test_zpf_error_works_for_stoichiometric_cmpd_tielines(datasets_db): zero_error_probability = 2 * scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0) zpf_data = get_zpf_data(dbf, comps, phases, datasets_db, {}) - exact_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(exact_likelihood, zero_error_probability, rtol=1e-6) - approx_likelihood = calculate_zpf_error(zpf_data) - assert np.isclose(approx_likelihood, zero_error_probability, rtol=1e-6) + likelihood = calculate_zpf_error(zpf_data) + assert np.isclose(likelihood, zero_error_probability, rtol=1e-6) def test_non_equilibrium_thermochemcial_species(datasets_db): @@ -423,13 +416,8 @@ def test_equilibrium_thermochemcial_error_species(datasets_db): eqdata = get_equilibrium_thermochemical_data(dbf, ['LI', 'SN'], phases, datasets_db) # Thermo-Calc truth_values = np.array([0.0, -28133.588, -40049.995, 0.0]) - # Approximate - errors_approximate, weights = calc_prop_differences(eqdata[0], np.array([]), True) - # Looser tolerances because the equilibrium is approximate, note that this is pdens dependent - assert np.all(np.isclose(errors_approximate, truth_values, atol=1e-5, rtol=1e-3)) - # Exact - errors_exact, weights = calc_prop_differences(eqdata[0], np.array([]), False) - assert np.all(np.isclose(errors_exact, truth_values, atol=1e-5)) + residuals, weights = calc_prop_differences(eqdata[0], np.array([])) + assert np.all(np.isclose(residuals, truth_values, atol=1e-5)) def test_equilibrium_thermochemical_error_unsupported_property(datasets_db): @@ -442,8 +430,8 @@ def test_equilibrium_thermochemical_error_unsupported_property(datasets_db): phases = list(dbf.phases.keys()) eqdata = get_equilibrium_thermochemical_data(dbf, ['CR', 'NI'], phases, datasets_db) - errors_exact, weights = calc_prop_differences(eqdata[0], np.array([])) - assert np.all(np.isclose(errors_exact, EXPECTED_VALUES, atol=1e-3)) + residuals, weights = calc_prop_differences(eqdata[0], np.array([])) + assert np.all(np.isclose(residuals, EXPECTED_VALUES, atol=1e-3)) def test_equilibrium_property_residual_function(datasets_db): @@ -494,25 +482,19 @@ def test_driving_force_miscibility_gap(datasets_db): # Ideal solution case params = np.array([0.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) - assert np.isclose(prob, zero_error_prob) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) + prob = calculate_zpf_error(zpf_data, parameters=params) assert np.isclose(prob, zero_error_prob) # Negative interaction case params = np.array([-10000.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) - assert np.isclose(prob, zero_error_prob) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) + prob = calculate_zpf_error(zpf_data, parameters=params) assert np.isclose(prob, zero_error_prob) # Miscibility gap case params = np.array([10000.0]) - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=False) + prob = calculate_zpf_error(zpf_data, parameters=params) # Remember these are log probabilities, so more negative means smaller probability and larger error assert prob < zero_error_prob - prob = calculate_zpf_error(zpf_data, parameters=params, approximate_equilibrium=True) - assert prob < zero_error_prob def test_setting_up_context_with_custom_models(datasets_db): From 9083c5bdb103e4db75e9f58eabbd87a130f19111 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 12:05:42 -0700 Subject: [PATCH 05/24] slight tolerance adjustment to make test pass. i think it's fine it's for what should be a zero --- tests/test_error_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 751c78a8..b9814f95 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -417,7 +417,7 @@ def test_equilibrium_thermochemcial_error_species(datasets_db): # Thermo-Calc truth_values = np.array([0.0, -28133.588, -40049.995, 0.0]) residuals, weights = calc_prop_differences(eqdata[0], np.array([])) - assert np.all(np.isclose(residuals, truth_values, atol=1e-5)) + assert np.all(np.isclose(residuals, truth_values, atol=3e-5)) def test_equilibrium_thermochemical_error_unsupported_property(datasets_db): From 39bb89e5d1821f43cf8383ac844ba766ffe9e04a Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 12:06:56 -0700 Subject: [PATCH 06/24] drop py 38 --- .github/workflows/tests.yaml | 4 ++-- setup.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9b9f6420..54ce4ad4 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -33,7 +33,7 @@ jobs: fail-fast: false max-parallel: 100 matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: @@ -52,7 +52,7 @@ jobs: fail-fast: false max-parallel: 100 matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 with: diff --git a/setup.py b/setup.py index c4a31e48..123df475 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,6 @@ def readme(fname): 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', From 5330399dd0f955efc65d160ba4eeb86179342331 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 13:45:39 -0700 Subject: [PATCH 07/24] Remove dead code --- espei/error_functions/zpf_error.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index 0617e075..d4b6371a 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -118,24 +118,6 @@ def _compute_vertex_composition(comps: Sequence[str], comp_conds: Dict[str, floa return vertex_composition -def _subsample_phase_points(phase_record, phase_points, target_composition, additional_distance_radius=0.02): - # Compute the mole fractions of each point - phase_compositions = np.zeros((phase_points.shape[0], target_composition.size), order='F') - # TODO: potential bug here if the composition has dependence (even piecewise - # dependence) in the state variables. The compositions may be nan in this case. - statevar_placeholder = np.ones((phase_points.shape[0], phase_record.num_statevars)) - dof = np.hstack((statevar_placeholder, phase_points)) - for el_idx in range(target_composition.size): - phase_record.mass_obj_2d(phase_compositions[:, el_idx], dof, el_idx) - - # Find the points indicdes where the mass is within the radius of minimum distance + additional_distance_radius - distances = np.mean(np.abs(phase_compositions - target_composition), axis=1) - idxs = np.nonzero(distances < (distances.min() + additional_distance_radius))[0] - - # Return the sub-space of points where this condition holds valid - return phase_points[idxs] - - def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], datasets: PickleableTinyDB, parameters: Dict[str, float], model: Optional[Dict[str, Type[Model]]] = None): """ Return the ZPF data used in the calculation of ZPF error From c987ac0f0b3eae054c33e096e2b962fab5368e3f Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 13:45:51 -0700 Subject: [PATCH 08/24] Time residuals --- espei/optimizers/opt_mcmc.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/espei/optimizers/opt_mcmc.py b/espei/optimizers/opt_mcmc.py index f4820218..5d0cc865 100644 --- a/espei/optimizers/opt_mcmc.py +++ b/espei/optimizers/opt_mcmc.py @@ -289,12 +289,14 @@ def predict(params, **ctx): lnlike = 0.0 likelihoods = {} for residual_obj in ctx.get("residual_objs", []): + residual_starttime = time.time() likelihood = residual_obj.get_likelihood(params) - likelihoods[type(residual_obj).__name__] = likelihood + residual_time = time.time() - residual_starttime + likelihoods[type(residual_obj).__name__] = (likelihood, residual_time) lnlike += likelihood liketime = time.time() - starttime - like_str = ". ".join([f"{ky}: {vl:0.3f}" for ky, vl in likelihoods.items()]) + like_str = ". ".join([f"{ky}: {vl[0]:0.3f} ({vl[1]:0.2f} s)" for ky, vl in likelihoods.items()]) lnlike = np.array(lnlike, dtype=np.float64) _log.trace('Likelihood - %0.2fs - %s. Total: %0.3f.', liketime, like_str, lnlike) From af4bd046952c70bf07527dd60334dfb0cbe4e8b1 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Fri, 2 Aug 2024 20:38:52 -0700 Subject: [PATCH 09/24] Fix internal DOF, now we're perfectly matching release version --- .../non_equilibrium_thermochemical_error.py | 70 ++++++++++++++----- tests/test_error_functions.py | 13 ++++ tests/testing_data.py | 28 +++++++- 3 files changed, 91 insertions(+), 20 deletions(-) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 2297c5be..2eab80e7 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -18,6 +18,8 @@ from pycalphad import Workspace from pycalphad.property_framework import IsolatedPhase from pycalphad.property_framework.metaproperties import find_first_compset +from pycalphad.core.solver import Solver, SolverResult + from espei.datasets import Dataset from espei.core_utils import ravel_conditions, get_prop_data, filter_temperatures @@ -30,6 +32,45 @@ _log = logging.getLogger(__name__) +class NoSolveSolver(Solver): + def solve(self, composition_sets, conditions): + """ + Initialize a solver, but don't actually do anything - i.e. return the SolverResult as if the calculation converged. + + """ + spec = self.get_system_spec(composition_sets, conditions) + self._fix_state_variables_in_compsets(composition_sets, conditions) + state = spec.get_new_state(composition_sets) + # DO NOT: converged = spec.run_loop(state, 1000) + + if self.remove_metastable: + phase_idx = 0 + compsets_to_remove = [] + for compset in composition_sets: + # Mark unstable phases for removal + if compset.NP <= 0.0 and not compset.fixed: + compsets_to_remove.append(int(phase_idx)) + phase_idx += 1 + # Watch removal order here, as the indices of composition_sets are changing! + for idx in reversed(compsets_to_remove): + del composition_sets[idx] + + phase_amt = [compset.NP for compset in composition_sets] + + x = composition_sets[0].dof + state_variables = composition_sets[0].phase_record.state_variables + num_statevars = len(state_variables) + for compset in composition_sets[1:]: + x = np.r_[x, compset.dof[num_statevars:]] + x = np.r_[x, phase_amt] + chemical_potentials = np.array(state.chemical_potentials) + + if self.verbose: + print('Chemical Potentials', chemical_potentials) + print(np.asarray(x)) + return SolverResult(converged=True, x=x, chemical_potentials=chemical_potentials) + + # TODO: make into an object similar to how ZPF data works? FixedConfigurationCalculationData = NewType("FixedConfigurationCalculationData", Dict[str, Any]) @@ -287,7 +328,8 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic model_cls = model.get(phase_name, Model) mod = model_cls(dbf, comps, phase_name, parameters=symbols_to_fit) if prop.endswith('_FORM'): - output = ''.join(prop.split('_')[:-1]) + output = ''.join(prop.split('_')[:-1])+"R" + # print("SHIFTING", output) mod.shift_reference_state(ref_states, dbf, contrib_mods={e: symengine.S.Zero for e in exclusion}) else: output = prop @@ -318,7 +360,6 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic return all_data_dicts - def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfigurationCalculationData, parameters): species = calc_data['species'] phase_name = calc_data['phase_name'] @@ -343,7 +384,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig for sv_key, sv_val in str_statevar_dict.items(): cond_dict.update({sv_key: sv_val[index]}) - # here we build the internal DOF as a dictionary to use as conditions + # Build internal DOF as if they were used in conditions dof = {} for site_frac in range(len(constituent_list)): comp = constituent_list[site_frac] @@ -351,12 +392,8 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig sublattice = sublattice_list[site_frac] dof.update({v.Y(phase_name,sublattice,comp): occupancy}) - # TODO: convergence seems like it can get messed up if including DOF as conditions. - # We temporarily disable that, but be aware that this will allow the minimizer to minimizer internal DOF, so it will only truly work for phases with (# internal DOF) = (# composition conditions) - # wks = Workspace(database=dbf, components=species, phases=phase_name, models=models, conditions={**cond_dict, **dof}) - # TODO: If tests are passing with this, that's bad. We need a phase with more internal DOF (think Laves 2SL) - # -- I believe the test_equilibrium_thermochemcial_error_species fails due to this, but more investigation needed # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species + # Build composition conditions, probably not necessary given that we don't actually solve anything, but still useful in terms of derivatives probably. active_pure_elements = [list(x.constituents.keys()) for x in species] active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) ind_comps = len(active_pure_elements) - 1 @@ -369,17 +406,16 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_records, parameters=parameters) wks.phase_record_factory.models = models - # Sometimes in miscibility gaps the solver has trouble converging for - # IsolatedPhase if the first compset it finds is at the edge of - # composition space. Here, since we know the degrees of freedom, we - # initialize the compset for isolated phase correctly in the first - # place. This is kind of a hack until I figure out full DOF support. + # We then get a composition set and we use a special "NoSolveSolver" to + # ensure that we don't change from the data-specified DOF. compset = find_first_compset(phase_name, wks) new_sitefracs = np.array([sf for _, sf in sorted(dof.items(), key=lambda y: (y[0].phase_name, y[0].sublattice_index, y[0].species.name))]) - new_sitefracs[new_sitefracs < 1e-14] = 1e-14 # fix zeros, otherwise we get ZeroDivisionErrors in the minimizer. - new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)]) # not actually changed - compset.update(new_sitefracs, compset.NP, new_statevars) - results = wks.get(IsolatedPhase(compset, wks=wks)(output)) + new_statevars = np.array(compset.dof[:len(compset.phase_record.state_variables)]) # no updates expected + compset.update(new_sitefracs, 1.0, new_statevars) + iso_phase = IsolatedPhase(compset, wks=wks) + iso_phase.solver = NoSolveSolver() + wks.solver = NoSolveSolver() + results = wks.get(iso_phase(output)) sample_differences = results - sample_values[index] differences.append(sample_differences) return differences diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index b9814f95..7b1541e2 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -100,6 +100,19 @@ def test_fixed_configuration_residual_function(datasets_db): assert np.isclose(likelihood, -14.28729, rtol=1e-6) +def test_fixed_configuration_residual_with_internal_degrees_of_freedom(datasets_db): + """Unstable endmembers in phases that have internal degrees of freedom should retain fixed internal DOF""" + dbf = Database(CU_MG_TDB) + datasets_db.insert(CU_MG_HM_FORM_LAVES_C15_ANTISITE) + + residual_func = FixedConfigurationPropertyResidual(dbf, datasets_db, phase_models=None, symbols_to_fit=[]) + + # Expect that the database has the same energy as the dataset (the former was fit to the latter) + residuals, weights = residual_func.get_residuals(np.asarray([])) + assert len(residuals) == len(weights) + assert np.allclose(residuals, [0.0]) + + def test_get_thermochemical_data_filters_configurations_when_all_configurations_are_invalid(datasets_db): datasets_db.insert(CU_MG_HM_MIX_CUMG2_ALL_INVALID) # No valid configurations diff --git a/tests/testing_data.py b/tests/testing_data.py index 657e7bf1..a58365ea 100644 --- a/tests/testing_data.py +++ b/tests/testing_data.py @@ -33,9 +33,9 @@ $ Generated by brandon (pycalphad 0.5.2.post1) $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ -ELEMENT CU BLANK 0 0 0 ! -ELEMENT MG BLANK 0 0 0 ! -ELEMENT VA BLANK 0 0 0 ! +ELEMENT CU FCC_A1 0 0 0 ! +ELEMENT MG HCP_A3 0 0 0 ! +ELEMENT VA VACUUM 0 0 0 ! FUNCTION GFCCCU 298.15 GHSERCU#; 3200.0 N ! FUNCTION GFCCMG 298.15 -0.9*T + GHSERMG# + 2600; 3000.0 N ! @@ -342,6 +342,28 @@ } +CU_MG_HM_FORM_LAVES_C15_ANTISITE = yaml.load("""{ + "components": ["CU", "MG", "VA"], + "phases": ["LAVES_C15"], + "solver": { + "sublattice_site_ratios": [2, 1], + "sublattice_configurations": [["MG", "CU"]], + "mode": "manual" + }, + "conditions": { + "P": 101325, + "T": [298.15], + }, + + "output": "HM_FORM", + "values": [[[34720.0]]], + "reference": "FAKE DATA", + "comment": "Cu2:Mg is the stable endmember, Mg2:Cu is the antisite endmember that should have high energy." +} +""", Loader=YAML_LOADER) + + + CU_MG_HM_MIX_CUMG2_ANTISITE = yaml.load("""{ "components": ["CU", "MG", "VA"], "phases": ["CUMG2"], From fea3e2a90ed1adc093aa51c1b82f94f467597d71 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 16:48:55 -0500 Subject: [PATCH 10/24] fix test_get_thermochemical_data_filters_invalid_sublattice_configurations --- .../non_equilibrium_thermochemical_error.py | 11 ++++++----- tests/test_error_functions.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 2eab80e7..c9b8ff5f 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -159,10 +159,11 @@ def get_prop_samples(desired_data, constituents): Dictionary of condition kwargs for pycalphad's calculate and the expected values """ - # TODO: assumes T, P as conditions + # TODO: assumes T, P, N as conditions # calculate needs points, state variable lists, and values to compare to num_dof = sum(map(len, constituents)) calculate_dict = { + 'N': np.array([]), 'P': np.array([]), 'T': np.array([]), 'points': np.atleast_2d([[]]).reshape(-1, num_dof), @@ -175,6 +176,7 @@ def get_prop_samples(desired_data, constituents): # extract the data we care about datum_T = datum['conditions']['T'] datum_P = datum['conditions']['P'] + datum_N = np.full_like(datum_T, 1.0) configurations = datum['solver']['sublattice_configurations'] occupancies = datum['solver'].get('sublattice_occupancies') values = np.array(datum['values']) @@ -187,7 +189,7 @@ def get_prop_samples(desired_data, constituents): weights = np.broadcast_to(np.asarray(datum.get('weight', 1.0)), values.shape) # broadcast and flatten the conditions arrays - P, T = ravel_conditions(values, datum_P, datum_T) + P, T, N = ravel_conditions(values, datum_P, datum_T, datum_N) if occupancies is None: occupancies = [None] * len(configurations) @@ -198,6 +200,7 @@ def get_prop_samples(desired_data, constituents): # add everything to the calculate_dict calculate_dict['P'] = np.concatenate([calculate_dict['P'], P]) calculate_dict['T'] = np.concatenate([calculate_dict['T'], T]) + calculate_dict['N'] = np.concatenate([calculate_dict['N'], N]) calculate_dict['points'] = np.concatenate([calculate_dict['points'], np.tile(points, (values.shape[0]*values.shape[1], 1))], axis=0) calculate_dict['values'] = np.concatenate([calculate_dict['values'], values.flatten()]) calculate_dict['weights'].extend(weights.flatten()) @@ -404,8 +407,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why. # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered. # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. - wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_records, parameters=parameters) - wks.phase_record_factory.models = models + wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_records, parameters=parameters, solver=NoSolveSolver()) # We then get a composition set and we use a special "NoSolveSolver" to # ensure that we don't change from the data-specified DOF. compset = find_first_compset(phase_name, wks) @@ -414,7 +416,6 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig compset.update(new_sitefracs, 1.0, new_statevars) iso_phase = IsolatedPhase(compset, wks=wks) iso_phase.solver = NoSolveSolver() - wks.solver = NoSolveSolver() results = wks.get(iso_phase(output)) sample_differences = results - sample_values[index] differences.append(sample_differences) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 7b1541e2..ad8c6003 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -78,7 +78,7 @@ def test_get_thermochemical_data_filters_invalid_sublattice_configurations(datas dbf = Database(CU_MG_TDB) comps = ["CU", "MG", "VA"] phases = ["CUMG2"] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) print('thermochemical data:', thermochemical_data) assert thermochemical_data[0]["calculate_dict"]["values"].shape == (2,) From 9f384796697a407a68b500c2c3d105e293705fb8 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 16:52:06 -0500 Subject: [PATCH 11/24] set symbols_to_fit to empty list for more tests --- tests/test_error_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index ad8c6003..7af9e5c4 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -134,7 +134,7 @@ def test_non_equilibrium_thermochemical_error_with_multiple_X_points(datasets_db dbf = Database(CU_MG_TDB) phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error, -4061.119001241541, rtol=1e-6) @@ -147,7 +147,7 @@ def test_non_equilibrium_thermochemical_error_with_multiple_T_points(datasets_db dbf = Database(CU_MG_TDB) phases = list(dbf.phases.keys()) comps = ['CU', 'MG', 'VA'] - thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db) + thermochemical_data = get_thermochemical_data(dbf, comps, phases, datasets_db, symbols_to_fit=[]) error = calculate_non_equilibrium_thermochemical_probability(thermochemical_data, dbf) assert np.isclose(error,-14.287293263253728, rtol=1e-6) From d0279e0b8c5761f77fda35e567ace3cff7b46ca5 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:01:19 -0500 Subject: [PATCH 12/24] another fixup for N=1 --- espei/error_functions/non_equilibrium_thermochemical_error.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index c9b8ff5f..225b9615 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -176,7 +176,8 @@ def get_prop_samples(desired_data, constituents): # extract the data we care about datum_T = datum['conditions']['T'] datum_P = datum['conditions']['P'] - datum_N = np.full_like(datum_T, 1.0) + # TODO: fix this when N different from 1 allowed in pycalphad + datum_N = np.full_like(datum['values'], 1.0) configurations = datum['solver']['sublattice_configurations'] occupancies = datum['solver'].get('sublattice_occupancies') values = np.array(datum['values']) From 51df1321a96d3341621956cd2938aac3a533b626 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:31:00 -0500 Subject: [PATCH 13/24] zpf_error fix for parameters --- espei/error_functions/zpf_error.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index d4b6371a..bfc41dba 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -143,7 +143,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat desired_data = datasets.search((tinydb.where('output') == 'ZPF') & (tinydb.where('components').test(lambda x: set(x).issubset(comps))) & (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0))) - wks = Workspace(dbf, comps, phases) + wks = Workspace(dbf, comps, phases, parameters=parameters) zpf_data = [] # 1:1 correspondence with each dataset for data in desired_data: @@ -154,11 +154,6 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat current_wks.components = data_comps current_wks.conditions = conditions current_wks.phases = phases - models = instantiate_models(dbf, current_wks.components, current_wks.phases, model=model, parameters=parameters) - current_wks.models = models - models = current_wks.models.unwrap() - statevars = {v.N, v.P, v.T} # TODO: hardcoded - current_wks.phase_record_factory = PhaseRecordFactory(dbf, current_wks.components, statevars, models, parameters=parameters) phase_regions = [] # Each phase_region is one set of phases in equilibrium (on a tie-line), # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]] @@ -179,7 +174,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat hyperplane_vertices.append(vtx) continue # Construct single-phase points satisfying the conditions for each phase in the region - mod = models[phase_name] + mod = current_wks.models[phase_name] if np.any(np.isnan(composition)): # We can't construct points because we don't have a known composition has_missing_comp_cond = True @@ -201,7 +196,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat if len(hyperplane_vertices) == 0: # Define the hyperplane at the vertices of the ZPF points hyperplane_vertices = vertices - region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, models) + region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, current_wks.models) phase_regions.append(region) data_dict = { From ad5c1170207720e7cbded4ee3acf6f754f2d78e5 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:40:48 -0500 Subject: [PATCH 14/24] zpf_error model fix --- espei/error_functions/zpf_error.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index bfc41dba..f2da2b7f 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -154,6 +154,9 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat current_wks.components = data_comps current_wks.conditions = conditions current_wks.phases = phases + # only init models that are defined for the phases; fallback to default pycalphad behavior if no custom model defined + current_wks.models = {phase_name: model.get(phase_name, current_wks.models[phase_name]) + for phase_name in current_wks.phases} phase_regions = [] # Each phase_region is one set of phases in equilibrium (on a tie-line), # e.g. [["ALPHA", ["B"], [0.25]], ["BETA", ["B"], [0.5]]] From 55d7aaba8472afa457463e26e301ae7d55dbc341 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:41:53 -0500 Subject: [PATCH 15/24] zpf_error model None fix --- espei/error_functions/zpf_error.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index f2da2b7f..77c7092c 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -144,6 +144,8 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat (tinydb.where('components').test(lambda x: set(x).issubset(comps))) & (tinydb.where('phases').test(lambda x: len(set(phases).intersection(x)) > 0))) wks = Workspace(dbf, comps, phases, parameters=parameters) + if model is None: + model = {} zpf_data = [] # 1:1 correspondence with each dataset for data in desired_data: From 02f9b2430bf7f37283b8769b75f071a474b6ae97 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:44:02 -0500 Subject: [PATCH 16/24] unwrap model DictProxy --- espei/error_functions/zpf_error.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index 77c7092c..e4540fde 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -201,7 +201,7 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat if len(hyperplane_vertices) == 0: # Define the hyperplane at the vertices of the ZPF points hyperplane_vertices = vertices - region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, current_wks.models) + region = PhaseRegion(hyperplane_vertices, vertices, pot_conds, current_wks.components, current_wks.phases, current_wks.models.unwrap()) phase_regions.append(region) data_dict = { From ae2ba79272b5e59277fa7fb54c28b97263626828 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sat, 3 Aug 2024 17:46:44 -0500 Subject: [PATCH 17/24] matplotlib set backend for plotting tests --- tests/test_plotting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index db57262d..33f64479 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -4,7 +4,8 @@ Mostly integration tests that don't validate the plots themselves, but rather that the internal usage and matplotlib usage is correct. """ - +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt from pycalphad import Database, variables as v From aae1565578a51bd49779a3416604db3c64293e5b Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Tue, 6 Aug 2024 10:52:59 -0700 Subject: [PATCH 18/24] Remove `points` from RegionVertex --- espei/error_functions/zpf_error.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index e4540fde..86113c83 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -43,7 +43,6 @@ class RegionVertex: phase_name: str composition: ArrayLike # 1D of size (number nonvacant pure elements) comp_conds: Dict[v.X, float] - points: ArrayLike phase_records: Dict[str, PhaseRecord] is_disordered: bool has_missing_comp_cond: bool @@ -172,31 +171,21 @@ def get_zpf_data(dbf: Database, comps: Sequence[str], phases: Sequence[str], dat for vertex in phase_region: phase_name, comp_conds, disordered_flag = _extract_phases_comps(vertex) composition = _compute_vertex_composition(data_comps, comp_conds) + # TODO: should maybe have different types of RegionVertex that are semantic for __HYPERPLANE__, known phase composition, unknown phase composition if phase_name.upper() == '__HYPERPLANE__': if np.any(np.isnan(composition)): # TODO: make this a part of the dataset checker raise ValueError(f"__HYPERPLANE__ vertex ({vertex}) must have all independent compositions defined to make a well-defined hyperplane (from dataset: {data})") - vtx = RegionVertex(phase_name, composition, comp_conds, None, current_wks.phase_record_factory, disordered_flag, False) + vtx = RegionVertex(phase_name, composition, comp_conds, current_wks.phase_record_factory, disordered_flag, False) hyperplane_vertices.append(vtx) continue - # Construct single-phase points satisfying the conditions for each phase in the region mod = current_wks.models[phase_name] if np.any(np.isnan(composition)): - # We can't construct points because we don't have a known composition has_missing_comp_cond = True - phase_points = None elif _phase_is_stoichiometric(mod): has_missing_comp_cond = False - phase_points = None else: has_missing_comp_cond = False - vertex_wks = current_wks.copy() - vertex_wks.phases = [phase_name] - vertex_wks.conditions = {**pot_conds, **comp_conds} - compset = next(vertex_wks.enumerate_composition_sets())[1][0] - # TODO: phase points can probably be removed, they should not be needed now that IsolatedPhase exists - phase_points = np.array([compset.dof[len(compset.phase_record.state_variables):]]) - assert phase_points.shape[0] > 0, f"phase {phase_name} must have at least one set of points within the target tolerance {pot_conds} {comp_conds}" - vtx = RegionVertex(phase_name, composition, comp_conds, phase_points, current_wks.phase_record_factory, disordered_flag, has_missing_comp_cond) + vtx = RegionVertex(phase_name, composition, comp_conds, current_wks.phase_record_factory, disordered_flag, has_missing_comp_cond) vertices.append(vtx) if len(hyperplane_vertices) == 0: # Define the hyperplane at the vertices of the ZPF points @@ -272,10 +261,9 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, current_phase = vertex.phase_name cond_dict = {**phase_region.potential_conds, **vertex.comp_conds} str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) - phase_points = vertex.points phase_records = vertex.phase_records update_phase_record_parameters(phase_records, parameters) - if phase_points is None: + if vertex.has_missing_comp_cond: # We don't have the phase composition here, so we estimate the driving force. # Can happen if one of the composition conditions is unknown or if the phase is # stoichiometric and the user did not specify a valid phase composition. From 76e0e9130a8365dc41230e1983b2ae98633fc5d0 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 10:29:19 -0700 Subject: [PATCH 19/24] Support v.Component --- espei/error_functions/activity_error.py | 6 +++--- espei/error_functions/equilibrium_thermochemical_error.py | 6 +++--- .../non_equilibrium_thermochemical_error.py | 8 ++++---- espei/error_functions/zpf_error.py | 4 ++-- espei/plot.py | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py index b8df3810..a2d5e239 100644 --- a/espei/error_functions/activity_error.py +++ b/espei/error_functions/activity_error.py @@ -16,7 +16,7 @@ import tinydb from pycalphad import Database, variables as v from pycalphad.plot.eqplot import _map_coord_to_variable -from pycalphad.core.utils import filter_phases, unpack_components +from pycalphad.core.utils import filter_phases, unpack_species from scipy.stats import norm from pycalphad import Workspace @@ -88,7 +88,7 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None, # data_comps and data_phases ensures that we only do calculations on # the subsystem of the system defining the data. data_comps = ds['components'] - data_phases = filter_phases(dbf, unpack_components(dbf, data_comps), candidate_phases=phases) + data_phases = filter_phases(dbf, unpack_species(dbf, data_comps), candidate_phases=phases) ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()} wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters) @@ -201,7 +201,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) self._symbols_to_fit = symbols_to_fit diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index bcaa172c..dc44431d 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -12,7 +12,7 @@ from tinydb import where from scipy.stats import norm from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_components, unpack_condition +from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition from pycalphad.core.phase_rec import PhaseRecord from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad import Workspace, as_property @@ -78,7 +78,7 @@ def build_eqpropdata(data: tinydb.database.Document, params_keys, _ = extract_parameters(parameters) data_comps = list(set(data['components']).union({'VA'})) - species = sorted(unpack_components(dbf, data_comps), key=str) + species = sorted(unpack_species(dbf, data_comps), key=str) data_phases = filter_phases(dbf, species, candidate_phases=data['phases']) models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters) output = data['output'] @@ -294,7 +294,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) # okay if parameters are initialized to zero, we only need the symbol names diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 225b9615..6f4199e6 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -14,7 +14,7 @@ from tinydb import where from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad import Database, Model, ReferenceState, variables as v -from pycalphad.core.utils import unpack_components, get_pure_elements, filter_phases +from pycalphad.core.utils import unpack_species, get_pure_elements, filter_phases from pycalphad import Workspace from pycalphad.property_framework import IsolatedPhase from pycalphad.property_framework.metaproperties import find_first_compset @@ -286,7 +286,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic else: symbols_to_fit = database_symbols_to_fit(dbf) - species_comps = set(unpack_components(dbf, comps)) + species_comps = set(unpack_species(dbf, comps)) # estimated from NIST TRC uncertainties property_std_deviation = { @@ -346,7 +346,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic except AttributeError: mod.reference_model.models[contrib] = symengine.S.Zero model_dict = {phase_name: mod} - species = sorted(unpack_components(dbf, comps), key=str) + species = sorted(unpack_species(dbf, comps), key=str) data_dict['species'] = species statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)} statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0]))) @@ -478,7 +478,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) self.thermochemical_data = get_thermochemical_data(database, comps, phases, datasets, model_dict, weight_dict=self.weight, symbols_to_fit=symbols_to_fit) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index 86113c83..5e3c428e 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -23,7 +23,7 @@ from numpy.typing import ArrayLike from pycalphad import Database, Model, Workspace, variables as v from pycalphad.property_framework import IsolatedPhase -from pycalphad.core.utils import instantiate_models, filter_phases, unpack_components +from pycalphad.core.utils import instantiate_models, filter_phases, unpack_species from pycalphad.core.phase_rec import PhaseRecord from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from scipy.stats import norm @@ -420,7 +420,7 @@ def __init__( else: comps = sorted(database.elements) model_dict = dict() - phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys())) + phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys())) if symbols_to_fit is None: symbols_to_fit = database_symbols_to_fit(database) # okay if parameters are initialized to zero, we only need the symbol names diff --git a/espei/plot.py b/espei/plot.py index a2357ebe..9233951d 100644 --- a/espei/plot.py +++ b/espei/plot.py @@ -10,7 +10,7 @@ import tinydb from symengine import Symbol from pycalphad import Model, calculate, equilibrium, variables as v -from pycalphad.core.utils import unpack_components +from pycalphad.core.utils import unpack_species from pycalphad.plot.utils import phase_legend from pycalphad.plot.eqplot import eqplot, _map_coord_to_variable, unpack_condition @@ -575,7 +575,7 @@ def plot_interaction(dbf, comps, phase_name, configuration, output, datasets=Non else: desired_data = [] - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active @@ -716,7 +716,7 @@ def plot_endmember(dbf, comps, phase_name, configuration, output, datasets=None, prop = output endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction)) # Set up the domain of the calculation - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active @@ -783,7 +783,7 @@ def _compare_data_to_parameters(dbf, comps, phase_name, desired_data, mod, confi matplotlib.Axes """ - species = unpack_components(dbf, comps) + species = unpack_species(dbf, comps) # phase constituents are Species objects, so we need to be doing intersections with those phase_constituents = dbf.phases[phase_name].constituents # phase constituents must be filtered to only active: From 7b172da48942a0274ffbc84e5b347c4a5c4d5f3d Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 14:43:26 -0700 Subject: [PATCH 20/24] phase_records -> PhaseRecordFactory --- .../equilibrium_thermochemical_error.py | 16 +++++++-------- .../non_equilibrium_thermochemical_error.py | 10 +++++----- espei/error_functions/zpf_error.py | 20 +++++++++---------- espei/shadow_functions.py | 18 ++++++----------- 4 files changed, 28 insertions(+), 36 deletions(-) diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index dc44431d..51ee0daf 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -33,7 +33,7 @@ ('composition_conds', Sequence[Dict[v.X, float]]), ('models', Dict[str, Model]), ('params_keys', Dict[str, float]), - ('phase_records', Sequence[Dict[str, PhaseRecord]]), + ('phase_record_factory', PhaseRecordFactory), ('output', str), ('samples', np.ndarray), ('weight', np.ndarray), @@ -100,7 +100,7 @@ def build_eqpropdata(data: tinydb.database.Document, pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')]) comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')]) - phase_records = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters) + phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters) # Now we need to unravel the composition conditions # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the @@ -115,7 +115,7 @@ def build_eqpropdata(data: tinydb.database.Document, dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations) weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten() - return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_records, output, samples, weights, reference) + return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference) def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str], @@ -202,21 +202,21 @@ def calc_prop_differences(eqpropdata: EqPropData, phases = eqpropdata.phases pot_conds = eqpropdata.potential_conds models = eqpropdata.models - phase_records = eqpropdata.phase_records - update_phase_record_parameters(phase_records, parameters) + phase_record_factory = eqpropdata.phase_record_factory + update_phase_record_parameters(phase_record_factory, parameters) params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters)) output = as_property(eqpropdata.output) weights = np.array(eqpropdata.weight, dtype=np.float64) samples = np.array(eqpropdata.samples, dtype=np.float64) - wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_records, parameters=params_dict) + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory, parameters=params_dict) calculated_data = [] for comp_conds in eqpropdata.composition_conds: cond_dict = OrderedDict(**pot_conds, **comp_conds) wks.conditions = cond_dict - wks.parameters = params_dict # these reset models and phase_records through depends_on -> lose Model.shift_reference_state, etc. + wks.parameters = params_dict # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc. wks.models = models - wks.phase_record_factory = phase_records + wks.phase_record_factory = phase_record_factory vals = wks.get(output) calculated_data.extend(np.atleast_1d(vals).tolist()) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 6f4199e6..4faff2d2 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -318,7 +318,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic 'phase_name': phase_name, 'prop': prop, # needs the following keys to be added: - # species, calculate_dict, phase_records, model, output, weights + # species, calculate_dict, phase_record_factory, model, output, weights } # get all the data with these model exclusions if exclusion == tuple([]): @@ -350,11 +350,11 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic data_dict['species'] = species statevar_dict = {getattr(v, c, None): vals for c, vals in calculate_dict.items() if isinstance(getattr(v, c, None), v.StateVariable)} statevar_dict = OrderedDict(sorted(statevar_dict.items(), key=lambda x: str(x[0]))) - phase_records = PhaseRecordFactory(dbf, species, statevar_dict, model_dict, + phase_record_factory = PhaseRecordFactory(dbf, species, statevar_dict, model_dict, parameters={s: 0 for s in symbols_to_fit}) str_statevar_dict = OrderedDict((str(k), vals) for k, vals in statevar_dict.items()) data_dict['str_statevar_dict'] = str_statevar_dict - data_dict['phase_records'] = phase_records + data_dict['phase_record_factory'] = phase_record_factory data_dict['calculate_dict'] = calculate_dict data_dict['model'] = model_dict data_dict['output'] = output @@ -369,7 +369,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig phase_name = calc_data['phase_name'] models = calc_data['model'] # Dict[PhaseName: Model] output = calc_data['output'] - phase_records = calc_data['phase_records'] + phase_record_factory = calc_data['phase_record_factory'] sample_values = calc_data['calculate_dict']['values'] str_statevar_dict = calc_data['str_statevar_dict'] @@ -408,7 +408,7 @@ def compute_fixed_configuration_property_differences(dbf, calc_data: FixedConfig # Need to be careful here. Making a workspace erases the custom models that have some contributions excluded (which are passed in). Not sure exactly why. # The models themselves are preserved, but the ones inside the workspace's phase_record_factory get clobbered. # We workaround this by replacing the phase_record_factory models with ours, but this is definitely a hack we'd like to avoid. - wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_records, parameters=parameters, solver=NoSolveSolver()) + wks = Workspace(database=dbf, components=species, phases=[phase_name], conditions={**cond_dict}, models=models, phase_record_factory=phase_record_factory, parameters=parameters, solver=NoSolveSolver()) # We then get a composition set and we use a special "NoSolveSolver" to # ensure that we don't change from the data-specified DOF. compset = find_first_compset(phase_name, wks) diff --git a/espei/error_functions/zpf_error.py b/espei/error_functions/zpf_error.py index 5e3c428e..f6d961e2 100644 --- a/espei/error_functions/zpf_error.py +++ b/espei/error_functions/zpf_error.py @@ -23,8 +23,7 @@ from numpy.typing import ArrayLike from pycalphad import Database, Model, Workspace, variables as v from pycalphad.property_framework import IsolatedPhase -from pycalphad.core.utils import instantiate_models, filter_phases, unpack_species -from pycalphad.core.phase_rec import PhaseRecord +from pycalphad.core.utils import filter_phases, unpack_species from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from scipy.stats import norm import tinydb @@ -43,7 +42,7 @@ class RegionVertex: phase_name: str composition: ArrayLike # 1D of size (number nonvacant pure elements) comp_conds: Dict[v.X, float] - phase_records: Dict[str, PhaseRecord] + phase_record_factory: PhaseRecordFactory is_disordered: bool has_missing_comp_cond: bool @@ -224,8 +223,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np param_keys = list(models.values())[0]._parameters_arg parameters_dict = dict(zip(sorted(map(str, param_keys)), parameters)) for vertex in phase_region.hyperplane_vertices: - phase_records = vertex.phase_records - update_phase_record_parameters(phase_records, parameters) + update_phase_record_parameters(vertex.phase_record_factory, parameters) cond_dict = {**vertex.comp_conds, **phase_region.potential_conds} if vertex.has_missing_comp_cond: # This composition is unknown -- it doesn't contribute to hyperplane estimation @@ -233,7 +231,7 @@ def estimate_hyperplane(phase_region: PhaseRegion, dbf: Database, parameters: np else: # Extract chemical potential hyperplane from multi-phase calculation # Note that we consider all phases in the system, not just ones in this tie region - wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_records, conditions=cond_dict, parameters=parameters_dict) + wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=vertex.phase_record_factory, conditions=cond_dict, parameters=parameters_dict) # TODO: active_pure_elements should be replaced with wks.components when wks.components no longer includes phase constituent Species active_pure_elements = [list(x.constituents.keys()) for x in species] active_pure_elements = sorted(set(el.upper() for constituents in active_pure_elements for el in constituents) - {"VA"}) @@ -261,13 +259,13 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, current_phase = vertex.phase_name cond_dict = {**phase_region.potential_conds, **vertex.comp_conds} str_statevar_dict = OrderedDict([(str(key),cond_dict[key]) for key in sorted(phase_region.potential_conds.keys(), key=str)]) - phase_records = vertex.phase_records - update_phase_record_parameters(phase_records, parameters) + phase_record_factory = vertex.phase_record_factory + update_phase_record_parameters(phase_record_factory, parameters) if vertex.has_missing_comp_cond: # We don't have the phase composition here, so we estimate the driving force. # Can happen if one of the composition conditions is unknown or if the phase is # stoichiometric and the user did not specify a valid phase composition. - single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, pdens=50) + single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, pdens=50) df = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM driving_force = float(df.max()) elif vertex.is_disordered: @@ -293,11 +291,11 @@ def driving_force_to_hyperplane(target_hyperplane_chempots: np.ndarray, sitefracs_to_add[np.isnan(sitefracs_to_add)] = 1 - np.nansum(sitefracs_to_add) desired_sitefracs[dof_idx:dof_idx + num_subl_dof] = sitefracs_to_add dof_idx += num_subl_dof - single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_records, points=np.asarray([desired_sitefracs])) + single_eqdata = calculate_(species, [current_phase], str_statevar_dict, models, phase_record_factory, points=np.asarray([desired_sitefracs])) driving_force = np.multiply(target_hyperplane_chempots, single_eqdata.X).sum(axis=-1) - single_eqdata.GM driving_force = float(np.squeeze(driving_force)) else: - wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_records, conditions=cond_dict, parameters=parameters_dict) + wks = Workspace(database=dbf, components=species, phases=current_phase, models=models, phase_record_factory=phase_record_factory, conditions=cond_dict, parameters=parameters_dict) constrained_energy = wks.get(IsolatedPhase(current_phase,wks=wks)('GM')) driving_force = np.dot(np.squeeze(target_hyperplane_chempots), vertex.composition) - constrained_energy return driving_force diff --git a/espei/shadow_functions.py b/espei/shadow_functions.py index 4046ff64..ff572fa0 100644 --- a/espei/shadow_functions.py +++ b/espei/shadow_functions.py @@ -9,15 +9,9 @@ import numpy as np from pycalphad import Model, variables as v from pycalphad.codegen.phase_record_factory import PhaseRecordFactory -from pycalphad.core.phase_rec import PhaseRecord -from pycalphad.core.composition_set import CompositionSet -from pycalphad.core.starting_point import starting_point -from pycalphad.core.eqsolver import _solve_eq_at_conditions -from pycalphad.core.workspace import _adjust_conditions -from pycalphad.core.utils import get_state_variables, unpack_kwarg, point_sample +from pycalphad.core.utils import unpack_kwarg, point_sample from pycalphad.core.light_dataset import LightDataset from pycalphad.core.calculate import _sample_phase_constitution, _compute_phase_values -from pycalphad.core.solver import Solver def update_phase_record_parameters(phase_record_factory: PhaseRecordFactory, parameters: ArrayLike) -> None: @@ -27,7 +21,7 @@ def update_phase_record_parameters(phase_record_factory: PhaseRecordFactory, par def calculate_(species: Sequence[v.Species], phases: Sequence[str], str_statevar_dict: Dict[str, np.ndarray], models: Dict[str, Model], - phase_records: Dict[str, PhaseRecord], output: Optional[str] = 'GM', + phase_record_factory: PhaseRecordFactory, output: Optional[str] = 'GM', points: Optional[Dict[str, np.ndarray]] = None, pdens: Optional[int] = 50, broadcast: Optional[bool] = True, fake_points: Optional[bool] = False, @@ -40,11 +34,11 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], nonvacant_components = [x for x in sorted(species) if x.number_of_atoms > 0] cur_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility str_phase_local_conditions = {} # XXX: Temporary hack to allow compatibility - maximum_internal_dof = max(prx.phase_dof for prx in phase_records.values()) + maximum_internal_dof = max(prx.phase_dof for prx in phase_record_factory.values()) all_phase_data = [] for phase_name in sorted(phases): mod = models[phase_name] - phase_record = phase_records[phase_name] + phase_record = phase_record_factory[phase_name] points = points_dict[phase_name] if points is None: points = _sample_phase_constitution(mod, point_sample, True, pdens_dict[phase_name], cur_phase_local_conditions) @@ -57,9 +51,9 @@ def calculate_(species: Sequence[v.Species], phases: Sequence[str], parameters={}) all_phase_data.append(phase_ds) - # assumes phase_records all have the same nonvacant pure elements, + # assumes phase_record_factory all have the same nonvacant pure elements, # even if those elements are not present in this phase record - fp_offset = len(tuple(phase_records.values())[0].nonvacant_elements) if fake_points else 0 + fp_offset = len(tuple(phase_record_factory.values())[0].nonvacant_elements) if fake_points else 0 running_total = [fp_offset] + list(np.cumsum([phase_ds['X'].shape[-2] for phase_ds in all_phase_data])) islice_by_phase = {phase_name: slice(running_total[phase_idx], running_total[phase_idx+1], None) for phase_idx, phase_name in enumerate(sorted(phases))} From 37f66f2f4fb148e36d00f76e40efb3199293131a Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 14:46:58 -0700 Subject: [PATCH 21/24] delete dead imports --- espei/error_functions/equilibrium_thermochemical_error.py | 1 - espei/optimizers/opt_scipy.py | 1 - espei/shadow_functions.py | 1 - 3 files changed, 3 deletions(-) diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py index 51ee0daf..01d31e71 100644 --- a/espei/error_functions/equilibrium_thermochemical_error.py +++ b/espei/error_functions/equilibrium_thermochemical_error.py @@ -13,7 +13,6 @@ from scipy.stats import norm from pycalphad import Database, Model, ReferenceState, variables as v from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition -from pycalphad.core.phase_rec import PhaseRecord from pycalphad.codegen.phase_record_factory import PhaseRecordFactory from pycalphad import Workspace, as_property diff --git a/espei/optimizers/opt_scipy.py b/espei/optimizers/opt_scipy.py index a9629a95..93a5c014 100644 --- a/espei/optimizers/opt_scipy.py +++ b/espei/optimizers/opt_scipy.py @@ -4,7 +4,6 @@ from scipy.optimize import minimize from espei.utils import unpack_piecewise from espei.error_functions.context import setup_context -from espei.error_functions import calculate_activity_error from .opt_base import OptimizerBase _log = logging.getLogger(__name__) diff --git a/espei/shadow_functions.py b/espei/shadow_functions.py index ff572fa0..14ea739a 100644 --- a/espei/shadow_functions.py +++ b/espei/shadow_functions.py @@ -3,7 +3,6 @@ pycalphad functions for very fast performance. """ -from collections import OrderedDict from typing import Sequence, Dict, Optional from numpy.typing import ArrayLike import numpy as np From 7e9ede53413a50d5c17a6ad06281a56a3fcd90c0 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 14:56:02 -0700 Subject: [PATCH 22/24] Fix source of extra points dimensions in plot_interaction --- espei/plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/espei/plot.py b/espei/plot.py index 9233951d..1fa99b0c 100644 --- a/espei/plot.py +++ b/espei/plot.py @@ -626,7 +626,6 @@ def plot_interaction(dbf, comps, phase_name, configuration, output, datasets=Non points[point_idx] = list(OrderedDict(sorted(points[point_idx].items(), key=str)).values()) points = np.array(points, dtype=np.float64) # TODO: Real temperature support - points = points[None, None] stability = calculate(dbf, comps, [phase_name], output=data['output'][:-5], T=temps, P=pressures, points=points, model=mod_srf) From 74c5c51ba946fa5f228960c4fed21f6a9e61c0fb Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 15:06:35 -0700 Subject: [PATCH 23/24] Remove espei.plot 0.9 deprecations --- espei/plot.py | 408 -------------------------------------------------- 1 file changed, 408 deletions(-) diff --git a/espei/plot.py b/espei/plot.py index 1fa99b0c..3aea28e5 100644 --- a/espei/plot.py +++ b/espei/plot.py @@ -35,106 +35,6 @@ } -def plot_parameters(dbf, comps, phase_name, configuration, symmetry, datasets=None, fig=None, require_data=True): - """ - Plot parameters of interest compared with data in subplots of a single figure - - Parameters - ---------- - dbf : Database - pycalphad thermodynamic database containing the relevant parameters. - comps : list - Names of components to consider in the calculation. - phase_name : str - Name of the considered phase phase - configuration : tuple - Sublattice configuration to plot, such as ('CU', 'CU') or (('CU', 'MG'), 'CU') - symmetry : list - List of lists containing indices of symmetric sublattices e.g. [[0, 1], [2, 3]] - datasets : PickleableTinyDB - ESPEI datasets to compare against. If None, nothing is plotted. - fig : matplotlib.Figure - Figure to create with axes as subplots. - require_data : bool - If True, plot parameters that have data corresponding data. Defaults to - True. Will raise an error for non-interaction configurations. - - Returns - ------- - None - - Examples - -------- - >>> # plot the LAVES_C15 (Cu)(Mg) endmember - >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', ('CU', 'MG'), symmetry=None, datasets=datasets) # doctest: +SKIP - >>> # plot the mixing interaction in the first sublattice - >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', (('CU', 'MG'), 'MG'), symmetry=None, datasets=datasets) # doctest: +SKIP - - """ - deprecation_msg = ( - "`espei.plot.plot_parameters` is deprecated and will be removed in ESPEI 0.9. " - "Please use `plot_endmember` or `plot_interaction` instead." - ) - warnings.warn(deprecation_msg, category=FutureWarning) - - em_plots = [('T', 'CPM'), ('T', 'CPM_FORM'), ('T', 'SM'), ('T', 'SM_FORM'), - ('T', 'HM'), ('T', 'HM_FORM')] - mix_plots = [ ('Z', 'HM_MIX'), ('Z', 'SM_MIX')] - comps = sorted(comps) - mod = Model(dbf, comps, phase_name) - mod.models['idmix'] = 0 - # This is for computing properties of formation - mod_norefstate = Model(dbf, comps, phase_name, parameters={'GHSER'+(c.upper()*2)[:2]: 0 for c in comps}) - # Is this an interaction parameter or endmember? - if any([isinstance(conf, list) or isinstance(conf, tuple) for conf in configuration]): - plots = mix_plots - else: - plots = em_plots - - # filter which parameters to plot by the data that exists - if require_data and datasets is not None: - filtered_plots = [] - for x_val, y_val in plots: - desired_props = [y_val.split('_')[0]+'_FORM', y_val] if y_val.endswith('_MIX') else [y_val] - solver_qry = (tinydb.where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry)) - data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry) - data = filter_configurations(data, configuration, symmetry) - data = filter_temperatures(data) - if len(data) > 0: - filtered_plots.append((x_val, y_val, data)) - elif require_data: - raise ValueError('Plots require datasets, but no datasets were passed.') - elif plots == em_plots and not require_data: - # How we treat temperature dependence is ambiguous when there is no data, so we raise an error - raise ValueError('The "require_data=False" option is not supported for non-mixing configurations.') - elif datasets is not None: - filtered_plots = [] - for x_val, y_val in plots: - desired_props = [y_val.split('_')[0]+'_FORM', y_val] if y_val.endswith('_MIX') else [y_val] - solver_qry = (tinydb.where('solver').test(symmetry_filter, configuration, recursive_tuplify(symmetry) if symmetry else symmetry)) - data = get_prop_data(comps, phase_name, desired_props, datasets, additional_query=solver_qry) - data = filter_configurations(data, configuration, symmetry) - data = filter_temperatures(data) - filtered_plots.append((x_val, y_val, data)) - else: - filtered_plots = [(x_val, y_val, []) for x_val, y_val in plots] - - num_plots = len(filtered_plots) - if num_plots == 0: - return - if not fig: - fig = plt.figure(figsize=plt.figaspect(num_plots)) - - # plot them - for i, (x_val, y_val, data) in enumerate(filtered_plots): - if y_val.endswith('_FORM'): - ax = fig.add_subplot(num_plots, 1, i+1) - ax = _compare_data_to_parameters(dbf, comps, phase_name, data, mod_norefstate, configuration, x_val, y_val, ax=ax) - else: - ax = fig.add_subplot(num_plots, 1, i+1) - ax = _compare_data_to_parameters(dbf, comps, phase_name, data, mod, configuration, x_val, y_val, ax=ax) - - def dataplot(comps, phases, conds, datasets, tielines=True, ax=None, plot_kwargs=None, tieline_plot_kwargs=None) -> plt.Axes: """ Plot datapoints corresponding to the components, phases, and conditions. @@ -381,120 +281,6 @@ def dataplot(comps, phases, conds, datasets, tielines=True, ax=None, plot_kwargs return ax -def eqdataplot(eq, datasets, ax=None, plot_kwargs=None): - """ - Plot datapoints corresponding to the components and phases in the eq Dataset. - A convenience function for dataplot. - - Parameters - ---------- - eq : xarray.Dataset - Result of equilibrium calculation. - datasets : PickleableTinyDB - Database of phase equilibria datasets - ax : matplotlib.Axes - Default axes used if not specified. - plot_kwargs : dict - Keyword arguments to pass to dataplot - - Returns - ------- - A plot of phase equilibria points as a figure - - Examples - -------- - - >>> from pycalphad import equilibrium, Database, variables as v # doctest: +SKIP - >>> from pycalphad.plot.eqplot import eqplot # doctest: +SKIP - >>> from espei.datasets import load_datasets, recursive_glob # doctest: +SKIP - >>> datasets = load_datasets(recursive_glob('.', '*.json')) # doctest: +SKIP - >>> dbf = Database('my_databases.tdb') # doctest: +SKIP - >>> my_phases = list(dbf.phases.keys()) # doctest: +SKIP - >>> eq = equilibrium(dbf, ['CU', 'MG', 'VA'], my_phases, {v.P: 101325, v.T: (500, 1000, 10), v.X('MG'): (0, 1, 0.01)}) # doctest: +SKIP - >>> ax = eqplot(eq) # doctest: +SKIP - >>> ax = eqdataplot(eq, datasets, ax=ax) # doctest: +SKIP - - """ - deprecation_msg = ( - "`espei.plot.eqdataplot` is deprecated and will be removed in ESPEI 0.9. " - "Users depending on plotting from an `pycalphad.equilibrium` result should use " - "`pycalphad.plot.eqplot.eqplot` along with `espei.plot.dataplot` directly. " - "Note that pycalphad's mapping can offer signficant reductions in calculation " - "time compared to using `equilibrium` followed by `eqplot`." - ) - warnings.warn(deprecation_msg, category=FutureWarning) - # TODO: support reference legend - conds = OrderedDict([(_map_coord_to_variable(key), unpack_condition(np.asarray(value))) - for key, value in sorted(eq.coords.items(), key=str) - if (key == 'T') or (key == 'P') or (key.startswith('X_'))]) - - phases = list(map(str, sorted(set(np.array(eq.Phase.values.ravel(), dtype='U')) - {''}, key=str))) - comps = list(map(str, sorted(np.array(eq.coords['component'].values, dtype='U'), key=str))) - - ax = dataplot(comps, phases, conds, datasets, ax=ax, plot_kwargs=plot_kwargs) - - return ax - - -def multiplot(dbf, comps, phases, conds, datasets, eq_kwargs=None, plot_kwargs=None, data_kwargs=None): - """ - Plot a phase diagram with datapoints described by datasets. - This is a wrapper around pycalphad.equilibrium, pycalphad's eqplot, and dataplot. - - Parameters - ---------- - dbf : Database - pycalphad thermodynamic database containing the relevant parameters. - comps : list - Names of components to consider in the calculation. - phases : list - Names of phases to consider in the calculation. - conds : dict - Maps StateVariables to values and/or iterables of values. - datasets : PickleableTinyDB - Database of phase equilibria datasets - eq_kwargs : dict - Keyword arguments passed to pycalphad equilibrium() - plot_kwargs : dict - Keyword arguments passed to pycalphad eqplot() - data_kwargs : dict - Keyword arguments passed to dataplot() - - Returns - ------- - A phase diagram with phase equilibria data as a figure - - Examples - -------- - - >>> from pycalphad import Database, variables as v # doctest: +SKIP - >>> from pycalphad.plot.eqplot import eqplot # doctest: +SKIP - >>> from espei.datasets import load_datasets, recursive_glob # doctest: +SKIP - >>> datasets = load_datasets(recursive_glob('.', '*.json')) # doctest: +SKIP - >>> dbf = Database('my_databases.tdb') # doctest: +SKIP - >>> my_phases = list(dbf.phases.keys()) # doctest: +SKIP - >>> multiplot(dbf, ['CU', 'MG', 'VA'], my_phases, {v.P: 101325, v.T: 1000, v.X('MG'): (0, 1, 0.01)}, datasets) # doctest: +SKIP - - """ - deprecation_msg = ( - "`espei.plot.multiplot` is deprecated and will be removed in ESPEI 0.9. " - "Users depending on `multiplot` should use pycalphad's `binplot` or `ternplot` " - "followed by `espei.plot.dataplot`. Note that pycalphad's mapping can offer " - "signficant reductions in calculation time compared to using `multiplot`. See " - "ESPEI's recipes for an example: " - "https://espei.org/en/latest/recipes.html#plot-phase-diagram-with-data" - ) - warnings.warn(deprecation_msg, category=FutureWarning) - eq_kwargs = eq_kwargs or dict() - plot_kwargs = plot_kwargs or dict() - data_kwargs = data_kwargs or dict() - - eq_result = equilibrium(dbf, comps, phases, conds, **eq_kwargs) - ax = eqplot(eq_result, **plot_kwargs) - ax = eqdataplot(eq_result, datasets, ax=ax, plot_kwargs=data_kwargs) - return ax - - def _get_interaction_predicted_values(dbf, comps, phase_name, configuration, output): mod = Model(dbf, comps, phase_name) mod.models['idmix'] = 0 # TODO: better reference state handling @@ -754,200 +540,6 @@ def plot_endmember(dbf, comps, phase_name, configuration, output, datasets=None, return ax -def _compare_data_to_parameters(dbf, comps, phase_name, desired_data, mod, configuration, x, y, ax=None): - """ - Return one set of plotted Axes with data compared to calculated parameters - - Parameters - ---------- - dbf : Database - pycalphad thermodynamic database containing the relevant parameters. - comps : list - Names of components to consider in the calculation. - phase_name : str - Name of the considered phase phase - desired_data : - mod : Model - A pycalphad Model. The Model may or may not have the reference state zeroed out for formation properties. - configuration : - x : str - Model property to plot on the x-axis e.g. 'T', 'HM_MIX', 'SM_FORM' - y : str - Model property to plot on the y-axis e.g. 'T', 'HM_MIX', 'SM_FORM' - ax : matplotlib.Axes - Default axes used if not specified. - - Returns - ------- - matplotlib.Axes - - """ - species = unpack_species(dbf, comps) - # phase constituents are Species objects, so we need to be doing intersections with those - phase_constituents = dbf.phases[phase_name].constituents - # phase constituents must be filtered to only active: - constituents = [[sp.name for sp in sorted(subl_constituents.intersection(species))] for subl_constituents in phase_constituents] - subl_dof = list(map(len, constituents)) - calculate_dict = get_prop_samples(desired_data, constituents) - # we don't take in symmetry here, so we we can't consider equivalent sublattices when canonicalizing - sample_condition_dicts = get_sample_condition_dicts(calculate_dict, canonicalize(constituents, None), phase_name) - endpoints = endmembers_from_interaction(configuration) - interacting_subls = [c for c in recursive_tuplify(configuration) if isinstance(c, tuple)] - disordered_config = False - if (len(set(interacting_subls)) == 1) and (len(interacting_subls[0]) == 2): - # This configuration describes all sublattices with the same two elements interacting - # In general this is a high-dimensional space; just plot the diagonal to see the disordered mixing - endpoints = [endpoints[0], endpoints[-1]] - disordered_config = True - if not ax: - ax = plt.subplot() - bar_chart = False - bar_labels = [] - bar_data = [] - if y.endswith('_FORM'): - # We were passed a Model object with zeroed out reference states - yattr = y[:-5] - else: - yattr = y - if len(endpoints) == 1: - # This is an endmember so we can just compute T-dependent stuff - Ts = calculate_dict['T'] - temperatures = np.asarray(Ts if len(Ts) > 0 else 298.15) - if temperatures.min() != temperatures.max(): - temperatures = np.linspace(temperatures.min(), temperatures.max(), num=100) - else: - # We only have one temperature: let's do a bar chart instead - bar_chart = True - temperatures = temperatures.min() - endmember = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction))[None, None] - predicted_quantities = calculate(dbf, comps, [phase_name], output=yattr, - T=temperatures, P=101325, points=endmember, model=mod, mode='numpy') - if y == 'HM' and x == 'T': - # Shift enthalpy data so that value at minimum T is zero - predicted_quantities[yattr] -= predicted_quantities[yattr].sel(T=temperatures[0]).values.flatten() - response_data = predicted_quantities[yattr].values.flatten() - if not bar_chart: - extra_kwargs = {} - if len(response_data) < 10: - extra_kwargs['markersize'] = 20 - extra_kwargs['marker'] = '.' - extra_kwargs['linestyle'] = 'none' - extra_kwargs['clip_on'] = False - ax.plot(temperatures, response_data, - label='This work', color='k', **extra_kwargs) - ax.set_xlabel(plot_mapping.get(x, x)) - ax.set_ylabel(plot_mapping.get(y, y)) - else: - bar_labels.append('This work') - bar_data.append(response_data[0]) - elif len(endpoints) == 2: - # Binary interaction parameter - first_endpoint = _translate_endmember_to_array(endpoints[0], mod.ast.atoms(v.SiteFraction)) - second_endpoint = _translate_endmember_to_array(endpoints[1], mod.ast.atoms(v.SiteFraction)) - point_matrix = np.linspace(0, 1, num=100)[None].T * second_endpoint + \ - (1 - np.linspace(0, 1, num=100))[None].T * first_endpoint - # TODO: Real temperature support - point_matrix = point_matrix[None, None] - predicted_quantities = calculate(dbf, comps, [phase_name], output=yattr, - T=300, P=101325, points=point_matrix, model=mod, mode='numpy') - response_data = predicted_quantities[yattr].values.flatten() - if not bar_chart: - extra_kwargs = {} - if len(response_data) < 10: - extra_kwargs['markersize'] = 20 - extra_kwargs['marker'] = '.' - extra_kwargs['linestyle'] = 'none' - extra_kwargs['clip_on'] = False - ax.plot(np.linspace(0, 1, num=100), response_data, label='This work', color='k', **extra_kwargs) - ax.set_xlim((0, 1)) - ax.set_xlabel(str(':'.join(endpoints[0])) + ' to ' + str(':'.join(endpoints[1]))) - ax.set_ylabel(plot_mapping.get(y, y)) - else: - bar_labels.append('This work') - bar_data.append(response_data[0]) - else: - raise NotImplementedError('No support for plotting configuration {}'.format(configuration)) - - bib_reference_keys = sorted({entry.get('reference', '') for entry in desired_data}) - symbol_map = bib_marker_map(bib_reference_keys) - - for data in desired_data: - indep_var_data = None - response_data = np.zeros_like(data['values'], dtype=np.float64) - if x == 'T' or x == 'P': - indep_var_data = np.array(data['conditions'][x], dtype=np.float64).flatten() - elif x == 'Z': - if disordered_config: - # Take the second element of the first interacting sublattice as the coordinate - # Because it's disordered all sublattices should be equivalent - # TODO: Fix this to filter because we need to guarantee the plot points are disordered - occ = data['solver']['sublattice_occupancies'] - subl_idx = np.nonzero([isinstance(c, (list, tuple)) for c in occ[0]])[0] - if len(subl_idx) > 1: - subl_idx = int(subl_idx[0]) - else: - subl_idx = int(subl_idx) - indep_var_data = [c[subl_idx][1] for c in occ] - else: - interactions = np.array([cond_dict[Symbol('YS')] for cond_dict in sample_condition_dicts]) - indep_var_data = 1 - (interactions+1)/2 - if y.endswith('_MIX') and data['output'].endswith('_FORM'): - # All the _FORM data we have still has the lattice stability contribution - # Need to zero it out to shift formation data to mixing - mod_latticeonly = Model(dbf, comps, phase_name, parameters={'GHSER'+c.upper(): 0 for c in comps}) - mod_latticeonly.models = {key: value for key, value in mod_latticeonly.models.items() - if key == 'ref'} - temps = data['conditions'].get('T', 300) - pressures = data['conditions'].get('P', 101325) - points = build_sitefractions(phase_name, data['solver']['sublattice_configurations'], - data['solver']['sublattice_occupancies']) - for point_idx in range(len(points)): - missing_variables = mod_latticeonly.ast.atoms(v.SiteFraction) - set(points[point_idx].keys()) - # Set unoccupied values to zero - points[point_idx].update({key: 0 for key in missing_variables}) - # Change entry to a sorted array of site fractions - points[point_idx] = list(OrderedDict(sorted(points[point_idx].items(), key=str)).values()) - points = np.array(points, dtype=np.float64) - # TODO: Real temperature support - points = points[None, None] - stability = calculate(dbf, comps, [phase_name], output=data['output'][:-5], - T=temps, P=pressures, points=points, - model=mod_latticeonly, mode='numpy') - response_data -= stability[data['output'][:-5]].values.squeeze() - - response_data += np.array(data['values'], dtype=np.float64) - response_data = response_data.flatten() - if not bar_chart: - extra_kwargs = {} - extra_kwargs['markersize'] = 8 - extra_kwargs['linestyle'] = 'none' - extra_kwargs['clip_on'] = False - ref = data.get('reference', '') - mark = symbol_map[ref]['markers'] - ax.plot(indep_var_data, response_data, - label=symbol_map[ref]['formatted'], - marker=mark['marker'], - fillstyle=mark['fillstyle'], - **extra_kwargs) - else: - bar_labels.append(data.get('reference', None)) - bar_data.append(response_data[0]) - if bar_chart: - ax.barh(0.02 * np.arange(len(bar_data)), bar_data, - color='k', height=0.01) - endmember_title = ' to '.join([':'.join(i) for i in endpoints]) - ax.get_figure().suptitle('{} (T = {} K)'.format(endmember_title, temperatures), fontsize=20) - ax.set_yticks(0.02 * np.arange(len(bar_data))) - ax.set_yticklabels(bar_labels, fontsize=20) - # This bar chart is rotated 90 degrees, so "y" is now x - ax.set_xlabel(plot_mapping.get(y, y)) - else: - ax.set_frame_on(False) - leg = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend outside - leg.get_frame().set_edgecolor('black') - return ax - - def _translate_endmember_to_array(endmember, variables): site_fractions = sorted(variables, key=str) frac_array = np.zeros(len(site_fractions)) From bd9c18b44f2772302f9dff78b0be6548c9e10a40 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 8 Aug 2024 15:11:40 -0700 Subject: [PATCH 24/24] Remove deprecated "None" scheduler string --- espei/espei_script.py | 6 ------ espei/input-schema.yaml | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/espei/espei_script.py b/espei/espei_script.py index b09dab57..285bc606 100644 --- a/espei/espei_script.py +++ b/espei/espei_script.py @@ -115,12 +115,6 @@ def get_run_settings(input_dict): if run_settings['mcmc'].get('restart_trace') is None: run_settings['mcmc']['chains_per_parameter'] = run_settings['mcmc'].get('chains_per_parameter', 2) run_settings['mcmc']['chain_std_deviation'] = run_settings['mcmc'].get('chain_std_deviation', 0.1) - if run_settings['mcmc']['scheduler'] == 'None': - warnings.warn( - "Setting scheduler to the string 'None' will be deprecated in ESPEI " - "0.9. Use `null` in YAML or `None` in Python.", FutureWarning - ) - run_settings['mcmc']['scheduler'] = None if not schema.validate(run_settings): raise ValueError(schema.errors) if run_settings.get("generate_parameters") is not None: diff --git a/espei/input-schema.yaml b/espei/input-schema.yaml index d2fa337d..83ee4f2d 100644 --- a/espei/input-schema.yaml +++ b/espei/input-schema.yaml @@ -95,8 +95,8 @@ mcmc: required: True scheduler: # scheduler to use for parallelization type: string - default: dask # dask | None | A JSON file - regex: 'dask|None|.*\.json$' + default: dask # dask | A JSON file + regex: 'dask|.*\.json$' required: True nullable: True cores: