Skip to content

Commit

Permalink
MAINT: Migrate to PyCalphad Workspace (#256)
Browse files Browse the repository at this point in the history
- Add support for breaking changes in the upcoming PyCalphad 0.11 release:
    - Migrate almost all residual function implementations to PyCalphad `Workspace`. This allowed us to remove several `espei.shadow_functions` that are no longer necessary.
    - With `Workspace`, ESPEI no longer needs to use a heuristic and hack the solver to prevent adding composition sets for constrained equilibria in ZPF data because this functionality is replaced by `IsolatedPhase`. This should resolve several bugs where the heurisitic for choosing points was too narrow (not allowing any points to be selected) or too broad (with bad starting points selected that could give metastable equilibria).
    - Update data structures to reflect the change in PyCalphad to `PhaseRecordFactory` objects
    - Fix compatibilitiy with `unpack_components` -> `unpack_species`
    - Fix `ImmutableDenseMatrix` of symbolic coefficients in parameter selection now that PyCalphad `Symbol` objects define a `__getitem__` method. See symengine/symengine.py#485 for more details.
    - Fix need to pad `points` matricies in `plot_endmember` and `plot_interaction` (`_compare_data_to_parameters` is getting removed as deprecated in a future PR)
- Drop Python 3.8 support (NEP-29)
- Remove dead code and dead imports
- Add timings to individual likelihood calls in trace output
  • Loading branch information
bocklund authored Aug 9, 2024
1 parent 181cc80 commit 227eac8
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 329 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
fail-fast: false
max-parallel: 100
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -52,7 +52,7 @@ jobs:
fail-fast: false
max-parallel: 100
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
with:
Expand Down
28 changes: 13 additions & 15 deletions espei/error_functions/activity_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
import numpy as np
import numpy.typing as npt
import tinydb
from pycalphad import Database, equilibrium, variables as v
from pycalphad import Database, variables as v
from pycalphad.plot.eqplot import _map_coord_to_variable
from pycalphad.core.utils import filter_phases, unpack_components
from pycalphad.core.utils import filter_phases, unpack_species
from scipy.stats import norm
from pycalphad import Workspace

from espei.core_utils import ravel_conditions
from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
Expand All @@ -28,7 +29,7 @@
_log = logging.getLogger(__name__)


def target_chempots_from_activity(component, target_activity, temperatures, reference_result):
def target_chempots_from_activity(component, target_activity, temperatures, wks_ref):
"""
Return an array of experimental chemical potentials for the component
Expand All @@ -50,8 +51,9 @@ def target_chempots_from_activity(component, target_activity, temperatures, refe
"""
# acr_i = exp((mu_i - mu_i^{ref})/(RT))
# so mu_i = R*T*ln(acr_i) + mu_i^{ref}
ref_chempot = reference_result["MU"].sel(component=component).values.flatten()
return v.R * temperatures * np.log(target_activity) + ref_chempot
ref_chempot = wks_ref.get(v.MU(component))
exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot
return exp_chem_pots


# TODO: roll this function into ActivityResidual
Expand Down Expand Up @@ -86,11 +88,9 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
# data_comps and data_phases ensures that we only do calculations on
# the subsystem of the system defining the data.
data_comps = ds['components']
data_phases = filter_phases(dbf, unpack_components(dbf, data_comps), candidate_phases=phases)
data_phases = filter_phases(dbf, unpack_species(dbf, data_comps), candidate_phases=phases)
ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()}
ref_result = equilibrium(dbf, data_comps, ref['phases'], ref_conditions,
model=phase_models, parameters=parameters,
callables=callables)
wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters)

# calculate current chemical potentials
# get the conditions
Expand All @@ -113,15 +113,13 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
# assume now that the ravelled conditions all have the same size
conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))]
for conds in conditions_list:
sample_eq_res = equilibrium(dbf, data_comps, data_phases, conds,
model=phase_models, parameters=parameters,
callables=callables)
dataset_computed_chempots.append(float(sample_eq_res.MU.sel(component=acr_component).values.flatten()[0]))
wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds, parameters=parameters)
dataset_computed_chempots.append(wks_sample.get(v.MU(acr_component)))
dataset_weights.append(std_dev / data_weight / ds.get("weight", 1.0))

# calculate target chempots
dataset_activities = np.array(ds['values']).flatten()
dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], ref_result)
dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], wks_ref)
dataset_residuals = (np.asarray(dataset_computed_chempots) - np.asarray(dataset_target_chempots, dtype=float)).tolist()
_log.debug('Data: %s, chemical potential difference: %s, reference: %s', dataset_activities, dataset_residuals, ds["reference"])
residuals.extend(dataset_residuals)
Expand Down Expand Up @@ -203,7 +201,7 @@ def __init__(
else:
comps = sorted(database.elements)
model_dict = dict()
phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys()))
phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
if symbols_to_fit is None:
symbols_to_fit = database_symbols_to_fit(database)
self._symbols_to_fit = symbols_to_fit
Expand Down
51 changes: 20 additions & 31 deletions espei/error_functions/equilibrium_thermochemical_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,14 @@
import tinydb
from tinydb import where
from scipy.stats import norm
from pycalphad.plot.eqplot import _map_coord_to_variable
from pycalphad import Database, Model, ReferenceState, variables as v
from pycalphad.core.equilibrium import _eqcalculate
from pycalphad.codegen.callables import build_phase_records
from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_components, unpack_condition
from pycalphad.core.phase_rec import PhaseRecord
from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
from pycalphad import Workspace, as_property

from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
from espei.phase_models import PhaseModelSpecification
from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters
from espei.shadow_functions import update_phase_record_parameters
from espei.typing import SymbolName
from espei.utils import PickleableTinyDB, database_symbols_to_fit

Expand All @@ -34,7 +32,7 @@
('composition_conds', Sequence[Dict[v.X, float]]),
('models', Dict[str, Model]),
('params_keys', Dict[str, float]),
('phase_records', Sequence[Dict[str, PhaseRecord]]),
('phase_record_factory', PhaseRecordFactory),
('output', str),
('samples', np.ndarray),
('weight', np.ndarray),
Expand Down Expand Up @@ -79,7 +77,7 @@ def build_eqpropdata(data: tinydb.database.Document,
params_keys, _ = extract_parameters(parameters)

data_comps = list(set(data['components']).union({'VA'}))
species = sorted(unpack_components(dbf, data_comps), key=str)
species = sorted(unpack_species(dbf, data_comps), key=str)
data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
output = data['output']
Expand All @@ -88,6 +86,7 @@ def build_eqpropdata(data: tinydb.database.Document,
reference = data.get('reference', '')

# Models are now modified in response to the data from this data
# TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below
if 'reference_states' in data:
property_output = output[:-1] if output.endswith('R') else output # unreferenced model property so we can tell shift_reference_state what to build.
reference_states = []
Expand All @@ -100,7 +99,7 @@ def build_eqpropdata(data: tinydb.database.Document,
pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')])
comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])

phase_records = build_phase_records(dbf, species, data_phases, {**pot_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True)
phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters)

# Now we need to unravel the composition conditions
# (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
Expand All @@ -115,7 +114,7 @@ def build_eqpropdata(data: tinydb.database.Document,
dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations)
weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten()

return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_records, output, samples, weights, reference)
return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference)


def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str],
Expand Down Expand Up @@ -197,38 +196,28 @@ def calc_prop_differences(eqpropdata: EqPropData,
* weights for this dataset
"""
if approximate_equilibrium:
_equilibrium = no_op_equilibrium_
else:
_equilibrium = equilibrium_

dbf = eqpropdata.dbf
species = eqpropdata.species
phases = eqpropdata.phases
pot_conds = eqpropdata.potential_conds
models = eqpropdata.models
phase_records = eqpropdata.phase_records
update_phase_record_parameters(phase_records, parameters)
phase_record_factory = eqpropdata.phase_record_factory
update_phase_record_parameters(phase_record_factory, parameters)
params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
output = eqpropdata.output
output = as_property(eqpropdata.output)
weights = np.array(eqpropdata.weight, dtype=np.float64)
samples = np.array(eqpropdata.samples, dtype=np.float64)
wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory, parameters=params_dict)

calculated_data = []
for comp_conds in eqpropdata.composition_conds:
cond_dict = OrderedDict(**pot_conds, **comp_conds)
# str_statevar_dict must be sorted, assumes that pot_conds are.
str_statevar_dict = OrderedDict([(str(key), vals) for key, vals in pot_conds.items()])
grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True)
multi_eqdata = _equilibrium(phase_records, cond_dict, grid)
# TODO: could be kind of slow. Callables (which are cachable) must be built.
propdata = _eqcalculate(dbf, species, phases, cond_dict, output, data=multi_eqdata, per_phase=False, callables=None, parameters=params_dict, model=models)

if 'vertex' in propdata.data_vars[output][0]:
raise ValueError(f"Property {output} cannot be used to calculate equilibrium thermochemical error because each phase has a unique value for this property.")

vals = getattr(propdata, output).flatten().tolist()
calculated_data.extend(vals)
wks.conditions = cond_dict
wks.parameters = params_dict # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
wks.models = models
wks.phase_record_factory = phase_record_factory
vals = wks.get(output)
calculated_data.extend(np.atleast_1d(vals).tolist())

calculated_data = np.array(calculated_data, dtype=np.float64)

Expand Down Expand Up @@ -304,7 +293,7 @@ def __init__(
else:
comps = sorted(database.elements)
model_dict = dict()
phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys()))
phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
if symbols_to_fit is None:
symbols_to_fit = database_symbols_to_fit(database)
# okay if parameters are initialized to zero, we only need the symbol names
Expand Down
Loading

0 comments on commit 227eac8

Please sign in to comment.