MAINT: Migrate to PyCalphad Workspace (#256)

- Add support for breaking changes in the upcoming PyCalphad 0.11 release: - Migrate almost all residual function implementations to PyCalphad `Workspace`. This allowed us to remove several `espei.shadow_functions` that are no longer necessary. - With `Workspace`, ESPEI no longer needs to use a heuristic and hack the solver to prevent adding composition sets for constrained equilibria in ZPF data because this functionality is replaced by `IsolatedPhase`. This should resolve several bugs where the heurisitic for choosing points was too narrow (not allowing any points to be selected) or too broad (with bad starting points selected that could give metastable equilibria). - Update data structures to reflect the change in PyCalphad to `PhaseRecordFactory` objects - Fix compatibilitiy with `unpack_components` -> `unpack_species` - Fix `ImmutableDenseMatrix` of symbolic coefficients in parameter selection now that PyCalphad `Symbol` objects define a `__getitem__` method. See symengine/symengine.py#485 for more details. - Fix need to pad `points` matricies in `plot_endmember` and `plot_interaction` (`_compare_data_to_parameters` is getting removed as deprecated in a future PR) - Drop Python 3.8 support (NEP-29) - Remove dead code and dead imports - Add timings to individual likelihood calls in trace output
PhasesResearchLab · Aug 9, 2024 · 227eac8 · 227eac8
1 parent 181cc80
commit 227eac8
Show file tree

Hide file tree

Showing 15 changed files with 306 additions and 329 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -33,7 +33,7 @@ jobs:
       fail-fast: false
       max-parallel: 100
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
     - uses: actions/checkout@v4
       with:
@@ -52,7 +52,7 @@ jobs:
       fail-fast: false
       max-parallel: 100
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
     - uses: actions/checkout@v4
       with:

diff --git a/espei/error_functions/activity_error.py b/espei/error_functions/activity_error.py
@@ -14,10 +14,11 @@
 import numpy as np
 import numpy.typing as npt
 import tinydb
-from pycalphad import Database, equilibrium, variables as v
+from pycalphad import Database, variables as v
 from pycalphad.plot.eqplot import _map_coord_to_variable
-from pycalphad.core.utils import filter_phases, unpack_components
+from pycalphad.core.utils import filter_phases, unpack_species
 from scipy.stats import norm
+from pycalphad import Workspace
 
 from espei.core_utils import ravel_conditions
 from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
@@ -28,7 +29,7 @@
 _log = logging.getLogger(__name__)
 
 
-def target_chempots_from_activity(component, target_activity, temperatures, reference_result):
+def target_chempots_from_activity(component, target_activity, temperatures, wks_ref):
     """
     Return an array of experimental chemical potentials for the component
 
@@ -50,8 +51,9 @@ def target_chempots_from_activity(component, target_activity, temperatures, refe
     """
     # acr_i = exp((mu_i - mu_i^{ref})/(RT))
     # so mu_i = R*T*ln(acr_i) + mu_i^{ref}
-    ref_chempot = reference_result["MU"].sel(component=component).values.flatten()
-    return v.R * temperatures * np.log(target_activity) + ref_chempot
+    ref_chempot = wks_ref.get(v.MU(component))
+    exp_chem_pots = v.R * temperatures * np.log(target_activity) + ref_chempot
+    return exp_chem_pots
 
 
 # TODO: roll this function into ActivityResidual
@@ -86,11 +88,9 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         # data_comps and data_phases ensures that we only do calculations on
         # the subsystem of the system defining the data.
         data_comps = ds['components']
-        data_phases = filter_phases(dbf, unpack_components(dbf, data_comps), candidate_phases=phases)
+        data_phases = filter_phases(dbf, unpack_species(dbf, data_comps), candidate_phases=phases)
         ref_conditions = {_map_coord_to_variable(coord): val for coord, val in ref['conditions'].items()}
-        ref_result = equilibrium(dbf, data_comps, ref['phases'], ref_conditions,
-                                 model=phase_models, parameters=parameters,
-                                 callables=callables)
+        wks_ref = Workspace(database=dbf, components=data_comps, phases=ref['phases'], conditions=ref_conditions, parameters=parameters)
 
         # calculate current chemical potentials
         # get the conditions
@@ -113,15 +113,13 @@ def calculate_activity_residuals(dbf, comps, phases, datasets, parameters=None,
         # assume now that the ravelled conditions all have the same size
         conditions_list = [{c: conditions[c][i] for c in conditions.keys()} for i in range(len(conditions[v.T]))]
         for conds in conditions_list:
-            sample_eq_res = equilibrium(dbf, data_comps, data_phases, conds,
-                                        model=phase_models, parameters=parameters,
-                                        callables=callables)
-            dataset_computed_chempots.append(float(sample_eq_res.MU.sel(component=acr_component).values.flatten()[0]))
+            wks_sample = Workspace(database=dbf, components=data_comps, phases=data_phases, conditions=conds, parameters=parameters)
+            dataset_computed_chempots.append(wks_sample.get(v.MU(acr_component)))
             dataset_weights.append(std_dev / data_weight / ds.get("weight", 1.0))
 
         # calculate target chempots
         dataset_activities = np.array(ds['values']).flatten()
-        dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], ref_result)
+        dataset_target_chempots = target_chempots_from_activity(acr_component, dataset_activities, conditions[v.T], wks_ref)
         dataset_residuals = (np.asarray(dataset_computed_chempots) - np.asarray(dataset_target_chempots, dtype=float)).tolist()
         _log.debug('Data: %s, chemical potential difference: %s, reference: %s', dataset_activities, dataset_residuals, ds["reference"])
         residuals.extend(dataset_residuals)
@@ -203,7 +201,7 @@ def __init__(
         else:
             comps = sorted(database.elements)
             model_dict = dict()
-        phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys()))
+        phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
         if symbols_to_fit is None:
             symbols_to_fit = database_symbols_to_fit(database)
         self._symbols_to_fit = symbols_to_fit

diff --git a/espei/error_functions/equilibrium_thermochemical_error.py b/espei/error_functions/equilibrium_thermochemical_error.py
@@ -11,16 +11,14 @@
 import tinydb
 from tinydb import where
 from scipy.stats import norm
-from pycalphad.plot.eqplot import _map_coord_to_variable
 from pycalphad import Database, Model, ReferenceState, variables as v
-from pycalphad.core.equilibrium import _eqcalculate
-from pycalphad.codegen.callables import build_phase_records
-from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_components, unpack_condition
-from pycalphad.core.phase_rec import PhaseRecord
+from pycalphad.core.utils import instantiate_models, filter_phases, extract_parameters, unpack_species, unpack_condition
+from pycalphad.codegen.phase_record_factory import PhaseRecordFactory
+from pycalphad import Workspace, as_property
 
 from espei.error_functions.residual_base import ResidualFunction, residual_function_registry
 from espei.phase_models import PhaseModelSpecification
-from espei.shadow_functions import equilibrium_, calculate_, no_op_equilibrium_, update_phase_record_parameters
+from espei.shadow_functions import update_phase_record_parameters
 from espei.typing import SymbolName
 from espei.utils import PickleableTinyDB, database_symbols_to_fit
 
@@ -34,7 +32,7 @@
                                        ('composition_conds', Sequence[Dict[v.X, float]]),
                                        ('models', Dict[str, Model]),
                                        ('params_keys', Dict[str, float]),
-                                       ('phase_records', Sequence[Dict[str, PhaseRecord]]),
+                                       ('phase_record_factory', PhaseRecordFactory),
                                        ('output', str),
                                        ('samples', np.ndarray),
                                        ('weight', np.ndarray),
@@ -79,7 +77,7 @@ def build_eqpropdata(data: tinydb.database.Document,
     params_keys, _ = extract_parameters(parameters)
 
     data_comps = list(set(data['components']).union({'VA'}))
-    species = sorted(unpack_components(dbf, data_comps), key=str)
+    species = sorted(unpack_species(dbf, data_comps), key=str)
     data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
     models = instantiate_models(dbf, species, data_phases, model=model, parameters=parameters)
     output = data['output']
@@ -88,6 +86,7 @@ def build_eqpropdata(data: tinydb.database.Document,
     reference = data.get('reference', '')
 
     # Models are now modified in response to the data from this data
+    # TODO: build a reference state MetaProperty with the reference state information, maybe just-in-time, below
     if 'reference_states' in data:
         property_output = output[:-1] if output.endswith('R') else output  # unreferenced model property so we can tell shift_reference_state what to build.
         reference_states = []
@@ -100,7 +99,7 @@ def build_eqpropdata(data: tinydb.database.Document,
     pot_conds = OrderedDict([(getattr(v, key), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if not key.startswith('X_')])
     comp_conds = OrderedDict([(v.X(key[2:]), unpack_condition(data['conditions'][key])) for key in sorted(data['conditions'].keys()) if key.startswith('X_')])
 
-    phase_records = build_phase_records(dbf, species, data_phases, {**pot_conds, **comp_conds}, models, parameters=parameters, build_gradients=True, build_hessians=True)
+    phase_record_factory = PhaseRecordFactory(dbf, species, {**pot_conds, **comp_conds}, models, parameters=parameters)
 
     # Now we need to unravel the composition conditions
     # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since the
@@ -115,7 +114,7 @@ def build_eqpropdata(data: tinydb.database.Document,
     dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations)
     weights = (property_std_deviation.get(property_output, 1.0)/data_weight_dict.get(property_output, 1.0)/dataset_weights).flatten()
 
-    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_records, output, samples, weights, reference)
+    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds, models, params_keys, phase_record_factory, output, samples, weights, reference)
 
 
 def get_equilibrium_thermochemical_data(dbf: Database, comps: Sequence[str],
@@ -197,38 +196,28 @@ def calc_prop_differences(eqpropdata: EqPropData,
         * weights for this dataset
 
     """
-    if approximate_equilibrium:
-        _equilibrium = no_op_equilibrium_
-    else:
-        _equilibrium = equilibrium_
-
     dbf = eqpropdata.dbf
     species = eqpropdata.species
     phases = eqpropdata.phases
     pot_conds = eqpropdata.potential_conds
     models = eqpropdata.models
-    phase_records = eqpropdata.phase_records
-    update_phase_record_parameters(phase_records, parameters)
+    phase_record_factory = eqpropdata.phase_record_factory
+    update_phase_record_parameters(phase_record_factory, parameters)
     params_dict = OrderedDict(zip(map(str, eqpropdata.params_keys), parameters))
-    output = eqpropdata.output
+    output = as_property(eqpropdata.output)
     weights = np.array(eqpropdata.weight, dtype=np.float64)
     samples = np.array(eqpropdata.samples, dtype=np.float64)
+    wks = Workspace(database=dbf, components=species, phases=phases, models=models, phase_record_factory=phase_record_factory, parameters=params_dict)
 
     calculated_data = []
     for comp_conds in eqpropdata.composition_conds:
         cond_dict = OrderedDict(**pot_conds, **comp_conds)
-        # str_statevar_dict must be sorted, assumes that pot_conds are.
-        str_statevar_dict = OrderedDict([(str(key), vals) for key, vals in pot_conds.items()])
-        grid = calculate_(species, phases, str_statevar_dict, models, phase_records, pdens=50, fake_points=True)
-        multi_eqdata = _equilibrium(phase_records, cond_dict, grid)
-        # TODO: could be kind of slow. Callables (which are cachable) must be built.
-        propdata = _eqcalculate(dbf, species, phases, cond_dict, output, data=multi_eqdata, per_phase=False, callables=None, parameters=params_dict, model=models)
-
-        if 'vertex' in propdata.data_vars[output][0]:
-            raise ValueError(f"Property {output} cannot be used to calculate equilibrium thermochemical error because each phase has a unique value for this property.")
-
-        vals = getattr(propdata, output).flatten().tolist()
-        calculated_data.extend(vals)
+        wks.conditions = cond_dict
+        wks.parameters = params_dict  # these reset models and phase_record_factory through depends_on -> lose Model.shift_reference_state, etc.
+        wks.models = models
+        wks.phase_record_factory = phase_record_factory
+        vals = wks.get(output)
+        calculated_data.extend(np.atleast_1d(vals).tolist())
 
     calculated_data = np.array(calculated_data, dtype=np.float64)
 
@@ -304,7 +293,7 @@ def __init__(
         else:
             comps = sorted(database.elements)
             model_dict = dict()
-        phases = sorted(filter_phases(database, unpack_components(database, comps), database.phases.keys()))
+        phases = sorted(filter_phases(database, unpack_species(database, comps), database.phases.keys()))
         if symbols_to_fit is None:
             symbols_to_fit = database_symbols_to_fit(database)
         # okay if parameters are initialized to zero, we only need the symbol names