added output estimate coercion

elphick · Oct 24, 2024 · d949791 · d949791
1 parent e9029e6
commit d949791
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 1 deletion.
diff --git a/elphick/geomet/base.py b/elphick/geomet/base.py
@@ -480,6 +480,7 @@ def update_mass_data(self, value: pd.DataFrame):
                 self._supplementary_data.index = self._mass_data.index
             self._supplementary_data = self._supplementary_data.loc[value.index]
         self.aggregate = self.weight_average()
+        self.status = OutOfRangeStatus(self, self.status.ranges)
 
     def filter_by_index(self, index: pd.Index):
         """Update the data by index"""
@@ -586,6 +587,7 @@ def sub(self, other: MC, name: Optional[str] = None,
         other: 'Stream'
 
         # create the relationships
+        other.nodes = [self.nodes[1], other.nodes[1]]
         res.nodes = [self.nodes[1], random_int()]
 
         return res
@@ -738,6 +740,41 @@ def query(self, expr: str, name: Optional[str] = None) -> MC:
 
         return res
 
+    def compare(self, other: 'MassComposition', comparisons: Union[str, list[str]] = 'recovery',
+                explicit_names: bool = True) -> pd.DataFrame:
+
+        comparisons = [comparisons] if isinstance(comparisons, str) else comparisons
+        valid_comparisons: set = {'recovery', 'difference', 'divide', 'all'}
+
+        cols = [col for col in self.data.data_vars if col not in self.data.mc.mc_vars_attrs]
+
+        chunks: list[pd.DataFrame] = []
+        if 'recovery' in comparisons or comparisons == ['all']:
+            df: pd.DataFrame = self._mass_data[self.component_vars] / other._mass_data[self.component_vars]
+            if explicit_names:
+                df.columns = [f"{self.name}_{col}_{self.config['comparisons']['recovery']}_{other.name}" for col in
+                              df.columns]
+            chunks.append(df)
+        if 'difference' in comparisons or comparisons == ['all']:
+            df: pd.DataFrame = self.data[cols] - other.data[cols]
+            if explicit_names:
+                df.columns = [f"{self.name}_{col}_{self.config['comparisons']['difference']}_{other.name}" for col in
+                              df.columns]
+            chunks.append(df)
+        if 'divide' in comparisons or comparisons == ['all']:
+            df: pd.DataFrame = self.data[cols] / other.data[cols]
+            if explicit_names:
+                df.columns = [f"{self.name}_{col}_{self.config['comparisons']['divide']}_{other.name}" for col in
+                              df.columns]
+            chunks.append(df)
+
+        if not chunks:
+            raise ValueError(f"The comparison argument is not valid: {valid_comparisons}")
+
+        res: pd.DataFrame = pd.concat(chunks, axis=1)
+
+        return res
+
     def reset_index(self, index_name: str) -> MC:
         res = self.create_congruent_object(name=f"{self.name} (reset_index)", include_mc_data=True,
                                            include_supp_data=True)
@@ -768,7 +805,7 @@ def __init__(self, mc: 'MC', ranges: dict[str, list]):
             self.oor: pd.DataFrame = self._check_range()
             self.num_oor: int = len(self.oor)
             self.failing_components: Optional[list[str]] = list(
-                self.oor.dropna(axis=1).columns) if self.num_oor > 0 else None
+                self.oor.dropna(axis=1, how='all').columns) if self.num_oor > 0 else None
 
     def get_ranges(self, ranges: dict[str, list]) -> dict[str, list]:
 

diff --git a/elphick/geomet/utils/estimates.py b/elphick/geomet/utils/estimates.py
@@ -0,0 +1,70 @@
+import pandas as pd
+
+from elphick.geomet.base import MassComposition
+from elphick.geomet.flowsheet import Flowsheet
+from elphick.geomet.utils.pandas import composition_to_mass
+
+
+def coerce_output_estimates(estimate_stream: MassComposition, input_stream: MassComposition,
+                            recovery_bounds: tuple[float, float] = (0.01, 0.99)) -> MassComposition:
+    """Coerce output estimates within recovery and the component range.
+
+    Estimates contain error and at times can exceed the specified component range, or can consume more component
+    mass than is available in the feed.  This function:solves the balance assuming one output complement stream,
+    1. limits the estimate to within the recovery bounds,
+    2. ensures the estimate is within the component range,
+    3. solves the complement, and ensures it is in range,
+    4. if the complement is out of range, it is adjusted and the estimate adjusted to maintain the balance.
+
+    Args:
+        estimate_stream: The estimated object, which is a node output
+        input_stream: The input object, which is a node input
+        recovery_bounds: The bounds for the recovery, default is 0.01 to 0.99
+
+    Returns:
+        The coerced estimate stream
+    """
+
+    if input_stream.status.ok is False:
+        raise ValueError('Input stream is not OK')
+
+    # calculate the recovery
+    cols: list[str] = [estimate_stream.mass_dry_var] + estimate_stream.composition_columns
+    recovery: pd.DataFrame = estimate_stream.mass_data[cols] / input_stream.mass_data[cols]
+
+    # limit the recovery to the bounds
+    recovery = recovery.clip(lower=recovery_bounds[0], upper=recovery_bounds[1])
+
+    # recalculate the estimate from the bound recovery
+    new_mass: pd.DataFrame = recovery * input_stream.mass_data[cols]
+    estimate_stream.update_mass_data(new_mass)
+
+    if estimate_stream.status.ok is False:
+        raise ValueError('Estimate stream is not OK - it should be after bounding recovery')
+
+    # solve the complement
+    complement_stream: MassComposition = input_stream.sub(estimate_stream, name='complement')
+    if complement_stream.status.ok is False:
+
+        # adjust the complement to be within the component without creating a new method
+        new_complement_composition = complement_stream.data[complement_stream.composition_columns]
+        for comp, comp_range in complement_stream.status.ranges.items():
+            new_complement_composition[comp] = new_complement_composition[comp].clip(comp_range[0], comp_range[1])
+        new_component_mass: pd.DataFrame = composition_to_mass(new_complement_composition,
+                                                               mass_dry=complement_stream.mass_dry_var)
+        complement_stream.update_mass_data(new_component_mass)
+
+        # adjust the estimate to maintain the balance
+        estimate_stream = input_stream.sub(complement_stream, name=estimate_stream.name,
+                                           include_supplementary_data=True)
+
+        if estimate_stream.status.ok is False:
+            raise ValueError('Estimate stream is not OK after adjustment')
+
+    fs: Flowsheet = Flowsheet.from_objects([input_stream, estimate_stream, complement_stream])
+    if fs.balanced is False:
+        raise ValueError('Flowsheet is not balanced after adjustment')
+
+    # fs.plot_network().show()
+
+    return estimate_stream
diff --git a/tests/test_014_coerce_estimates.py b/tests/test_014_coerce_estimates.py
@@ -0,0 +1,29 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from elphick.geomet import Sample
+from elphick.geomet.utils.estimates import coerce_output_estimates
+from fixtures import sample_data as test_data
+
+
+def test_coerce_output_estimate(test_data):
+    data: pd.DataFrame = test_data
+
+    obj_input: Sample = Sample(data=data, name='feed', moisture_in_scope=False, mass_dry_var='mass_dry')
+
+    df_est: pd.DataFrame = data.copy()
+    df_est['mass_dry'] = df_est['mass_dry'] * 0.95
+    df_est['Fe'] = df_est['Fe'] * 1.3
+    df_est[['SiO2', 'Al2O3', 'LOI']] = df_est[['SiO2', 'Al2O3', 'LOI']] * 0.8
+    obj_est: Sample = Sample(data=df_est, name='estimate', moisture_in_scope=False, mass_dry_var='mass_dry')
+
+    expected: pd.DataFrame = pd.DataFrame.from_dict(
+        {'mass_dry': {0: 85.5, 1: 76.0, 2: 85.5}, 'Fe': {0: 59.4, 1: 61.4842105263158, 2: 63.56842105263157},
+         'SiO2': {0: 4.16, 1: 2.4800000000000004, 2: 1.7599999999999998},
+         'Al2O3': {0: 2.4, 1: 1.36, 2: 0.7200000000000002}, 'LOI': {0: 4.0, 1: 3.2000000000000006, 2: 2.4},
+         'group': {0: 'grp_1', 1: 'grp_1', 2: 'grp_2'}})
+    expected.index.name = 'index'
+    obj_coerced: Sample = coerce_output_estimates(estimate_stream=obj_est, input_stream=obj_input)
+
+    pd.testing.assert_frame_equal(obj_coerced.data, expected)