Skip to content

Commit

Permalink
added output estimate coercion
Browse files Browse the repository at this point in the history
  • Loading branch information
elphick committed Oct 24, 2024
1 parent e9029e6 commit d949791
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 1 deletion.
39 changes: 38 additions & 1 deletion elphick/geomet/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ def update_mass_data(self, value: pd.DataFrame):
self._supplementary_data.index = self._mass_data.index
self._supplementary_data = self._supplementary_data.loc[value.index]
self.aggregate = self.weight_average()
self.status = OutOfRangeStatus(self, self.status.ranges)

def filter_by_index(self, index: pd.Index):
"""Update the data by index"""
Expand Down Expand Up @@ -586,6 +587,7 @@ def sub(self, other: MC, name: Optional[str] = None,
other: 'Stream'

# create the relationships
other.nodes = [self.nodes[1], other.nodes[1]]
res.nodes = [self.nodes[1], random_int()]

return res
Expand Down Expand Up @@ -738,6 +740,41 @@ def query(self, expr: str, name: Optional[str] = None) -> MC:

return res

def compare(self, other: 'MassComposition', comparisons: Union[str, list[str]] = 'recovery',
            explicit_names: bool = True) -> pd.DataFrame:
    """Compare this object with another, returning one column-wise concatenated DataFrame.

    Supported comparisons:

    * ``'recovery'``: ``self._mass_data / other._mass_data`` over ``self.component_vars``.
    * ``'difference'``: ``self.data - other.data`` over the non-mass data columns.
    * ``'divide'``: ``self.data / other.data`` over the non-mass data columns.
    * ``'all'``: every comparison above.

    Args:
        other: The object to compare against.
        comparisons: A single comparison name, or a collection of names.
        explicit_names: If True, each result column is renamed to
            ``"<self.name>_<column>_<label>_<other.name>"``, where ``label`` is read from
            ``self.config['comparisons'][<comparison>]``.  If False, the original column names are kept.

    Returns:
        The requested comparisons concatenated along the columns, always in the order
        recovery, difference, divide.

    Raises:
        ValueError: If no comparison is requested, or any requested name is not a valid comparison.
    """
    requested: list[str] = [comparisons] if isinstance(comparisons, str) else list(comparisons)
    valid_comparisons: set = {'recovery', 'difference', 'divide', 'all'}

    # Fail loudly on typos rather than silently dropping the unknown entry.
    if not requested or any(item not in valid_comparisons for item in requested):
        raise ValueError(f"The comparison argument is not valid: {valid_comparisons}")

    # 'all' is honoured wherever it appears, and for any collection type (list, tuple, ...).
    run_all: bool = 'all' in requested

    def _wanted(kind: str) -> bool:
        """True when the named comparison has been requested."""
        return run_all or kind in requested

    def _labelled(frame: pd.DataFrame, kind: str) -> pd.DataFrame:
        """Apply the explicit column naming for the given comparison, when enabled."""
        if explicit_names:
            frame.columns = [f"{self.name}_{col}_{self.config['comparisons'][kind]}_{other.name}" for col in
                             frame.columns]
        return frame

    chunks: list[pd.DataFrame] = []

    if _wanted('recovery'):
        chunks.append(_labelled(self._mass_data[self.component_vars] / other._mass_data[self.component_vars],
                                'recovery'))

    if _wanted('difference') or _wanted('divide'):
        # Only the data-based comparisons need these columns, so they are resolved lazily.
        # NOTE(review): data_vars and the .mc accessor look like an xarray-style API - confirm the type
        # of self.data supports them (and the .columns assignment used for explicit names).
        cols = [col for col in self.data.data_vars if col not in self.data.mc.mc_vars_attrs]
        if _wanted('difference'):
            chunks.append(_labelled(self.data[cols] - other.data[cols], 'difference'))
        if _wanted('divide'):
            chunks.append(_labelled(self.data[cols] / other.data[cols], 'divide'))

    return pd.concat(chunks, axis=1)

def reset_index(self, index_name: str) -> MC:
res = self.create_congruent_object(name=f"{self.name} (reset_index)", include_mc_data=True,
include_supp_data=True)
Expand Down Expand Up @@ -768,7 +805,7 @@ def __init__(self, mc: 'MC', ranges: dict[str, list]):
self.oor: pd.DataFrame = self._check_range()
self.num_oor: int = len(self.oor)
self.failing_components: Optional[list[str]] = list(
self.oor.dropna(axis=1).columns) if self.num_oor > 0 else None
self.oor.dropna(axis=1, how='all').columns) if self.num_oor > 0 else None

def get_ranges(self, ranges: dict[str, list]) -> dict[str, list]:

Expand Down
70 changes: 70 additions & 0 deletions elphick/geomet/utils/estimates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pandas as pd

from elphick.geomet.base import MassComposition
from elphick.geomet.flowsheet import Flowsheet
from elphick.geomet.utils.pandas import composition_to_mass


def coerce_output_estimates(estimate_stream: MassComposition, input_stream: MassComposition,
                            recovery_bounds: tuple[float, float] = (0.01, 0.99)) -> MassComposition:
    """Coerce output estimates within recovery and the component range.

    Estimates contain error and at times can exceed the specified component range, or can consume more
    component mass than is available in the feed.  This function solves the balance assuming one output
    complement stream:

    1. limits the estimate to within the recovery bounds,
    2. ensures the estimate is within the component range,
    3. solves the complement, and ensures it is in range,
    4. if the complement is out of range, it is adjusted and the estimate adjusted to maintain the balance.

    Note:
        ``estimate_stream`` is modified in place (through ``update_mass_data``) when the recovery is
        bounded.  The returned object is that same object unless the complement had to be adjusted, in
        which case it is the new object produced by ``input_stream.sub(...)``.

    Args:
        estimate_stream: The estimated object, which is a node output
        input_stream: The input object, which is a node input
        recovery_bounds: The (lower, upper) bounds for the recovery, default is 0.01 to 0.99

    Returns:
        The coerced estimate stream

    Raises:
        ValueError: If the recovery bounds are inverted, if the input stream is out of range, if the
            estimate is out of range after bounding or after adjustment, or if the resulting flowsheet
            does not balance.
    """
    lower, upper = recovery_bounds
    if lower > upper:
        raise ValueError(f'recovery_bounds must be (lower, upper) with lower <= upper, got {recovery_bounds}')

    # Truthiness (rather than ``is False``) also catches non-builtin booleans such as numpy.bool_.
    if not input_stream.status.ok:
        raise ValueError('Input stream is not OK')

    # calculate the recovery of dry mass and of every component mass
    cols: list[str] = [estimate_stream.mass_dry_var] + estimate_stream.composition_columns
    input_mass: pd.DataFrame = input_stream.mass_data[cols]
    recovery: pd.DataFrame = estimate_stream.mass_data[cols] / input_mass

    # limit the recovery to the bounds
    recovery = recovery.clip(lower=lower, upper=upper)

    # recalculate the estimate from the bound recovery
    estimate_stream.update_mass_data(recovery * input_mass)

    if not estimate_stream.status.ok:
        raise ValueError('Estimate stream is not OK - it should be after bounding recovery')

    # solve the complement
    complement_stream: MassComposition = input_stream.sub(estimate_stream, name='complement')
    if not complement_stream.status.ok:

        # adjust the complement to be within the component range without creating a new method.
        # Work on an explicit copy so the column assignments below never write into (or warn about)
        # a view of the complement's own data.
        new_complement_composition: pd.DataFrame = complement_stream.data[
            complement_stream.composition_columns].copy()
        for comp, comp_range in complement_stream.status.ranges.items():
            # Only the composition columns were selected above; skip any range keyed on another variable.
            if comp not in new_complement_composition.columns:
                continue
            new_complement_composition[comp] = new_complement_composition[comp].clip(comp_range[0], comp_range[1])

        # NOTE(review): only composition columns are passed here, with the name of the dry mass variable -
        # confirm composition_to_mass can resolve the mass from that.
        new_component_mass: pd.DataFrame = composition_to_mass(new_complement_composition,
                                                               mass_dry=complement_stream.mass_dry_var)
        complement_stream.update_mass_data(new_component_mass)

        # adjust the estimate to maintain the balance
        estimate_stream = input_stream.sub(complement_stream, name=estimate_stream.name,
                                           include_supplementary_data=True)

        if not estimate_stream.status.ok:
            raise ValueError('Estimate stream is not OK after adjustment')

    fs: Flowsheet = Flowsheet.from_objects([input_stream, estimate_stream, complement_stream])
    if not fs.balanced:
        raise ValueError('Flowsheet is not balanced after adjustment')

    return estimate_stream
29 changes: 29 additions & 0 deletions tests/test_014_coerce_estimates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
import pandas as pd
import pytest

from elphick.geomet import Sample
from elphick.geomet.utils.estimates import coerce_output_estimates
from fixtures import sample_data as test_data


def test_coerce_output_estimate(test_data):
    """Coerce an estimate derived from the feed and check it against the hard-coded expected frame."""
    feed_df: pd.DataFrame = test_data
    feed: Sample = Sample(data=feed_df, name='feed', moisture_in_scope=False, mass_dry_var='mass_dry')

    # Build the estimate by scaling the feed: less dry mass, more Fe, less of the other components.
    scale_factors: dict[str, float] = {'mass_dry': 0.95, 'Fe': 1.3, 'SiO2': 0.8, 'Al2O3': 0.8, 'LOI': 0.8}
    estimate_df: pd.DataFrame = feed_df.copy()
    for column, factor in scale_factors.items():
        estimate_df[column] = estimate_df[column] * factor
    estimate: Sample = Sample(data=estimate_df, name='estimate', moisture_in_scope=False,
                              mass_dry_var='mass_dry')

    expected: pd.DataFrame = pd.DataFrame(
        {
            'mass_dry': [85.5, 76.0, 85.5],
            'Fe': [59.4, 61.4842105263158, 63.56842105263157],
            'SiO2': [4.16, 2.4800000000000004, 1.7599999999999998],
            'Al2O3': [2.4, 1.36, 0.7200000000000002],
            'LOI': [4.0, 3.2000000000000006, 2.4],
            'group': ['grp_1', 'grp_1', 'grp_2'],
        },
        index=pd.Index([0, 1, 2], name='index'),
    )

    coerced: Sample = coerce_output_estimates(estimate_stream=estimate, input_stream=feed)

    pd.testing.assert_frame_equal(coerced.data, expected)

0 comments on commit d949791

Please sign in to comment.