Skip to content

Commit eb0c68b

Browse files
committed
closes #147, Added support for splitting with function and sklearn estimator. BREAKING CHANGE: renamed apply_partition to split_by_partition for method name consistency.
1 parent 9dc617d commit eb0c68b

15 files changed

+335
-38
lines changed

CHANGELOG.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
Mass_Composition 0.6.0 (2024-05-16)
2+
===================================
3+
4+
Feature
5+
-------
6+
7+
- Added support for splitting with a function (split_by_function) or a sklearn estimator (split_by_estimator).
8+
- BREAKING CHANGE: renamed apply_partition to split_by_partition (and its keyword argument definition to partition_definition) for method name consistency. (#147)
9+
10+
111
Mass_Composition 0.5.2 (2024-05-16)
212
===================================
313

elphick/mass_composition/datasets/sample_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def size_by_assay_2() -> pd.DataFrame:
115115
"""
116116
mc_size: MassComposition = MassComposition(size_by_assay(), name='feed')
117117
partition = partial(napier_munn, d50=0.150, ep=0.1, dim='size')
118-
mc_coarse, mc_fine = mc_size.apply_partition(definition=partition, name_1='coarse', name_2='fine')
118+
mc_coarse, mc_fine = mc_size.split_by_partition(partition_definition=partition, name_1='coarse', name_2='fine')
119119
fs: Flowsheet = Flowsheet().from_streams([mc_size, mc_coarse, mc_fine])
120120
return fs.to_dataframe()
121121

@@ -125,7 +125,7 @@ def size_by_assay_3() -> pd.DataFrame:
125125
"""
126126
mc_size: MassComposition = MassComposition(size_by_assay(), name='feed')
127127
partition = partial(napier_munn, d50=0.150, ep=0.1, dim='size')
128-
mc_coarse, mc_fine = mc_size.apply_partition(definition=partition, name_1='coarse', name_2='fine')
128+
mc_coarse, mc_fine = mc_size.split_by_partition(partition_definition=partition, name_1='coarse', name_2='fine')
129129
# add error to the coarse stream to create an imbalance
130130
df_coarse_2 = mc_coarse.data.to_dataframe().apply(lambda x: np.random.normal(loc=x, scale=np.std(x)))
131131
mc_coarse_2: MassComposition = MassComposition(data=df_coarse_2, name='coarse')
@@ -156,7 +156,7 @@ def iron_ore_met_sample_data() -> pd.DataFrame:
156156
def demo_size_network() -> Flowsheet:
157157
mc_size: MassComposition = MassComposition(size_by_assay(), name='size sample')
158158
partition = partial(perfect, d50=0.150, dim='size')
159-
mc_coarse, mc_fine = mc_size.apply_partition(definition=partition)
159+
mc_coarse, mc_fine = mc_size.split_by_partition(partition_definition=partition)
160160
mc_coarse.name = 'coarse'
161161
mc_fine.name = 'fine'
162162
fs: Flowsheet = Flowsheet().from_streams([mc_size, mc_coarse, mc_fine])

elphick/mass_composition/mass_composition.py

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,8 @@ def split(self,
495495
496496
A simple mass split maintaining the same composition
497497
498+
See also: split_by_partition, split_by_function, split_by_estimator
499+
498500
Args:
499501
fraction: A constant in the range [0.0, 1.0]
500502
name_1: The name of the reference stream created by the split
@@ -515,18 +517,18 @@ def split(self,
515517

516518
return out, comp
517519

518-
def apply_partition(self,
519-
definition: Callable,
520-
name_1: Optional[str] = None,
521-
name_2: Optional[str] = None) -> Tuple['MassComposition', 'MassComposition']:
520+
def split_by_partition(self,
521+
partition_definition: Callable,
522+
name_1: Optional[str] = None,
523+
name_2: Optional[str] = None) -> Tuple['MassComposition', 'MassComposition']:
522524
"""Partition the object along a given dimension.
523525
524526
This method applies the defined separation resulting in two new objects.
525527
526-
See also: split
528+
See also: split, split_by_function, split_by_estimator
527529
528530
Args:
529-
definition: A partition function that defines the efficiency of separation along a dimension
531+
partition_definition: A partition function that defines the efficiency of separation along a dimension
530532
name_1: The name of the reference stream created by the split
531533
name_2: The name of the complement stream created by the split
532534
@@ -536,7 +538,7 @@ def apply_partition(self,
536538
out = deepcopy(self)
537539
comp = deepcopy(self)
538540

539-
xr_ds_1, xr_ds_2 = self._data.mc.apply_partition(definition=definition)
541+
xr_ds_1, xr_ds_2 = self._data.mc.split_by_partition(partition_definition=partition_definition)
540542

541543
out._data = xr_ds_1
542544
comp._data = xr_ds_2
@@ -545,6 +547,67 @@ def apply_partition(self,
545547

546548
return out, comp
547549

550+
def split_by_function(self,
551+
split_function: Callable,
552+
name_1: Optional[str] = None,
553+
name_2: Optional[str] = None) -> Tuple['MassComposition', 'MassComposition']:
554+
"""Split an object using a function.
555+
556+
This method applies the function to self, resulting in two new objects. The object returned with name_1
557+
is the result of the function. The object returned with name_2 is the complement.
558+
559+
See also: split, split_by_estimator, split_by_partition
560+
561+
Args:
562+
split_function: Any function that transforms the dataframe from a MassComposition object into a new
563+
dataframe with values representing a new (output) stream. The returned dataframe structure must be
564+
identical to the input dataframe.
565+
name_1: The name of the stream created by the function
566+
name_2: The name of the complement stream created by the split, which is calculated automatically.
567+
568+
Returns:
569+
tuple of two MassComposition objects, the first created by the function, the other the complement
570+
"""
571+
out_data: pd.DataFrame = split_function(self.data.to_dataframe())
572+
out: MassComposition = MassComposition(name=name_1, constraints=self.constraints, data=out_data)
573+
comp: MassComposition = self.sub(other=out, name=name_2)
574+
575+
self._post_process_split(out, comp, name_1, name_2)
576+
577+
return out, comp
578+
579+
def split_by_estimator(self,
580+
estimator: 'sklearn.base.BaseEstimator',
581+
name_1: Optional[str] = None,
582+
name_2: Optional[str] = None) -> Tuple['MassComposition', 'MassComposition']:
583+
"""Split an object using a sklearn estimator.
584+
585+
This method applies the estimator to self, resulting in two new objects. The object returned with name_1
586+
is the result of the estimator.predict() method. The object returned with name_2 is the complement.
587+
588+
See also: split, split_by_function, split_by_partition
589+
590+
Args:
591+
estimator: Any sklearn estimator that transforms the dataframe from a MassComposition object into a new
592+
dataframe with values representing a new (output) stream using the predict method. The returned
593+
dataframe structure must be identical to the input dataframe.
594+
name_1: The name of the stream created by the estimator.
595+
name_2: The name of the complement stream created by the split, which is calculated automatically.
596+
597+
Returns:
598+
tuple of two MassComposition objects, the first predicted by the estimator, the other the complement
599+
"""
600+
out_data: Union[pd.DataFrame, np.ndarray] = estimator.predict(self.data.to_dataframe())
601+
if isinstance(out_data, np.ndarray):
602+
out_data = pd.DataFrame(out_data, index=self.data.to_dataframe().index,
603+
columns=self.data.to_dataframe().columns)
604+
out: MassComposition = MassComposition(name=name_1, constraints=self.constraints, data=out_data)
605+
comp: MassComposition = self.sub(other=out, name=name_2)
606+
607+
self._post_process_split(out, comp, name_1, name_2)
608+
609+
return out, comp
610+
548611
def calculate_partition(self, ref: 'MassComposition') -> pd.DataFrame:
549612
"""Calculate the partition of the ref stream relative to self"""
550613
self._check_one_dim_interval()
@@ -578,7 +641,7 @@ def resample_1d(self, interval_edges: Union[Iterable, int],
578641
include_original_edges=include_original_edges)
579642

580643
obj: MassComposition = MassComposition(df_upsampled, name=self.name)
581-
obj.nodes = self._nodes
644+
obj._nodes = self._nodes
582645
obj.constraints = self.constraints
583646
return obj
584647

@@ -991,7 +1054,7 @@ def __sub__(self, other: 'MassComposition') -> 'MassComposition':
9911054
res: MassComposition = MassComposition(name=xr_sub.mc.name, constraints=self.constraints)
9921055
res.set_data(data=xr_sub, constraints=self.constraints)
9931056

994-
res.nodes = [self._nodes[1], random_int()]
1057+
res._nodes = [self._nodes[1], random_int()]
9951058
return res
9961059

9971060
def __truediv__(self, other: 'MassComposition') -> 'MassComposition':
@@ -1050,6 +1113,7 @@ def _post_process_split(self, obj_1, obj_2, name_1, name_2):
10501113
obj_2._nodes = [self._nodes[1], random_int()]
10511114
obj_1._name = name_1
10521115
obj_2._name = name_2
1116+
10531117
return obj_1, obj_2
10541118

10551119
def _intervals_to_columns(self, interval_index: pd.IntervalIndex) -> pd.DataFrame:

elphick/mass_composition/mc_xarray.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -269,15 +269,15 @@ def split(self, fraction: float) -> Tuple[xr.Dataset, xr.Dataset]:
269269

270270
return out._obj, comp._obj
271271

272-
def apply_partition(self, definition: Callable) -> Tuple[xr.Dataset, xr.Dataset]:
272+
def split_by_partition(self, partition_definition: Callable) -> Tuple[xr.Dataset, xr.Dataset]:
273273
"""Partition the object along a given dimension.
274274
275275
This method applies the defined partition resulting in two new objects.
276276
277277
See also: split
278278
279279
Args:
280-
definition: A partition function that defines the efficiency of separation along a dimension
280+
partition_definition: A partition function that defines the efficiency of separation along a dimension
281281
282282
Returns:
283283
tuple of two datasets, the first defined by the function, the other the complement
@@ -288,13 +288,13 @@ def apply_partition(self, definition: Callable) -> Tuple[xr.Dataset, xr.Dataset]
288288
out = deepcopy(self)
289289
comp = deepcopy(self)
290290

291-
if not isinstance(definition, Callable):
291+
if not isinstance(partition_definition, Callable):
292292
raise TypeError("The definition is not a callable function")
293-
if 'dim' not in definition.keywords.keys():
293+
if 'dim' not in partition_definition.keywords.keys():
294294
raise NotImplementedError("The callable function passed does not have a dim")
295295

296-
dim = definition.keywords['dim']
297-
definition.keywords.pop('dim')
296+
dim = partition_definition.keywords['dim']
297+
partition_definition.keywords.pop('dim')
298298
if isinstance(self._obj[dim].data[0], pd.Interval):
299299
if dim == 'size':
300300
x = mean_size(pd.arrays.IntervalArray(self._obj[dim].data))
@@ -306,7 +306,7 @@ def apply_partition(self, definition: Callable) -> Tuple[xr.Dataset, xr.Dataset]
306306
'not an interval. This is not typical usage. It is assumed that the '
307307
'dimension data represents the centre/mean, and not an edge like '
308308
'retained or passing.')
309-
pn = definition(x)
309+
pn = partition_definition(x)
310310
if not ((dim in self._obj.dims) and (len(self._obj.dims) == 1)):
311311
# TODO: Set the dim to match the partition if it does not already
312312
# obj_mass = obj_mass.swap_dims(dim=)

elphick/mass_composition/stream.py

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Optional, Callable
1+
from typing import Optional, Callable, Generator
22

33
from elphick.mass_composition import MassComposition
44

@@ -47,17 +47,18 @@ def split(self, fraction: float,
4747
mc1, mc2 = super().split(fraction, name_1, name_2)
4848
return Stream.from_mass_composition(mc1), Stream.from_mass_composition(mc2)
4949

50-
def apply_partition(self, definition: Callable,
51-
name_1: Optional[str] = None, name_2: Optional[str] = None) -> tuple['Stream', 'Stream']:
50+
def split_by_partition(self, partition_definition: Callable,
51+
name_1: Optional[str] = None, name_2: Optional[str] = None) -> Generator[
52+
'Stream', None, None]:
5253
"""
5354
Partition the object along a given dimension.
5455
5556
This method applies the defined separation resulting in two new objects.
5657
57-
See also: split
58+
See also: split, split_by_function, split_by_estimator
5859
5960
Args:
60-
definition: A partition function that defines the efficiency of separation along a dimension
61+
partition_definition: A partition function that defines the efficiency of separation along a dimension
6162
name_1: The name of the reference stream created by the split
6263
name_2: The name of the complement stream created by the split
6364
@@ -66,8 +67,56 @@ def apply_partition(self, definition: Callable,
6667
6768
6869
"""
69-
mcs = super().apply_partition(definition, name_1, name_2)
70-
return (Stream.from_mass_composition(mc) for mc in mcs)
70+
streams = super().split_by_partition(partition_definition, name_1, name_2)
71+
return (Stream.from_mass_composition(stream) for stream in streams)
72+
73+
def split_by_function(self, split_function: Callable,
74+
name_1: Optional[str] = None,
75+
name_2: Optional[str] = None) -> Generator['Stream', None, None]:
76+
"""Split an object using a function.
77+
78+
This method applies the function to self, resulting in two new objects. The object returned with name_1
79+
is the result of the function. The object returned with name_2 is the complement.
80+
81+
See also: split, split_by_estimator, split_by_partition
82+
83+
Args:
84+
split_function: Any function that transforms the dataframe from a MassComposition object into a new
85+
dataframe with values representing a new (output) stream. The returned dataframe structure must be
86+
identical to the input dataframe.
87+
name_1: The name of the stream created by the function
88+
name_2: The name of the complement stream created by the split, which is calculated automatically.
89+
90+
Returns:
91+
A generator of two Streams, the first created by the function, the other the complement.
92+
93+
94+
"""
95+
streams = super().split_by_function(split_function, name_1, name_2)
96+
return (Stream.from_mass_composition(stream) for stream in streams)
97+
98+
def split_by_estimator(self, estimator: 'sklearn.base.BaseEstimator',
99+
name_1: Optional[str] = None,
100+
name_2: Optional[str] = None) -> Generator['Stream', None, None]:
101+
"""Split an object using a sklearn estimator.
102+
103+
This method applies the estimator to self, resulting in two new objects. The object returned with name_1
104+
is the result of the estimator.predict() method. The object returned with name_2 is the complement.
105+
106+
See also: split, split_by_function, split_by_partition
107+
108+
Args:
109+
estimator: Any sklearn estimator that transforms the dataframe from a MassComposition object into a new
110+
dataframe with values representing a new (output) stream using the predict method. The returned
111+
dataframe structure must be identical to the input dataframe.
112+
name_1: The name of the stream created by the estimator.
113+
name_2: The name of the complement stream created by the split, which is calculated automatically.
114+
115+
Returns:
116+
A generator of two Streams, the first predicted by the estimator, the other the complement.
117+
"""
118+
streams = super().split_by_estimator(estimator, name_1, name_2)
119+
return (Stream.from_mass_composition(stream) for stream in streams)
71120

72121
def add(self, other: 'Stream', name: Optional[str] = None) -> 'Stream':
73122
"""

examples/109_split_and_partition.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
#
102102
# Separate the object using the defined partition
103103

104-
mc_coarse, mc_fine = mc_size.apply_partition(definition=partition)
104+
mc_coarse, mc_fine = mc_size.split_by_partition(partition_definition=partition)
105105
mc_coarse.name = 'coarse'
106106
mc_fine.name = 'fine'
107107

examples/113_partition_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# %%
5555
# Separate the object using the defined partitions. UF = Underflow, OF = Overflow
5656

57-
mc_uf, mc_of = mc_feed.apply_partition(definition=part_cyclone, name_1='underflow', name_2='overflow')
57+
mc_uf, mc_of = mc_feed.split_by_partition(partition_definition=part_cyclone, name_1='underflow', name_2='overflow')
5858
fs: Flowsheet = Flowsheet().from_streams([mc_feed, mc_uf, mc_of])
5959

6060
fig = fs.table_plot(table_pos='left',

examples/114_compare_partitions.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@
7979
#
8080
# Separate the object using the defined partitions
8181

82-
mc_ideal_coarse, mc_ideal_fine = mc_ideal_feed.apply_partition(definition=part_ideal,
83-
name_1='ideal_coarse', name_2='ideal_fine')
84-
mc_sim_coarse, mc_sim_fine = mc_sim_feed.apply_partition(definition=part_sim, name_1='sim_coarse', name_2='sim_fine')
82+
mc_ideal_coarse, mc_ideal_fine = mc_ideal_feed.split_by_partition(partition_definition=part_ideal,
83+
name_1='ideal_coarse', name_2='ideal_fine')
84+
mc_sim_coarse, mc_sim_fine = mc_sim_feed.split_by_partition(partition_definition=part_sim, name_1='sim_coarse', name_2='sim_fine')
8585

8686

8787
fs: Flowsheet = Flowsheet().from_streams([mc_size, mc_ideal_feed, mc_sim_feed,

examples/202_interval_data_advanced.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
# We partially initialise a partition function, and split the feed stream accordingly.
4848

4949
partition = partial(napier_munn, d50=0.150, ep=0.05, dim='size')
50-
mc_oversize, mc_undersize = mc_feed.apply_partition(definition=partition, name_1='OS', name_2='US')
50+
mc_oversize, mc_undersize = mc_feed.split_by_partition(partition_definition=partition, name_1='OS', name_2='US')
5151

5252
# %%
5353
# Drop the two size fractions from mc_fine that have near zero mass.

examples/400_mass_balancing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
#
6161
# Separate the object using the defined partition
6262

63-
mc_coarse, mc_fine = mc_size.apply_partition(definition=partition, name_1='coarse', name_2='fine')
63+
mc_coarse, mc_fine = mc_size.split_by_partition(partition_definition=partition, name_1='coarse', name_2='fine')
6464

6565
fs: Flowsheet = Flowsheet().from_streams([mc_size, mc_coarse, mc_fine])
6666
print(fs.balanced)

0 commit comments

Comments
 (0)