Clean up use cases, add new tabular prediction on MIMICIV (#510)
* Clean up use cases, add new tabular prediction on MIMICIV

* Bring example use cases first

* Remove decompensation notebook, it's not ready
amrit110 authored Nov 22, 2023
1 parent 8a0ef2f commit 4b42ac4
Showing 16 changed files with 1,324 additions and 2,670 deletions.
21 changes: 8 additions & 13 deletions cyclops/process/aggregate.py
@@ -66,7 +66,7 @@ def __init__(
timestamp_col: str,
time_by: Union[str, List[str]],
agg_by: Union[str, List[str]],
timestep_size: int,
timestep_size: Optional[int] = None,
window_duration: Optional[int] = None,
imputer: Optional[AggregatedImputer] = None,
agg_meta_for: Optional[List[str]] = None,
@@ -78,9 +78,9 @@ def __init__(
self.timestamp_col = timestamp_col
self.time_by = to_list(time_by)
self.agg_by = to_list(agg_by)
self.agg_meta_for = to_list_optional(agg_meta_for)
self.timestep_size = timestep_size
self.window_duration = window_duration
self.agg_meta_for = to_list_optional(agg_meta_for)
self.window_times = pd.DataFrame() # Calculated when given the data
self.imputer = imputer
# Parameter checking
@@ -90,8 +90,8 @@ def __init__(
raise ValueError(
"Cannot compute meta for a column not being aggregated.",
)
if self.window_duration is not None:
divided = self.window_duration / self.timestep_size
if window_duration is not None and timestep_size is not None:
divided = window_duration / timestep_size
if divided != int(divided):
raise ValueError("Window duration be divisible by bucket size.")

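Editor's note: the check above now only enforces divisibility when both window_duration and timestep_size are supplied, since timestep_size defaults to None. A minimal standalone sketch of that logic (hypothetical helper name, not part of the cyclops API):

from typing import Optional

def check_window_divisible(window_duration: Optional[int], timestep_size: Optional[int]) -> None:
    """Raise if a fixed window does not split evenly into timesteps."""
    if window_duration is not None and timestep_size is not None:
        divided = window_duration / timestep_size
        if divided != int(divided):
            raise ValueError("Window duration must be divisible by timestep size.")

# A 24-hour window with 4-hour timesteps divides evenly; no error is raised.
check_window_divisible(window_duration=24, timestep_size=4)
# With timestep_size left as None, the check is skipped entirely.
check_window_divisible(window_duration=24, timestep_size=None)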
@@ -568,6 +568,10 @@ def vectorize(self, aggregated: pd.DataFrame) -> Vectorized:
raise NotImplementedError(
"Cannot currently vectorize data aggregated with no window duration.",
)
if self.timestep_size is None:
raise NotImplementedError(
"Cannot currently vectorize data aggregated with no timestep size.",
)
num_timesteps = int(self.window_duration / self.timestep_size)
# Parameter checking
has_columns(aggregated, list(self.aggfuncs.keys()), raise_error=True)
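Editor's note: vectorize derives a fixed number of timesteps as window_duration / timestep_size, so both values must be set before vectorization. A rough standalone sketch of the guarded computation (illustration only, assuming integer hour units; not the library's exact code path):

from typing import Optional

def num_timesteps(window_duration: Optional[int], timestep_size: Optional[int]) -> int:
    """Number of timesteps a vectorized window will contain."""
    if window_duration is None:
        raise NotImplementedError("Cannot vectorize data aggregated with no window duration.")
    if timestep_size is None:
        raise NotImplementedError("Cannot vectorize data aggregated with no timestep size.")
    return int(window_duration / timestep_size)

# e.g. a 24-hour window bucketed into 1-hour timesteps yields 24 timesteps.
assert num_timesteps(24, 1) == 24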
@@ -605,8 +609,6 @@ def aggregate_values(
data: pd.DataFrame,
window_start_time: Optional[pd.DataFrame] = None,
window_stop_time: Optional[pd.DataFrame] = None,
start_bound_func: Optional[Callable[[pd.Series], pd.Series]] = None,
stop_bound_func: Optional[Callable[[pd.Series], pd.Series]] = None,
) -> pd.DataFrame:
"""Aggregate temporal values.
@@ -622,10 +624,6 @@
window_stop_time: pd.DataFrame, optional
An optionally provided window stop time. This cannot be provided if
window_duration was set.
start_bound_func : Optional[Callable[[pd.Series], pd.Series]], optional
A function to bound the start timestamp values, by default None
stop_bound_func : Optional[Callable[[pd.Series], pd.Series]], optional
A function to bound the start timestamp values, by default None
Returns
-------
@@ -648,9 +646,6 @@
)
# Restrict the data according to the start/stop
data = self._restrict_by_timestamp(data)
# Filter the data based on bounds on start/stop
data = start_bound_func(data) if start_bound_func else data
data = stop_bound_func(data) if stop_bound_func else data
grouped = data.groupby(self.agg_by, sort=False)

return grouped.agg(self.aggfuncs)
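Editor's note: with the start/stop bound functions removed, aggregate_values reduces to restricting rows to the window and then grouping and aggregating. A toy pandas illustration of that flow (hypothetical column names and data, not taken from the repository):

import pandas as pd

events = pd.DataFrame(
    {
        "encounter_id": [1, 1, 1, 2],
        "event_timestamp": pd.to_datetime(
            ["2023-01-01 01:00", "2023-01-01 05:00", "2023-01-02 12:00", "2023-01-01 03:00"],
        ),
        "heart_rate": [80.0, 95.0, 110.0, 70.0],
    }
)
window_start = pd.Timestamp("2023-01-01 00:00")
window_stop = pd.Timestamp("2023-01-01 06:00")

# Restrict the data according to the window start/stop timestamps.
in_window = events[
    (events["event_timestamp"] >= window_start) & (events["event_timestamp"] <= window_stop)
]

# Group by the aggregation key and apply the aggregation functions.
aggregated = in_window.groupby("encounter_id", sort=False).agg({"heart_rate": "mean"})
print(aggregated)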
2 changes: 1 addition & 1 deletion docs/source/tutorials.rst
@@ -3,5 +3,5 @@ Tutorials

.. toctree::

tutorials_monitor
tutorials_use_cases
tutorials_monitor