Skip to content

Commit

Permalink
Truncate real world data to last sampling time
Browse files Browse the repository at this point in the history
  • Loading branch information
viktorht committed Nov 14, 2024
1 parent cc1444a commit 71be3de
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
17 changes: 15 additions & 2 deletions pseudobatch/datasets/_dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,20 @@ def load_real_world_yeast_fedbatch():
is obtained from an experiment carried out in a biolector."""

data_path = pathlib.Path(__file__).parent / "data" / "biolector_yeast_fedbatch.csv"
return pd.read_csv(data_path)
data = pd.read_csv(data_path)

def truncate_after_last_sample(
    df: pd.DataFrame, time_col: str, sample_col: str
) -> pd.DataFrame:
    """Drop every row recorded after the last sampling event.

    Parameters
    ----------
    df : pd.DataFrame
        Measurements to truncate.
    time_col : str
        Name of the column holding the time stamps.
    sample_col : str
        Name of the column holding the sample volume; a value greater
        than zero marks a sampling event.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` whose time is less than or equal to the latest
        sampling time, keeping their original index. Empty if no row has
        a positive sample volume.
    """
    # Boolean masks are used instead of ``DataFrame.query`` strings so that
    # arbitrary column names (e.g. ones containing a backtick) work and no
    # expression has to be parsed.
    is_sample = df[sample_col] > 0
    last_sample_time = df.loc[is_sample, time_col].max()

    # Without any sampling event the maximum is NaN; every comparison
    # against NaN is False, so the result is an empty frame.
    return df.loc[df[time_col] <= last_sample_time]

data_truncated = data.groupby("Biolector well").apply(
lambda x: truncate_after_last_sample(x, "Time", "Sample volume")
)

return data_truncated.reset_index(drop=True)


def load_volatile_compounds_fedbatch(sampling_points_only: bool = False):
Expand All @@ -107,4 +120,4 @@ def load_volatile_compounds_fedbatch(sampling_points_only: bool = False):
If True, only the rows where a sample was taken are kept, by default False
"""
data_path = pathlib.Path(__file__).parent / "data" / "volatile_product.csv"
return _prepare_simulated_dataset(data_path, sampling_points_only=sampling_points_only)
return _prepare_simulated_dataset(data_path, sampling_points_only=sampling_points_only)
2 changes: 1 addition & 1 deletion tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_load_real_world_yeast_fedbatch():
df = load_real_world_yeast_fedbatch()
logging.debug(df.shape)
assert df.empty is False
assert df.shape == (11400, 12), "The dataset has changed. Update the test."
assert df.shape == (10712, 12), "The dataset has changed. Update the test."


def test_load_volatile_compounds_fedbatch_unique_timestamps():
Expand Down

0 comments on commit 71be3de

Please sign in to comment.