Skip to content

Commit 6c1b0c9

Browse files
Merge pull request #145 from PixelgenTechnologies/dev
Release 0.17.1
2 parents 593252e + a244a16 commit 6c1b0c9

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.17.1] - 2024-05-27
9+
10+
### Fixed
11+
12+
* Poor performance when writing many small layouts to a pxl file (~45x speed-up). This should almost exclusively
13+
impact test scenarios, since most real components should be large enough for this not to be an issue.
14+
815
## [0.17.0] - 2024-05-23
916

1017
### Added

src/pixelator/pixeldataset/datastores.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,13 @@ def read_precomputed_layouts(
256256
def write_precomputed_layouts(
257257
self,
258258
layouts: PreComputedLayouts,
259+
collapse_to_single_dataframe: bool = False,
259260
) -> None:
260261
"""Write pre-computed layouts to the data store.
261262
262263
:param layouts: The pre-computed layouts to write.
264+
:param collapse_to_single_dataframe: Whether to collapse the layouts into
265+
a single dataframe before writing.
263266
"""
264267
...
265268

@@ -454,6 +457,7 @@ def write_colocalization(self, colocalization: pd.DataFrame) -> None:
454457
def write_precomputed_layouts(
455458
self,
456459
layouts: Optional[PreComputedLayouts],
460+
collapse_to_single_dataframe: bool = False,
457461
) -> None:
458462
"""Write pre-computed layouts to the data store."""
459463
if layouts is None:
@@ -463,15 +467,27 @@ def write_precomputed_layouts(
463467
self._check_if_writeable(self.LAYOUTS_KEY)
464468

465469
logger.debug("Starting to write layouts...")
466-
467-
for idx, layouts_to_write in enumerate(layouts.component_iterator()):
468-
if idx % 100 == 0:
469-
logger.debug("Writing layouts...")
470+
# This option is in place to allow collecting all the layouts into
471+
# a single dataframe before writing (they will still be written into
472+
# partitions), but this is much faster than writing them one by one
473+
# for scenarios with many very small layouts.
474+
if collapse_to_single_dataframe:
475+
logger.debug("Writing from a single dataframe...")
470476
self.write_dataframe(
471-
layouts_to_write,
477+
layouts.to_df(),
472478
self.LAYOUTS_KEY,
473479
partitioning=PreComputedLayouts.DEFAULT_PARTITIONING,
474480
)
481+
else:
482+
logger.debug("Writing by iterating components...")
483+
for idx, layouts_to_write in enumerate(layouts.component_iterator()):
484+
if idx % 100 == 0:
485+
logger.debug("Writing layouts...")
486+
self.write_dataframe(
487+
layouts_to_write,
488+
self.LAYOUTS_KEY,
489+
partitioning=PreComputedLayouts.DEFAULT_PARTITIONING,
490+
)
475491

476492
logger.debug("Completed writing layouts...")
477493

@@ -506,7 +522,16 @@ def save(self, dataset: PixelDataset, force_overwrite: bool = False) -> None:
506522

507523
if dataset.precomputed_layouts is not None:
508524
logger.debug("Writing precomputed layouts")
509-
self.write_precomputed_layouts(dataset.precomputed_layouts)
525+
# This speeds things up massively when you have many, very small
526+
# layouts, like we do in some test data.
527+
try:
528+
write_layouts_in_one_go = dataset.adata.obs["vertices"].sum() < 100_000
529+
except KeyError:
530+
write_layouts_in_one_go = False
531+
self.write_precomputed_layouts(
532+
dataset.precomputed_layouts,
533+
collapse_to_single_dataframe=write_layouts_in_one_go,
534+
)
510535

511536
logger.debug("PixelDataset saved to %s", self.path)
512537

@@ -560,6 +585,7 @@ def read_dataframe_lazy(self, key: str) -> Optional[pl.LazyFrame]:
560585
def write_precomputed_layouts(
561586
self,
562587
layouts: Optional[PreComputedLayouts],
588+
collapse_to_single_dataframe: bool = False,
563589
) -> None:
564590
"""Write pre-computed layouts to the data store (NB: Not implemented!)."""
565591
raise NotImplementedError(

0 commit comments

Comments
 (0)