@@ -256,10 +256,13 @@ def read_precomputed_layouts(
256
256
def write_precomputed_layouts (
257
257
self ,
258
258
layouts : PreComputedLayouts ,
259
+ collapse_to_single_dataframe : bool = False ,
259
260
) -> None :
260
261
"""Write pre-computed layouts to the data store.
261
262
262
263
:param layouts: The pre-computed layouts to write.
264
+ :param collapse_to_single_dataframe: Whether to collapse the layouts into
265
+ a single dataframe before writing.
263
266
"""
264
267
...
265
268
@@ -454,6 +457,7 @@ def write_colocalization(self, colocalization: pd.DataFrame) -> None:
454
457
def write_precomputed_layouts (
455
458
self ,
456
459
layouts : Optional [PreComputedLayouts ],
460
+ collapse_to_single_dataframe : bool = False ,
457
461
) -> None :
458
462
"""Write pre-computed layouts to the data store."""
459
463
if layouts is None :
@@ -463,15 +467,27 @@ def write_precomputed_layouts(
463
467
self ._check_if_writeable (self .LAYOUTS_KEY )
464
468
465
469
logger .debug ("Starting to write layouts..." )
466
-
467
- for idx , layouts_to_write in enumerate (layouts .component_iterator ()):
468
- if idx % 100 == 0 :
469
- logger .debug ("Writing layouts..." )
470
+ # This option is in place to allow collecting all the layouts into
471
+ # a single dataframe before writing (they will still be written into
472
+ # partitions), but this is much faster than writing them one by one
473
+ # for scenarios with many very small layouts.
474
+ if collapse_to_single_dataframe :
475
+ logger .debug ("Writing from a single dataframe..." )
470
476
self .write_dataframe (
471
- layouts_to_write ,
477
+ layouts . to_df () ,
472
478
self .LAYOUTS_KEY ,
473
479
partitioning = PreComputedLayouts .DEFAULT_PARTITIONING ,
474
480
)
481
+ else :
482
+ logger .debug ("Writing by iterating components..." )
483
+ for idx , layouts_to_write in enumerate (layouts .component_iterator ()):
484
+ if idx % 100 == 0 :
485
+ logger .debug ("Writing layouts..." )
486
+ self .write_dataframe (
487
+ layouts_to_write ,
488
+ self .LAYOUTS_KEY ,
489
+ partitioning = PreComputedLayouts .DEFAULT_PARTITIONING ,
490
+ )
475
491
476
492
logger .debug ("Completed writing layouts..." )
477
493
@@ -506,7 +522,16 @@ def save(self, dataset: PixelDataset, force_overwrite: bool = False) -> None:
506
522
507
523
if dataset .precomputed_layouts is not None :
508
524
logger .debug ("Writing precomputed layouts" )
509
- self .write_precomputed_layouts (dataset .precomputed_layouts )
525
+ # This speeds things up massively when you have many, very small
526
+ # layouts, like we do in some test data.
527
+ try :
528
+ write_layouts_in_one_go = dataset .adata .obs ["vertices" ].sum () < 100_000
529
+ except KeyError :
530
+ write_layouts_in_one_go = False
531
+ self .write_precomputed_layouts (
532
+ dataset .precomputed_layouts ,
533
+ collapse_to_single_dataframe = write_layouts_in_one_go ,
534
+ )
510
535
511
536
logger .debug ("PixelDataset saved to %s" , self .path )
512
537
@@ -560,6 +585,7 @@ def read_dataframe_lazy(self, key: str) -> Optional[pl.LazyFrame]:
560
585
def write_precomputed_layouts (
561
586
self ,
562
587
layouts : Optional [PreComputedLayouts ],
588
+ collapse_to_single_dataframe : bool = False ,
563
589
) -> None :
564
590
"""Write pre-computed layouts to the data store (NB: Not implemented!)."""
565
591
raise NotImplementedError (
0 commit comments