@@ -196,7 +196,7 @@ def _load_and_process_data(filename: str, chromosome_info: Optional[pd.DataFrame
196
196
if not os .path .exists (destination_path ):
197
197
source_path = f"https://zenodo.org/record/7416068/files/{ filename } .h5ad?download=1"
198
198
wget .download (source_path , destination_path )
199
- ad = scanpy . read_h5ad (destination_path )
199
+ ad = read_and_log_transform_h5ad_file (destination_path )
200
200
ad .var = ad .var .rename (columns = {"start" : "st" , "end" : "en" }).join (chromosome_info , how = "left" )
201
201
if filename .startswith ("Adamson" ):
202
202
ad .obs ["gene" ] = ad .obs .perturbation .apply (lambda x : x .split ("_" )[0 ]).fillna ("" )
@@ -529,6 +529,21 @@ def _get_perturbation_type(filename_short: str):
529
529
}[filename_short ]
530
530
531
531
532
def read_and_log_transform_h5ad_file(filename: str) -> AnnData:
    """
    Load an h5ad single-cell perturb-seq dataset and log1p-transform it.

    Args:
        filename (str): Path of the h5ad file to load.

    Returns:
        AnnData: The loaded dataset after ``scanpy.pp.log1p`` has been applied.
    """
    adata = scanpy.read_h5ad(filename)
    # The return value of log1p is not used; the transform is relied on to
    # update ``adata`` itself.
    scanpy.pp.log1p(adata)
    return adata
545
+
546
+
532
547
def plot_loss_for_selected_genes (
533
548
filenames : List [str ],
534
549
chromosome_info : Optional [pd .DataFrame ] = None ,
@@ -567,7 +582,7 @@ def plot_loss_for_selected_genes(
567
582
sns .set (font_scale = 1.7 )
568
583
plt .rcParams ["svg.fonttype" ] = "none"
569
584
for filename in filenames :
570
- ad = scanpy . read_h5ad (os .path .join (str (utils .constants .DATA_DIR ), f"{ filename } .h5ad" ))
585
+ ad = read_and_log_transform_h5ad_file (os .path .join (str (utils .constants .DATA_DIR ), f"{ filename } .h5ad" ))
571
586
filename_short = _get_short_filename (filename )
572
587
perts2check_df = allres [
573
588
(allres ["Dataset" ] == filename_short )
0 commit comments