Skip to content

Commit fadfec0

Browse files
committed
log transform
1 parent fa12675 commit fadfec0

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

proxbias/scPerturb_processing_plotting.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def _load_and_process_data(filename: str, chromosome_info: Optional[pd.DataFrame
196196
if not os.path.exists(destination_path):
197197
source_path = f"https://zenodo.org/record/7416068/files/{filename}.h5ad?download=1"
198198
wget.download(source_path, destination_path)
199-
ad = scanpy.read_h5ad(destination_path)
199+
ad = read_and_log_transform_h5ad_file(destination_path)
200200
ad.var = ad.var.rename(columns={"start": "st", "end": "en"}).join(chromosome_info, how="left")
201201
if filename.startswith("Adamson"):
202202
ad.obs["gene"] = ad.obs.perturbation.apply(lambda x: x.split("_")[0]).fillna("")
@@ -529,6 +529,21 @@ def _get_perturbation_type(filename_short: str):
529529
}[filename_short]
530530

531531

532+
def read_and_log_transform_h5ad_file(filename: str) -> AnnData:
    """
    Load an h5ad single-cell perturb-seq dataset and apply a log1p transform.

    Args:
        filename (str): Path to the ``.h5ad`` file to load.

    Returns:
        AnnData: The loaded dataset after ``scanpy.pp.log1p`` has been applied.
    """
    adata = scanpy.read_h5ad(filename)
    # The return value of log1p is discarded, so the transform is expected to
    # be applied to `adata` itself rather than to a copy.
    scanpy.pp.log1p(adata)
    return adata
545+
546+
532547
def plot_loss_for_selected_genes(
533548
filenames: List[str],
534549
chromosome_info: Optional[pd.DataFrame] = None,
@@ -567,7 +582,7 @@ def plot_loss_for_selected_genes(
567582
sns.set(font_scale=1.7)
568583
plt.rcParams["svg.fonttype"] = "none"
569584
for filename in filenames:
570-
ad = scanpy.read_h5ad(os.path.join(str(utils.constants.DATA_DIR), f"{filename}.h5ad"))
585+
ad = read_and_log_transform_h5ad_file(os.path.join(str(utils.constants.DATA_DIR), f"{filename}.h5ad"))
571586
filename_short = _get_short_filename(filename)
572587
perts2check_df = allres[
573588
(allres["Dataset"] == filename_short)

0 commit comments

Comments
 (0)