Skip to content

Commit

Permalink
fixed zhangen and zhangblup
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas Camillo authored and Lucas Camillo committed Dec 29, 2023
1 parent 0cd95da commit 4732449
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 35 deletions.
4 changes: 2 additions & 2 deletions clocks/notebooks/dnamtl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "303e9b76-993f-4691-af9d-1151b3c7638f",
"metadata": {},
"outputs": [
Expand All @@ -140,7 +140,7 @@
"0"
]
},
"execution_count": 8,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
2 changes: 1 addition & 1 deletion clocks/notebooks/join_metadata.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "59eb29df-0597-4d45-b2e6-8825670effe2",
"metadata": {},
"outputs": [],
Expand Down
14 changes: 7 additions & 7 deletions clocks/notebooks/zhangblup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
{
"data": {
"text/plain": [
"32768"
"0"
]
},
"execution_count": 2,
Expand All @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "a284fe99-dc47-4f0c-b2ff-274e136e7020",
"metadata": {},
"outputs": [],
Expand All @@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d",
"metadata": {},
"outputs": [
Expand All @@ -74,7 +74,7 @@
")"
]
},
"execution_count": 7,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -90,13 +90,13 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 6,
"id": "b38f8af2-3d94-4a45-90b9-30b227828da1",
"metadata": {},
"outputs": [],
"source": [
"weights_dict = {\n",
" 'preprocessing': None, \n",
" 'preprocessing': 'scale_row', \n",
" 'preprocessing_helper': None,\n",
" 'postprocessing': None,\n",
" 'postprocessing_helper': None,\n",
Expand All @@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 7,
"id": "34136f3c-92b8-4641-a103-381d3a7dd857",
"metadata": {},
"outputs": [],
Expand Down
28 changes: 10 additions & 18 deletions clocks/notebooks/zhangen.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "fb157849-5454-4a60-8548-fff633fff764",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -36,7 +36,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "b9f484b1-f501-41b7-9565-82e03bfe97dc",
"metadata": {},
"outputs": [],
Expand All @@ -49,7 +49,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"id": "a284fe99-dc47-4f0c-b2ff-274e136e7020",
"metadata": {},
"outputs": [],
Expand All @@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"id": "7b4c3f6b-72af-4e99-84c4-65b8ef58c91d",
"metadata": {},
"outputs": [
Expand All @@ -74,7 +74,7 @@
")"
]
},
"execution_count": 11,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -90,13 +90,13 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 5,
"id": "b38f8af2-3d94-4a45-90b9-30b227828da1",
"metadata": {},
"outputs": [],
"source": [
"weights_dict = {\n",
" 'preprocessing': None, \n",
" 'preprocessing': 'scale_row', \n",
" 'preprocessing_helper': None,\n",
" 'postprocessing': None,\n",
" 'postprocessing_helper': None,\n",
Expand All @@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 6,
"id": "34136f3c-92b8-4641-a103-381d3a7dd857",
"metadata": {},
"outputs": [],
Expand All @@ -130,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"id": "01b905f5-298f-4edd-b69b-fcedeea9d0d4",
"metadata": {},
"outputs": [
Expand All @@ -140,22 +140,14 @@
"0"
]
},
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"os.system(\"rm -r DNAm-based-age-predictor\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "34a90554-4e17-42de-9671-f52d656caf0a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion pyaging/data/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def download_example_data(
The type of data to download. Valid options are 'GSE139307' (human methylation), 'GSE130735' (mouse
methylation), 'GSE223748' (mammalian methylation), 'ENCFF386QWG' (histone mark), 'GSE65765' (C. elegans
RNA-seq), 'GSE193140' (ATAC-Seq), 'blood_chemistry_example' (blood chemistry).
dir : str
The directory to deposit the downloaded file. Defaults to "pyaging_data".
Expand Down
23 changes: 18 additions & 5 deletions pyaging/predict/_pred_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,8 @@ def preprocess_data(
adata.X = tpm_norm_log1p(adata.X, preprocessing_helper)
elif preprocessing == "binarize":
adata.X = binarize(adata.X)
elif preprocessing == "scale_row":
adata.X = scale_row(adata.X, adata[:, features].X)
elif preprocessing == "scale":
X = adata[:, features].X
X = scale(X, preprocessing_helper)
Expand Down Expand Up @@ -599,7 +601,12 @@ def postprocess_data(

@progress("Predict ages with model")
def predict_ages_with_model(
model: torch.nn.Module, adata: torch.Tensor, features: List[str], device: str, logger, indent_level: int = 2
model: torch.nn.Module,
adata: torch.Tensor,
features: List[str],
device: str,
logger,
indent_level: int = 2,
) -> torch.Tensor:
"""
Predict biological ages using a trained model and input data.
Expand Down Expand Up @@ -653,7 +660,7 @@ def predict_ages_with_model(
"""
# Create an AnnLoader
use_cuda = device == 'cuda'
use_cuda = device == "cuda"
dataloader = AnnLoader(adata, batch_size=1024, use_cuda=use_cuda)

# Use the AnnLoader for batched prediction
Expand Down Expand Up @@ -824,11 +831,17 @@ def filter_missing_features(
"""
n_missing_features = sum(adata.var["percent_na"] == 1)
if n_missing_features > 0:
logger.info(f"Removing {n_missing_features} added features", indent_level=indent_level+1)
logger.info(
f"Removing {n_missing_features} added features",
indent_level=indent_level + 1,
)
adata = adata[:, adata.var["percent_na"] < 1].copy()
else:
logger.info("No missing features, so adata size did not change", indent_level=indent_level+1)

logger.info(
"No missing features, so adata size did not change",
indent_level=indent_level + 1,
)

return adata


Expand Down
14 changes: 14 additions & 0 deletions pyaging/predict/_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@ def scale(x, scaler):
return x_scaled


def scale_row(x, x_overlap):
    """
    Standardizes each row of ``x`` using per-row statistics of ``x_overlap``.

    The mean and standard deviation are taken along axis 1 of ``x_overlap``
    (kept as column vectors through ``keepdims=True``) and then applied to
    ``x`` as ``(x - mean) / std``. Rows whose standard deviation is exactly
    zero are divided by 1 instead, so no division by zero occurs.
    """
    center = np.mean(x_overlap, axis=1, keepdims=True)
    spread = np.std(x_overlap, axis=1, keepdims=True)

    # A constant row has zero spread; substitute 1 so the division below
    # leaves its centered values unchanged rather than producing inf/nan.
    constant_rows = spread == 0
    spread[constant_rows] = 1

    shifted = x - center
    return shifted / spread


def binarize(x):
"""
Binarizes an array based on the median of each row, excluding zeros.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pyaging"
version = "0.0.10"
version = "0.0.11"
description = "A Python-based compendium of GPU-optimized aging clocks."
authors = ["Lucas Paulo de Lima Camillo <lucas_camillo@alumni.brown.edu>"]
license = "BSD"
Expand Down

0 comments on commit 4732449

Please sign in to comment.