Skip to content
This repository has been archived by the owner on Apr 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #138 from vib-singlecell-nf/develop
Browse files Browse the repository at this point in the history
Develop

Former-commit-id: 2671fbe
  • Loading branch information
dweemx authored Feb 25, 2020
2 parents efcd532 + 53efa96 commit ba501de
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 12 deletions.
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ manifest {
name = 'vib-singlecell-nf/vsn-pipelines'
description = 'A repository of pipelines for single-cell data in Nextflow DSL2'
homePage = 'https://github.com/vib-singlecell-nf/vsn-pipelines'
version = '0.13.0'
version = '0.13.1'
mainScript = 'main.nf'
defaultBranch = 'master'
nextflowVersion = '!19.12.0-edge' // with ! prefix, stop execution if current version does not match required version.
Expand Down
64 changes: 53 additions & 11 deletions src/utils/bin/h5ad_to_loom.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,29 +296,71 @@ def read_h5ad(file_path, backed='r'):
# Populate
for i in range(0, num_clusters):
i = str(i)
num_genes = len(adatas[adata_idx].uns['rank_genes_groups']['pvals_adj'][i])
sig_genes_mask = adatas[adata_idx].uns['rank_genes_groups']['pvals_adj'][i] < args.markers_fdr_threshold
gene_names = adatas[adata_idx].uns['rank_genes_groups']['names'][i]
pvals_adj = adatas[adata_idx].uns['rank_genes_groups']['pvals_adj'][i]
logfoldchanges = adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i]
num_genes = len(pvals_adj)
sig_genes_mask = pvals_adj < args.markers_fdr_threshold
deg_genes_mask = np.logical_and(
np.logical_or(
adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i] >= args.markers_log_fc_threshold,
adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i] <= -args.markers_log_fc_threshold
logfoldchanges >= args.markers_log_fc_threshold,
logfoldchanges <= -args.markers_log_fc_threshold
),
np.isfinite(
adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i]
logfoldchanges
)
)
sig_and_deg_genes_mask = np.logical_and(
sig_genes_mask,
deg_genes_mask
)
gene_names = adatas[adata_idx].uns['rank_genes_groups']['names'][i][sig_and_deg_genes_mask]
cluster_markers.loc[np.in1d(cluster_markers.index, gene_names), i] = 1
cluster_markers_avg_logfc.loc[np.in1d(cluster_markers.index, gene_names), i] = np.around(
adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i][sig_and_deg_genes_mask],
marker_names = gene_names[sig_and_deg_genes_mask]

marker_genes_along_raw_adata_mask = np.in1d(
raw_filtered_adata.var.index,
gene_names[sig_and_deg_genes_mask]
)
marker_genes_along_raw_adata = cluster_markers.index[marker_genes_along_raw_adata_mask]

# Populate the marker mask
markers_df = pd.DataFrame(
1,
index=marker_names,
columns=["is_marker"]
)
cluster_markers.loc[
marker_genes_along_raw_adata_mask,
i
] = np.around(
markers_df["is_marker"][marker_genes_along_raw_adata],
decimals=6
)

# Populate the marker gene log fold changes
logfoldchanges_df = pd.DataFrame(
logfoldchanges[sig_and_deg_genes_mask],
index=marker_names,
columns=["logfc"]
)
cluster_markers_avg_logfc.loc[
marker_genes_along_raw_adata_mask,
i
] = np.around(
logfoldchanges_df["logfc"][marker_genes_along_raw_adata],
decimals=6
)
cluster_markers_pval.loc[np.in1d(cluster_markers.index, gene_names), i] = np.around(
adatas[adata_idx].uns['rank_genes_groups']['pvals_adj'][i][sig_and_deg_genes_mask],

# Populate the marker gene false discovery rates.
# The values are indexed by marker gene name so they can be re-ordered with
# marker_genes_along_raw_adata before being written into the rows selected
# by marker_genes_along_raw_adata_mask.
pvals_adj_df = pd.DataFrame(
    pvals_adj[sig_and_deg_genes_mask],
    index=marker_names,
    columns=["fdr"]
)
# Write the adjusted p-values into the p-value table for cluster i.
# Writing into cluster_markers_avg_logfc here would overwrite the log fold
# changes stored for this cluster and leave cluster_markers_pval unpopulated.
cluster_markers_pval.loc[
    marker_genes_along_raw_adata_mask,
    i
] = np.around(
    pvals_adj_df["fdr"][marker_genes_along_raw_adata],
    decimals=6
)

Expand Down
1 change: 1 addition & 0 deletions src/utils/bin/sc_file_concatenator.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
join=args.join,
index_unique=index_unique
)
adata.var.index = adata.var.index.astype(str)
adata = adata[:, np.sort(adata.var.index)]
else:
raise Exception("Concatenation of .{} files is not implemented.".format(args.format))
Expand Down
3 changes: 3 additions & 0 deletions src/utils/bin/sc_file_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def add_sample_id(adata, args):
# If tag_cell_with_sample_id is given, add the sample ID as suffix
if args.tag_cell_with_sample_id:
adata.obs.index = map(lambda x: re.sub('-[0-9]+', f"-{args.sample_id}", x), adata.obs.index)
adata.var.index = adata.var.index.astype(str)
adata = adata[:, np.sort(adata.var.index)]
print("Writing 10x data to h5ad...")
adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME))
Expand All @@ -134,6 +135,7 @@ def add_sample_id(adata, args):
# If tag_cell_with_sample_id is given, add the sample ID as suffix
if args.tag_cell_with_sample_id:
adata.obs.index = map(lambda x: re.sub('-[0-9]+', f"-{args.sample_id}", x), adata.obs.index)
adata.var.index = adata.var.index.astype(str)
adata = adata[:, np.sort(adata.var.index)]
print("Writing 10x data to h5ad...")
adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME))
Expand All @@ -149,6 +151,7 @@ def add_sample_id(adata, args):
delimiter=delim,
first_column_names=True
).T
adata.var.index = adata.var.index.astype(str)
adata = adata[:, np.sort(adata.var.index)]
adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME))

Expand Down

0 comments on commit ba501de

Please sign in to comment.