-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from galaxyproteomics/master
MT2MQ (#465)
- Loading branch information
Showing
13 changed files
with
14,783 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name: mt2mq | ||
owner: galaxyp | ||
description: Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome | ||
long_description: | | ||
Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome | ||
categories: | ||
- Proteomics |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# MT2MQ: prepares metatranscriptomic outputs from ASaiM (HUMAnN2 and metaphlan) for metaquantome | ||
|
||
# Load libraries | ||
suppressPackageStartupMessages(library(tidyverse)) | ||
#default_locale() | ||
|
||
# Set parameters from arguments
# Usage: Rscript MT2MQ.R <data> <mode> <ontology> <outfile>
args <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of a cryptic error further down
# (a missing mode would otherwise break the first `if (mode == ...)` test).
if (length(args) < 4) {
  stop(
    "Expected 4 arguments: <data> <mode> <ontology> <outfile>",
    call. = FALSE
  )
}

data <- args[1]
# data: full path to file or directory:
#     - if in functional or f-t mode, should be a tsv file of HUMAnN2 gene
#       families, after regrouping and renaming to GO, joining samples, and
#       renormalizing to CPM.
#     - if in taxonomic mode, should be a directory of tsv files of metaphlan
#       genus-level results

mode <- args[2]
# mode:
#     -"f": function
#     -"t": taxonomy
#     -"ft": function-taxonomy
# An unrecognised mode would skip every branch below and leave `out`
# undefined when the result is written, so reject it here.
if (!mode %in% c("f", "t", "ft")) {
  stop(
    "mode must be one of \"f\", \"t\" or \"ft\", not \"", mode, "\"",
    call. = FALSE
  )
}

ontology <- unlist(strsplit(args[3], split = ","))
# ontology: only for function or f-t mode. A string of the GO namespace(s) to
# include, separated by commas; it is split here into a character vector.
#     ex: to include all:
#         "molecular_function,biological_process,cellular_component"

outfile <- args[4]
# outfile: full path with pathname and extension for output
|
||
# Functional mode: GO term abundances without per-genus stratification.
# Reads the joined gene-family table, splits the first column into
# id / namespace / name, and keeps only the requested GO namespaces.
if (mode == "f") {
  out <- read.delim(file = data, header = TRUE, sep = "\t") %>%
    # Drop rows whose label carries a "g__" genus tag.
    # NOTE(review): rows ending in "|unclassified" contain no "g__" and so
    # pass this filter -- confirm that is intended.
    filter(!grepl(".+g__.+", X..Gene.Family)) %>%
    # Split "<id>: <rest>"; labels without ": " get an NA id (fill = "left").
    separate(
      col = X..Gene.Family,
      into = c("id", "Extra"),
      sep = ": ",
      fill = "left"
    ) %>%
    # Split "<tag> <name>" on the first space only (extra = "merge").
    separate(
      col = Extra,
      into = c("namespace", "name"),
      sep = " ",
      fill = "left",
      extra = "merge"
    ) %>%
    # Expand the tag to the full namespace name. Any non-missing tag other
    # than "[MF]" / "[BP]" becomes "cellular_component"; NA stays NA.
    mutate(
      namespace = case_when(
        namespace == "[MF]" ~ "molecular_function",
        namespace == "[BP]" ~ "biological_process",
        !is.na(namespace) ~ "cellular_component"
      )
    ) %>%
    filter(namespace %in% ontology) %>%
    # Reorder to id, name, namespace, then every column from the 4th onward.
    select(id, name, namespace, 4:ncol(.))
}
|
||
# Taxonomic mode: merge one genus/abundance table per sample into a single
# wide table with one row per genus and one abundance column per sample.
if (mode == "t") {
  files <- dir(path = data)
  out <- tibble(filename = files) %>%
    # Read every file in the directory and stack the tables, keeping the
    # originating file name alongside each row.
    mutate(
      file_contents = map(
        filename,
        ~ read.delim(file = file.path(data, .), header = TRUE, sep = "\t")
      )
    ) %>%
    unnest(cols = c(file_contents)) %>%
    rename(sample = filename) %>%
    # The sample name is the file name minus a trailing ".tsv". The pattern is
    # escaped and anchored so that only a literal suffix is removed; names
    # without the suffix are kept unchanged and raise no warning.
    mutate(sample = str_replace(sample, "\\.tsv$", "")) %>%
    pivot_wider(names_from = sample, values_from = abundance) %>%
    mutate(rank = "genus") %>%
    rename(name = genus) %>%
    # Filler for taxon id (the rank of each name in sorted order); should
    # eventually find a way to get the id from the NCBI database.
    mutate(id = row_number(name)) %>%
    # id, name, rank first; the remaining columns are the per-sample
    # abundances.
    select(id, name, rank, 2:ncol(.))
}
|
||
# Function-taxonomy mode: GO term abundances stratified by genus and species.
# Reads the joined gene-family table, keeps the taxon-stratified rows, splits
# the first column into id / namespace / name / genus / species, and keeps
# only the requested GO namespaces.
if (mode == "ft") {
  out <- read.delim(file = data, header = TRUE, sep = "\t") %>%
    # Keep only rows whose label carries a "g__" genus tag.
    filter(grepl(".+g__.+", X..Gene.Family)) %>%
    # Split "<id>: <rest>"; labels without ": " get an NA id (fill = "left").
    separate(
      col = X..Gene.Family,
      into = c("id", "Extra"),
      sep = ": ",
      fill = "left"
    ) %>%
    # Split "<tag> <name>" on the first space only (extra = "merge").
    separate(
      col = Extra,
      into = c("namespace", "name"),
      sep = " ",
      fill = "left",
      extra = "merge"
    ) %>%
    # Everything after the first "|" is the taxon string.
    separate(
      col = name,
      into = c("name", "taxa"),
      sep = "\\|",
      extra = "merge"
    ) %>%
    # Split the taxon string on "__": the leading piece is discarded, the
    # next two become genus and species.
    separate(col = taxa, into = c("Extra", "genus", "species"), sep = "__") %>%
    select(-"Extra") %>%
    mutate(
      # Strip only the trailing ".s" left on the genus by the split above.
      # Other character columns (id, name, species) are left untouched so a
      # literal ".s" inside them is not silently deleted.
      genus = str_replace(genus, "\\.s$", ""),
      species = str_replace_all(species, "_", " "),
      # Expand the tag to the full namespace name. Any non-missing tag other
      # than "[MF]" / "[BP]" becomes "cellular_component"; NA stays NA.
      namespace = case_when(
        namespace == "[MF]" ~ "molecular_function",
        namespace == "[BP]" ~ "biological_process",
        !is.na(namespace) ~ "cellular_component"
      )
    ) %>%
    filter(namespace %in% ontology) %>%
    # id, name, namespace first, then genus, species and the sample columns.
    select(id, name, namespace, 4:ncol(.))
}
|
||
# Write file
# row.names = FALSE: the default (TRUE) prepends an unnamed row-number column,
# which leaves the header line one field shorter than every data row.
write.table(
  x = out,
  file = outfile,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
<tool id="mt2mq" name="MT2MQ" version="1.0"> | ||
<description>Tool to prepare metatranscriptomic outputs from ASaiM for Metaquantome</description> | ||
<requirements> | ||
<requirement type="package" version="1.2.1">r-tidyverse</requirement> | ||
</requirements> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
#if $options.mode == "f" or $options.mode == "ft": | ||
Rscript '$__tool_directory__/MT2MQ.R' '$options.input_files' '$options.mode' $options.ontology '$mq_output' | ||
#elif $options.mode == "t": | ||
mkdir in_dir | ||
#for $input in $options.input_files: | ||
&& cp '$input' 'in_dir/${input.name.rsplit('.',1)[0]}' | ||
#end for | ||
&& Rscript '$__tool_directory__/MT2MQ.R' in_dir t NA '$mq_output' | ||
#end if | ||
]]> | ||
</command> | ||
|
||
<inputs> | ||
<conditional name="options"> | ||
<param type="select" display="radio" name="mode" label="Mode"> | ||
<option value="f">Functional</option> | ||
<option value="t" selected="true">Taxonomic</option> | ||
<option value="ft">Functional-Taxonomic</option> | ||
</param> | ||
<when value="t"> | ||
<param name="ontology" type="hidden" value="NA" /> | ||
<param type="data" name="input_files" format="tsv,tabular,txt" label="Files from ASaiM for all samples (named after sample)" multiple="true" /> | ||
</when> | ||
<when value="f"> | ||
<param type="select" name="ontology" label="GO namespace" multiple="true" optional="false"> | ||
<option value="molecular_function">molecular function</option> | ||
<option value="biological_process">biological process</option> | ||
<option value="cellular_component">cellular component</option> | ||
</param> | ||
<param type="data" name="input_files" format="tsv,tabular,txt" label="File from HUMAnN2 after regrouping, renaming, joining, and renormalizing" /> | ||
</when> | ||
<when value="ft"> | ||
<param type="select" name="ontology" label="GO namespace" multiple="true" optional="false"> | ||
<option value="molecular_function">molecular function</option> | ||
<option value="biological_process">biological process</option> | ||
<option value="cellular_component">cellular component</option> | ||
</param> | ||
<param type="data" name="input_files" format="tsv,tabular,txt" label="File from HUMAnN2 after regrouping, renaming, joining, and renormalizing" /> | ||
</when> | ||
</conditional> | ||
</inputs> | ||
|
||
<outputs> | ||
<data name="mq_output" format="tabular" label="${options.mode}_output.tabular"/> | ||
</outputs> | ||
|
||
|
||
<tests> | ||
<test> | ||
<conditional name="options"> | ||
<param name="mode" value="t"/> | ||
<param name="input_files" value="T4A.tsv,T4B.tsv,T4C.tsv,T7A.tsv,T7B.tsv,T7C.tsv" ftype="tsv"/> | ||
<param name="ontology" value="NA"/> | ||
</conditional> | ||
<output name="mq_output"> | ||
<assert_contents> | ||
<has_text text="rank"/> | ||
<has_text text="genus"/> | ||
<has_text text="Clostridium"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<test> | ||
<conditional name="options"> | ||
<param name="mode" value="f"/> | ||
<param name="input_files" value="T4T7_func.tsv" ftype="tsv"/> | ||
<param name="ontology" value="molecular_function"/> | ||
</conditional> | ||
<output name="mq_output"> | ||
<assert_contents> | ||
<has_text text="namespace"/> | ||
<has_text text="molecular_function"/> | ||
<has_text text="0000014"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<test> | ||
<conditional name="options"> | ||
<param name="mode" value="ft"/> | ||
<param name="input_files" value="T4T7_func.tsv" ftype="tsv"/> | ||
<param name="ontology" value="biological_process"/> | ||
</conditional> | ||
<output name="mq_output"> | ||
<assert_contents> | ||
<has_text text="namespace"/> | ||
<has_text text="genus"/> | ||
<has_text text="biological_process"/> | ||
<has_text text="Clostridium"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
|
||
|
||
|
||
|
||
<help><![CDATA[ | ||
**MT2MQ**: Metatranscriptomics to Metaquantome | ||
---------------------------------------------------- | ||
MT2MQ is a tool to prepare metatranscriptomic results from ASaiM for further analysis with MetaQuantome, which currently only supports metaproteomic data. This tool has three modes: | ||
- **Taxonomic**: takes in genus-level MetaPhlAn2 results for each sample. The input files should be named as the sample. | ||
- Output: a single tabular file formatted for use as input for Metaquantome's taxonomic mode. | ||
- **Functional**: takes in a single file of HUMAnN2 results, regrouped and renamed to GO terms, with all samples joined together into one table, and renormalized to CPM. See the MT2MQ functional workflow for these processing steps. User can choose which GO namespace(s) to include. | ||
- Output: a single tabular file formatted for use as input for Metaquantome's functional mode. | ||
- **Functional/taxonomic**: takes the same input as the functional mode. User can choose which GO namespace(s) to include. | ||
- Output: a single tabular file including all GO terms and the taxa which express them and their abundances for each sample. This file *cannot* be used as input for Metaquantome. | ||
**Outputs**: | ||
------------ | ||
MT2MQ produces a single tabular output, formatted to be used as input for Metaquantome or for other analysis. | ||
]]></help> | ||
|
||
<citations> | ||
<citation type="bibtex"> | ||
@misc{MT2MQ, | ||
author={Crane, Marie}, | ||
year={2020}, | ||
title={Metatranscriptomics to MetaQuantome} | ||
} | ||
</citation> | ||
</citations> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
genus abundance | ||
Clostridium 68.36603 | ||
Coprothermobacter 31.23635 | ||
Methanothermobacter 0.3807 | ||
Escherichia 0.01692 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
genus abundance | ||
Clostridium 60.78776 | ||
Coprothermobacter 38.9515 | ||
Methanothermobacter 0.26075 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
genus abundance | ||
Clostridium 68.49482 | ||
Coprothermobacter 31.0739 | ||
Methanothermobacter 0.43128 |
Oops, something went wrong.