Skip to content

Commit

Permalink
Added the profile2cami tool, a component of the TaxonKit suite. (#6085)
Browse files Browse the repository at this point in the history
* Added the profile2cami tool, a component of the TaxonKit suite.

* Renamed shed.yml -> .shed.yml

* Shrank delnodes.dmp 684kb -> 150kb

* Renamed test.loc -> ncbi_taxonomy.loc

* Adjusted taxonkit, removed folder profile2cami, added suite in .shed.yml

* Updated Taxonkit to version 0.17.0 - worked on issues.

* Renamed taxonomy.loc -> taxonomy.loc.test

* Adjusted test, adjusted loc.sample

* Try to fix linting error

* Resolving linting error

* Adjusted the output label.
  • Loading branch information
Albert-Ber authored Jul 26, 2024
1 parent 3cce4c7 commit 695ea58
Show file tree
Hide file tree
Showing 20 changed files with 15,768 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tools/taxonkit/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Tool Shed metadata for the TaxonKit wrappers in this directory.
name: taxonkit
owner: iuc
description: TaxonKit - A Practical and Efficient NCBI Taxonomy Toolkit
homepage_url: https://bioinf.shenwei.me/taxonkit/
long_description: |
  TaxonKit is a set of tools for analyzing and manipulating taxonomic data. It includes utilities for converting metagenomic profile tables to CAMI format, among other functionalities.
# NOTE(review): this points at the upstream TaxonKit project; confirm whether
# it should instead reference the repository that hosts these wrappers.
remote_repository_url: https://github.com/shenwei356/taxonkit
categories:
  - Metagenomics
type: unrestricted
# Templates applied per tool; the placeholders are filled in from each
# tool's id and name.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for TaxonKit function: {{ tool_name }}."
# Suite definition that groups the TaxonKit tools together.
suite:
  name: suite_taxonkit
  description: A suite of tools that brings the TaxonKit project into Galaxy.
  long_description: |
    TaxonKit is a set of tools for analyzing and manipulating taxonomic data, including converting metagenomic profile tables to CAMI format.
22 changes: 22 additions & 0 deletions tools/taxonkit/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<macros>
    <!-- Shared tokens: the wrapped TaxonKit release, the wrapper revision
         suffix and the Galaxy profile. -->
    <token name="@TOOL_VERSION@">0.17.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">21.05</token>

    <!-- Package requirement pinned to @TOOL_VERSION@; the yield slot lets an
         expanding tool append further requirements. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">taxonkit</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- bio.tools cross-reference for TaxonKit. -->
    <xml name="biotools">
        <xrefs>
            <xref type="bio.tools">taxonkit</xref>
        </xrefs>
    </xml>

    <!-- DOI citation; the yield slot lets an expanding tool append further
         citations. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1016/j.jgg.2021.03.006</citation>
            <yield/>
        </citations>
    </xml>
</macros>
111 changes: 111 additions & 0 deletions tools/taxonkit/taxonkit_profile2cami.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
<tool id="profile2cami" name="Profile2CAMI" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Convert metagenomic profile table to CAMI format</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <!-- Every interpolated path and value is single-quoted so that spaces or
         shell metacharacters cannot split or alter the command line. The
         boolean parameters expand to their flag or to an empty string. -->
    <command detect_errors="exit_code">
<![CDATA[
        taxonkit profile2cami
            --data-dir '${taxonomy.fields.path}'
            --abundance-field '${abundance_field}'
            --taxid-field '${taxid_field}'
            $percentage
            $recompute_abd
            $keep_zero
            $no_sum_up
            #if $sample_id:
                -s '${sample_id}'
            #end if
            #if $taxonomy_id:
                -t '${taxonomy_id}'
            #end if
            #if $ranks:
                --show-rank '${ranks}'
            #end if
            '${input_file}'
            > '${cami_output}'
]]>
    </command>
    <inputs>
        <param name="input_file" type="data" format="txt" label="Input Profile File" help="A tab-delimited profile file with TaxId and abundance columns."/>
        <!-- The selected entry's path column is what the command passes to the
             data-dir option, so that is the flag advertised here. -->
        <param name="taxonomy" argument="--data-dir" type="select" label="NCBI taxonomy" help="This NCBI database is used to map human-readable taxon names to TaxId's.">
            <options from_data_table="ncbi_taxonomy">
                <validator message="No NCBI database is available" type="no_options"/>
            </options>
        </param>
        <param name="abundance_field" type="integer" value="2" label="Abundance Field Index" help="Field index of abundance in the input data."/>
        <param name="taxid_field" type="integer" value="1" label="TaxId Field Index" help="Field index of TaxId in the input data."/>
        <param name="percentage" type="boolean" checked="false" truevalue="-p" falsevalue="" label="Abundance in Percentage" help="Check if the abundance values are in percentage."/>
        <param name="recompute_abd" type="boolean" checked="false" truevalue="-R" falsevalue="" label="Recompute Abundance" help="Check to recompute abundance if some TaxIds are deleted in the current taxonomy version."/>
        <param name="keep_zero" type="boolean" checked="false" truevalue="-0" falsevalue="" label="Keep Zero Abundances" help="Check to keep taxons with abundance of zero."/>
        <param name="no_sum_up" type="boolean" checked="false" truevalue="-S" falsevalue="" label="Do Not Sum Up Abundance" help="Do not sum up abundance from child to parent TaxIds."/>
        <param name="sample_id" type="text" value="" label="Sample ID" help="Optional sample ID to include in the result file."/>
        <param name="taxonomy_id" type="text" value="" label="Taxonomy ID" help="Optional taxonomy ID to include in the result file."/>
        <param name="ranks" argument="--show-rank" type="select" multiple="true" label="Show Ranks" help="Specify the ranks to show in the result file (default [superkingdom,phylum,class,order,family,genus,species,strain]).">
            <option value="superkingdom">Superkingdom</option>
            <option value="phylum">Phylum</option>
            <option value="class">Class</option>
            <option value="order">Order</option>
            <option value="family">Family</option>
            <option value="genus">Genus</option>
            <option value="species">Species</option>
            <option value="strain">Strain</option>
        </param>
    </inputs>
    <outputs>
        <data name="cami_output" format="tsv" label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Test 1: Basic functionality with default parameters -->
        <test expect_num_outputs="1">
            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
            <output name="cami_output" file="output1_basic_functionality.tsv"/>
        </test>

        <!-- Test 2: Using percentage flag -->
        <test expect_num_outputs="1">
            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
            <param name="percentage" value="true"/>
            <output name="cami_output" file="output2_percentage_flag.tsv"/>
        </test>

        <!-- Test 3: Recomputing abundance with deleted TaxIds -->
        <test expect_num_outputs="1">
            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
            <param name="recompute_abd" value="true"/>
            <output name="cami_output" file="output3_recompute_abd.tsv"/>
        </test>

        <!-- Test 4: Profile2Cami with all parameters checked -->
        <test expect_num_outputs="1">
            <param name="input_file" value="abundance.tsv" ftype="tsv"/>
            <param name="percentage" value="true"/>
            <param name="recompute_abd" value="true"/>
            <param name="keep_zero" value="true"/>
            <param name="no_sum_up" value="true"/>
            <output name="cami_output" file="output4_all_param.tsv"/>
        </test>
    </tests>
    <!-- The help text is reStructuredText: blank lines are required between
         headings, paragraphs and bullet lists for them to render separately. -->
    <help>
<![CDATA[
**What is Profile2CAMI**

Profile2CAMI is a tool for converting metagenomic profile tables to CAMI format.

**Inputs**

- A tab-delimited profile file with TaxId and abundance columns.

**Outputs**

- A CAMI formatted file.

For more information, please refer to the tool's documentation.
]]>
    </help>
    <expand macro="citations"/>
</tool>
4 changes: 4 additions & 0 deletions tools/taxonkit/test-data/abundance.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
83333 0.2 merged to 562
83333 0.2 absord 562
561 0.5 no change
91347 0.1 deleted
1 change: 1 addition & 0 deletions tools/taxonkit/test-data/ncbi_taxonomy.loc.test
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test-db-tox Test Database ${__HERE__}/test-db
12 changes: 12 additions & 0 deletions tools/taxonkit/test-data/output1_basic_functionality.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@SampleID:
@Version:0.10.0
@Ranks:superkingdom|phylum|class|order|family|genus|species|strain
@TaxonomyID:
@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE
2 superkingdom 2 Bacteria 100.000000000000000
1224 phylum 2|1224 Bacteria|Proteobacteria 100.000000000000000
1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 100.000000000000000
91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 100.000000000000000
543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 90.000000000000000
561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 90.000000000000000
562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 40.000000000000000
12 changes: 12 additions & 0 deletions tools/taxonkit/test-data/output2_percentage_flag.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@SampleID:
@Version:0.10.0
@Ranks:superkingdom|phylum|class|order|family|genus|species|strain
@TaxonomyID:
@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE
2 superkingdom 2 Bacteria 1.000000000000000
1224 phylum 2|1224 Bacteria|Proteobacteria 1.000000000000000
1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 1.000000000000000
91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 1.000000000000000
543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 0.900000000000000
561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 0.900000000000000
562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 0.400000000000000
12 changes: 12 additions & 0 deletions tools/taxonkit/test-data/output3_recompute_abd.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@SampleID:
@Version:0.10.0
@Ranks:superkingdom|phylum|class|order|family|genus|species|strain
@TaxonomyID:
@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE
2 superkingdom 2 Bacteria 190.000000000000000
1224 phylum 2|1224 Bacteria|Proteobacteria 190.000000000000000
1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 190.000000000000000
91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 190.000000000000000
543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 90.000000000000000
561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 90.000000000000000
562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 40.000000000000000
12 changes: 12 additions & 0 deletions tools/taxonkit/test-data/output4_all_param.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@SampleID:
@Version:0.10.0
@Ranks:superkingdom|phylum|class|order|family|genus|species|strain
@TaxonomyID:
@@TAXID RANK TAXPATH TAXPATHSN PERCENTAGE
2 superkingdom 2 Bacteria 0.500000000000000
1224 phylum 2|1224 Bacteria|Proteobacteria 0.500000000000000
1236 class 2|1224|1236 Bacteria|Proteobacteria|Gammaproteobacteria 0.500000000000000
91347 order 2|1224|1236|91347 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales 0.500000000000000
543 family 2|1224|1236|91347|543 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae 0.500000000000000
561 genus 2|1224|1236|91347|543|561 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia 0.500000000000000
562 species 2|1224|1236|91347|543|561|562 Bacteria|Proteobacteria|Gammaproteobacteria|Enterobacteriales|Enterobacteriaceae|Escherichia|Escherichia coli 0.400000000000000
Loading

0 comments on commit 695ea58

Please sign in to comment.