galaxyproject · Albert-Ber · Jun 14, 2024 · Jun 14, 2024 · Jun 17, 2024 · Jun 17, 2024
diff --git a/tools/cami_opal/.shed.yml b/tools/cami_opal/.shed.yml
@@ -0,0 +1,12 @@
+name: cami_opal
+owner: iuc
+description: Evaluation package for metagenome taxonomic assignments
+homepage_url: https://github.com/CAMI-challenge/OPAL
+long_description: |
+  OPAL is an evaluation package designed for assessing metagenome taxonomic assignments.
+  It provides performance metrics, results rankings, and comparative visualizations
+  for evaluating multiple programs or parameter effects on metagenome taxonomic assignments.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/cami_opal/
+type: unrestricted
+categories:
+  - Metagenomics
diff --git a/tools/cami_opal/cami_opal.xml b/tools/cami_opal/cami_opal.xml
@@ -0,0 +1,293 @@
+<tool id="cami_opal" name="CAMI OPAL" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Evaluation tool for multiple read-based metagenomic taxonomic profilers</description>
+    <macros>
+        <import>macros.xml</import> <!-- supplies the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the macros expanded in this file -->
+    </macros>
+    <expand macro="biotools"/> <!-- emits the bio.tools xref defined in macros.xml -->
+    <expand macro="requirements" /> <!-- emits the cami-opal package requirement pinned to @TOOL_VERSION@ -->
+    <command detect_errors="exit_code">
+        <![CDATA[
+            ## Suppress Python FutureWarning messages for the whole job
+            export PYTHONWARNINGS="ignore::FutureWarning" &&
+
+            #import re
+
+            ## Stage every profile under inputs/ using a shell-safe name: any character other than
+            ## a word character, '-' or '.' in the element identifier is replaced by '_'
+            mkdir -p inputs &&
+            #set $safe_ids = []
+            #for $file in $input_files:
+                #set $safe_id = re.sub('[^\w\-\.]', '_', $file.element_identifier)
+                ln -s '$file' 'inputs/${safe_id}' &&
+                ## silent evaluation keeps the return value of append() out of the command line
+                #silent $safe_ids.append($safe_id)
+            #end for
+
+            opal.py
+            -g '${gold_standard_file}'
+
+            #for $safe_id in $safe_ids:
+                'inputs/${safe_id}'
+            #end for
+
+            ## Labels reuse the sanitized names: a raw identifier may contain quotes or commas
+            -l '${','.join($safe_ids)}'
+
+            $normalize
+
+            #if $filter:
+                -f '${filter}'
+            #end if
+
+            $plot_abundances
+
+            #if $desc:
+                -d '${desc}'
+            #end if
+            #if $ranks:
+                -r '${ranks}'
+            #end if
+            #if $metrics_plot_rel:
+                --metrics_plot_rel '${metrics_plot_rel}'
+            #end if
+            #if $metrics_plot_abs:
+                --metrics_plot_abs '${metrics_plot_abs}'
+            #end if
+            #if $branch_length_function:
+                -b '${branch_length_function}'
+            #end if
+
+            $normalized_unifrac
+
+            -o output
+
+            #if $html_output:
+                ## Publish results.html as the report dataset and copy the whole output folder next to it
+                && mkdir '$htmlreport.extra_files_path'
+                && cp output/results.html '$htmlreport'
+                && cp -r output/* '$htmlreport.extra_files_path'
+            #end if
+        ]]>
+    </command>
+    <inputs>
+        <param name="gold_standard_file" type="data" format="txt" label="Gold standard file"
+            help="Input the gold standard file here. Format: CAMI Profiling Bioboxes." />
+        <param name="input_files" type="data" format="txt" multiple="true" label="Input files"
+            help="Enter multiple input files. Format: CAMI Profiling Bioboxes. If your files are not in this format, you can use the 'profile2cami' tool to convert them to the CAMI Profiling format." />
+        <param name="html_output" type="boolean" label="Output in HTML format"
+            help="Select this option to generate an HTML file that contains the analysis results."
+            truevalue="--html_output" falsevalue="" checked="true" />
+        <param name="output_collections" type="boolean" label="Generate tool and rank output collections"
+            help="Select this option to generate collections of tool-specific and rank-specific tables."
+            truevalue="true" falsevalue="false" checked="true" />
+        <param argument="-n" name="normalize" type="boolean" label="Normalize samples"
+            help="Normalize the samples to compare them on the same scale."
+            truevalue="-n" falsevalue="" checked="false" />
+        <param argument="--filter" type="float" value="0" min="0" max="100" optional="true"
+            label="Filter out predictions with the smallest relative abundances summing up to this percentage within a rank"
+            help="This parameter allows you to filter out the predictions with the smallest relative abundances, such that their cumulative sum is equal to the specified percentage within a taxonomic rank. The value should be between 0 and 100." />
+        <param argument="-p" name="plot_abundances" type="boolean" label="Plot abundances in the gold standard"
+            help="Plot abundances in the gold standard (can take some minutes)"
+            truevalue="-p" falsevalue="" checked="false" />
+        <param argument="--desc" type="text" value=""
+            label="HTML description"
+            help="Enter the HTML page description here" />
+        <param argument="--ranks" type="select" multiple="true" label="Taxonomic ranks"
+            help="Choose the highest and lowest taxonomic ranks to consider in performance rankings.">
+            <option value="superkingdom">Superkingdom</option>
+            <option value="phylum">Phylum</option>
+            <option value="class">Class</option>
+            <option value="order">Order</option>
+            <option value="family">Family</option>
+            <option value="genus">Genus</option>
+            <option value="species">Species</option>
+            <option value="strain">Strain</option>
+        </param>
+        <param argument="--metrics_plot_rel" type="select" multiple="true" label="Metrics for relative performance plot"
+            help="Select metrics to include in the spider plot of relative performances.">
+            <option value="w">Weighted Unifrac</option>
+            <option value="l">L1 Norm</option>
+            <option value="c">Completeness</option>
+            <option value="p">Purity</option>
+            <option value="f">False Positives</option>
+            <option value="t">True Positives</option>
+        </param>
+        <param argument="--metrics_plot_abs" type="select" multiple="true" optional="true"
+            label="Metrics for spider plot of absolute performances"
+            help="Select valid metrics for the spider plot of absolute performances.">
+            <option value="c">Completeness</option>
+            <option value="p">Purity</option>
+            <option value="b">Bray-Curtis</option>
+        </param>
+        <!-- NOTE(review): this Python lambda is passed verbatim to opal.py (presumably evaluated there; confirm); the validator limits it to plain arithmetic on x -->
+        <param argument="--branch_length_function" type="text" value="" optional="true"
+            label="UniFrac tree branch length function"
+            help="Default: 'lambda x: 1/x', where x=tree depth">
+            <validator type="regex" message="Only a lambda over x built from numbers, x, arithmetic operators and parentheses is accepted, e.g. lambda x: 1/x">^(lambda x:[0-9x+*/(). -]+)?$</validator>
+        </param>
+        <param argument="--normalized_unifrac" type="boolean"
+            label="Compute normalized version of weighted UniFrac"
+            help="Compute normalized version of weighted UniFrac by dividing by the theoretical max unweighted UniFrac"
+            truevalue="--normalized_unifrac" falsevalue="" checked="false" />
+    </inputs>
+    <outputs>
+        <data name="htmlreport" format="html" label="${tool.name} on ${on_string}: HTML report"> <!-- only created when html_output is checked -->
+            <filter>html_output</filter>
+        </data>
+        <data name="result" format="tabular" from_work_dir="output/results.tsv" label="${tool.name} on ${on_string}: Results" />
+        <collection name="rank_output" type="list" label="${tool.name} on ${on_string}: Rank tables"> <!-- one element per .tsv file in output/by_rank -->
+            <filter>output_collections</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" directory="output/by_rank" format="tabular"/>
+        </collection>
+        <collection name="tool_output" type="list" label="${tool.name} on ${on_string}: Tool tables"> <!-- one element per .tsv file in output/by_tool -->
+            <filter>output_collections</filter>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tsv" directory="output/by_tool" format="tabular"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Basic run: two input profiles, HTML report and output collections disabled -->
+        <test expect_num_outputs="1">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="motus_test.profile,metaphlan2_test.profile" />
+            <param name="html_output" value="false" />
+            <param name="output_collections" value="false"/>
+            <param name="normalize" value="false"/>
+            <output name="result" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Gold standard" />
+                    <has_text text="metaphlan2_test.profile"/>
+                    <has_text text="motus_test.profile"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test with HTML output enabled -->
+        <test expect_num_outputs="2">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="motus_test.profile,metaphlan2_test.profile" />
+            <param name="desc" value="Test description for OPAL"/>
+            <param name="html_output" value="true"/>
+            <param name="output_collections" value="false"/>
+            <output name="htmlreport" ftype="html">
+                <assert_contents>
+                    <has_text text="Test description for OPAL" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- All optional parameters set, HTML report and both collections enabled:
+             four outputs (HTML report, results table, rank tables, tool tables) -->
+        <test expect_num_outputs="4">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="motus_test.profile,metaphlan2_test.profile,metaphyler_test.profile" />
+            <param name="normalize" value="true"/>
+            <param name="filter" value="5"/>
+            <param name="plot_abundances" value="true"/>
+            <param name="desc" value="Test description for OPAL"/>
+            <param name="ranks" value="superkingdom,species"/>
+            <param name="metrics_plot_rel" value="w,l,c,p,f,t"/>
+            <param name="metrics_plot_abs" value="c,p,b"/>
+            <param name="branch_length_function" value="lambda x: 1/x"/>
+            <param name="normalized_unifrac" value="true"/>
+            <param name="html_output" value="true"/>
+            <param name="output_collections" value="true"/>
+            <output name="htmlreport" ftype="html">
+                <assert_contents>
+                    <has_text text="Test description for OPAL" />
+                </assert_contents>
+            </output>
+            <output name="result" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Gold standard" />
+                    <has_text text="metaphlan2_test.profile"/>
+                    <has_text text="motus_test.profile"/>
+                    <has_text text="metaphyler_test.profile"/>
+                </assert_contents>
+            </output>
+            <output_collection name="rank_output" type="list"/>
+            <output_collection name="tool_output" type="list"/>
+        </test>
+        <!-- Same three profiles and parameters as the previous test, but with the
+             collections disabled: only the HTML report and the results table remain -->
+        <test expect_num_outputs="2">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="motus_test.profile,metaphlan2_test.profile,metaphyler_test.profile" />
+            <param name="normalize" value="true"/>
+            <param name="filter" value="5"/>
+            <param name="plot_abundances" value="true"/>
+            <param name="desc" value="Test description for OPAL"/>
+            <param name="ranks" value="superkingdom,species"/>
+            <param name="metrics_plot_rel" value="w,l,c,p,f,t"/>
+            <param name="metrics_plot_abs" value="c,p,b"/>
+            <param name="branch_length_function" value="lambda x: 1/x"/>
+            <param name="normalized_unifrac" value="true"/>
+            <param name="html_output" value="true"/>
+            <param name="output_collections" value="false"/>
+            <output name="htmlreport" ftype="html">
+                <assert_contents>
+                    <has_text text="Test description for OPAL" />
+                </assert_contents>
+            </output>
+            <output name="result" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Gold standard" />
+                    <has_text text="metaphlan2_test.profile"/>
+                    <has_text text="motus_test.profile"/>
+                    <has_text text="metaphyler_test.profile"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test with normalization enabled -->
+        <test expect_num_outputs="1">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="kraken_test.profile" />
+            <param name="normalize" value="true"/>
+            <param name="html_output" value="false"/>
+            <param name="output_collections" value="false"/>
+            <output name="result" ftype="tabular" file="normalized_k.tsv" lines_diff="30" />
+        </test>
+        <!-- Test with normalization disabled -->
+        <test expect_num_outputs="1">
+            <param name="gold_standard_file" value="gs_test.profile" />
+            <param name="input_files" value="kraken_test.profile" />
+            <param name="normalize" value="false"/>
+            <param name="html_output" value="false"/>
+            <param name="output_collections" value="false"/>
+            <output name="result" ftype="tabular" file="not_normalized_k.tsv" lines_diff="30" />
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+        .. class:: infomark
+
+        **What is OPAL**
+
+        OPAL is an evaluation package for the comparative assessment of metagenome benchmark datasets. It calculates multiple metrics per dataset and provides results rankings and visualizations for assessing multiple programs or parameter effects.
+
+        **What it does**
+
+        OPAL performs the following key tasks:
+
+        - Evaluates profiles using a gold standard file.
+        - Generates multiple metrics for each profile.
+        - Provides comparative visualizations and performance rankings.
+
+        For more information, please visit `OPAL on GitHub <https://github.com/CAMI-challenge/OPAL>`_.
+
+        **Input**
+
+        OPAL requires the following inputs:
+
+        1. **Gold Standard File**: Essential for the evaluation; it should be in CAMI Profiling Bioboxes format.
+        2. **Profile Files**: One or more profile files to evaluate. If your files are not in the required format, you can use the ``profile2cami`` tool to convert them to the CAMI Profiling format.
+
+        **Outputs**
+
+        OPAL generates the following outputs:
+
+        1. **HTML Report** (optional): An HTML file containing visualizations and summary of the evaluation.
+        2. **Results File**: A TSV file with detailed evaluation metrics for each profile.
+        3. **Rank tables** (optional): A collection of rank-specific TSV tables.
+        4. **Tool tables** (optional): A collection of tool-specific TSV tables.
+    ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/cami_opal/macros.xml b/tools/cami_opal/macros.xml
@@ -0,0 +1,22 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0.12</token> <!-- version of the cami-opal package; also the base of the tool version -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision appended after +galaxy in the tool version -->
+    <token name="@PROFILE@">21.05</token> <!-- value of the tool's profile attribute -->
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">Open-community_Profiling_Assessment_tooL</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">cami-opal</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-022-01431-4</citation>
+            <yield/>
+        </citations>
+    </xml>
+</macros>