Skip to content

migrate in emboss_5, fastqc, freebayes, picard from devteam #1109

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jan 28, 2017
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
294 changes: 294 additions & 0 deletions macros/read_group_macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
<macros>
<!-- Import this at the top of your command block and then
define rg_auto_name. -->
<token name="@define_read_group_helpers@">
#def identifier_or_name($input1)
#if hasattr($input1, 'element_identifier')
#return $input1.element_identifier
#else
#return $input1.name.rstrip('.gz').rstrip('.fastq').rstrip('.fq')
#end if
#end def

#def clean(name)
#import re
#set $name_clean = re.sub('[^\w\-_\.]', '_', $name)
#return $name_clean
#end def

#def read_group_name_default($input1, $input2=None)
#if $input2 is None
#return $clean($identifier_or_name($input1))
#else
#import itertools
#set $input_name1 = $clean($identifier_or_name($input1))
#set $input_name2 = $clean($identifier_or_name($input2))
#set $common_prefix = ''.join([c[0] for c in itertools.takewhile(lambda x: all(x[0] == y for y in x), itertools.izip(*[$input_name1, $input_name2]))])
#if len($common_prefix) > 3
#return $common_prefix
#else
#return $input_name1
#end if
#end if
#end def

#def format_read_group(prefix, value, quote='', arg='')
#if $value
#return $arg + $quote + $prefix + $value + $quote
#else
#return ''
#end if
#end def

#def rg_param(name)
#if $varExists("rg")
#return $rg.get($name, None)
#else
#return $getVar($name, None)
#end if
#end def

#set $use_rg = True
</token>
<!-- preconditions use_rg and rg_auto_name have been
defined.
-->
<token name="@set_read_group_vars@">
#if $use_rg
#if $rg_param('read_group_id_conditional') is None
#set $rg_id = $rg_auto_name
#elif $rg_param('read_group_id_conditional').do_auto_name
#set $rg_id = $rg_auto_name
#else
#set $rg_id = str($rg_param('read_group_id_conditional').ID)
#end if

#if $rg_param('read_group_sm_conditional') is None
#set $rg_sm = ''
#elif $rg_param('read_group_sm_conditional').do_auto_name
#set $rg_sm = $rg_auto_name
#else
#set $rg_sm = str($rg_param('read_group_sm_conditional').SM)
#end if

#if $rg_param('PL')
#set $rg_pl = str($rg_param('PL'))
#else
#set $rg_pl = ''
#end if

#if $rg_param('read_group_lb_conditional') is None
#set $rg_lb = ''
#elif $rg_param('read_group_lb_conditional').do_auto_name
#set $rg_lb = $rg_auto_name
#else
#set $rg_lb = str($rg_param('read_group_lb_conditional').LB)
#end if

#if $rg_param('CN')
#set $rg_cn = str($rg_param('CN'))
#else
#set $rg_cn = ''
#end if

#if $rg_param("DS")
#set $rg_ds = str($rg_param("DS"))
#else
#set $rg_ds = ''
#end if

#if $rg_param("DT")
#set $rg_dt = str($rg_param("DT"))
#else
#set $rg_dt = ''
#end if

#if $rg_param("FO")
#set $rg_fo = str($rg_param("FO"))
#else
#set $rg_fo = ''
#end if

#if $rg_param("KS")
#set $rg_ks = str($rg_param("KS"))
#else
#set $rg_ks = ''
#end if

#if $rg_param("PG")
#set $rg_pg = str($rg_param("PG"))
#else
#set $rg_pg = ''
#end if

#if $rg_param("PI") != None
#set $rg_pi = str($rg_param("PI"))
#else
#set $rg_pi = ''
#end if

#if $rg_param("PU")
#set $rg_pu = str($rg_param("PU"))
#else
#set $rg_pu = ''
#end if
#end if
</token>
<token name="@set_use_rg_var@">
#set $use_rg = str($rg.rg_selector) != "do_not_set"
</token>
<xml name="read_group_auto_name_conditional">
<param name="do_auto_name" type="boolean" label="Auto-assign" help="Use dataset name or collection information to automatically assign this value" checked="no" />
<when value="true">
</when>
<when value="false">
<yield />
</when>
</xml>
<xml name="read_group_id_param">
<param name="ID" type="text" value="" label="Read group identifier (ID)" help="This value must be unique among multiple samples in your experiment" optional="false">
<validator type="empty_field" />
</param>
</xml>
<xml name="read_group_id_conditional">
<conditional name="read_group_id_conditional">
<expand macro="read_group_auto_name_conditional">
<expand macro="read_group_id_param" />
</expand>
</conditional>
</xml>
<xml name="read_group_sm_param">
<param name="SM" type="text" value="" label="Read group sample name (SM)" help="This value should be descriptive. Use pool name where a pool is being sequenced" />
</xml>
<xml name="read_group_sm_conditional">
<conditional name="read_group_sm_conditional">
<expand macro="read_group_auto_name_conditional">
<expand macro="read_group_sm_param" />
</expand>
</conditional>
</xml>
<!-- Above SM param is optional (for SAM/BAM spec, this is required
as per Picard.
-->
<xml name="read_group_sm_param_required">
<param name="SM" type="text" value="" label="Read group sample name (SM)" optional="false" help="This value should be descriptive. Use pool name where a pool is being sequenced">
<validator type="empty_field" />
</param>
</xml>
<xml name="read_group_sm_required_conditional">
<conditional name="read_group_sm_conditional">
<expand macro="read_group_auto_name_conditional">
<expand macro="read_group_sm_param" />
</expand>
</conditional>
</xml>
<xml name="read_group_pl_param">
<param name="PL" type="select" label="Platform/technology used to produce the reads (PL)">
<option value="CAPILLARY">CAPILLARY</option>
<option value="LS454">LS454</option>
<option selected="True" value="ILLUMINA">ILLUMINA</option>
<option value="SOLID">SOLID</option>
<option value="HELICOS">HELICOS</option>
<option value="IONTORRENT">IONTORRENT</option>
<option value="PACBIO">PACBIO</option>
</param>
</xml>
<xml name="read_group_lb_param">
<param name="LB" type="text" label="Library name (LB)" optional="true" />
</xml>
<xml name="read_group_lb_conditional">
<conditional name="read_group_lb_conditional">
<expand macro="read_group_auto_name_conditional">
<expand macro="read_group_lb_param" />
</expand>
</conditional>
</xml>
<xml name="read_group_lb_required_param">
<param name="LB" type="text" label="Library name (LB)" optional="false">
<validator type="empty_field" />
</param>
</xml>
<xml name="read_group_lb_required_conditional">
<conditional name="read_group_lb_conditional">
<expand macro="read_group_auto_name_conditional">
<expand macro="read_group_lb_required_param" />
</expand>
</conditional>
</xml>
<xml name="read_group_cn_param">
<param name="CN" type="text" label="Sequencing center that produced the read (CN)" />
</xml>
<xml name="read_group_ds_param">
<param name="DS" type="text" label="Description (DS)" />
</xml>
<xml name="read_group_dt_param">
<param name="DT" type="text" label="Date that run was produced (DT)" help="ISO8601 format date or date/time, like YYYY-MM-DD" />
</xml>
<xml name="read_group_fo_param">
<param name="FO" type="text" optional="true" label="Flow order (FO)" help="The array of nucleotide bases that correspond to the nucleotides used for each flow of each read. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by various other characters. Format: /\*|[ACMGRSVTWYHKDBN]+/">
<validator type="regex" message="Invalid flow order">\*|[ACMGRSVTWYHKDBN]+$</validator>
</param>
</xml>
<xml name="read_group_ks_param">
<param name="KS" type="text" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" />
</xml>
<xml name="read_group_pg_param">
<param name="PG" type="text" label="Programs used for processing the read group (PG)" />
</xml>
<xml name="read_group_pi_param">
<param name="PI" type="integer" optional="true" label="Predicted median insert size (PI)" />
</xml>
<xml name="read_group_pu_param">
<param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="True" />
</xml>
<xml name="read_group_pu_required_param">
<param name="PU" type="text" label="Platform unit (PU)" help="Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" optional="False" />
</xml>
<!-- Only ID is required - all groups available -->
<xml name="read_group_inputs_spec">
<expand macro="read_group_id_conditional" />
<expand macro="read_group_sm_conditional" />
<expand macro="read_group_pl_param" />
<expand macro="read_group_lb_conditional" />
<expand macro="read_group_cn_param" />
<expand macro="read_group_ds_param" />
<expand macro="read_group_dt_param" />
<expand macro="read_group_fo_param" />
<expand macro="read_group_ks_param" />
<expand macro="read_group_pg_param" />
<expand macro="read_group_pi_param" />
<expand macro="read_group_pu_param" />
</xml>
<!-- ID, SM, LB, PU, PL all required - not ks, pg, or fo params. -->
<xml name="read_group_inputs_picard">
<expand macro="read_group_id_conditional" />
<expand macro="read_group_sm_required_conditional" />
<expand macro="read_group_lb_required_conditional" />
<expand macro="read_group_pl_param" />
<expand macro="read_group_pu_required_param" />
<expand macro="read_group_cn_param" />
<expand macro="read_group_ds_param" />
<expand macro="read_group_pi_param" />
<expand macro="read_group_dt_param" />
</xml>
<xml name="read_group_conditional">
<conditional name="rg">
<param name="rg_selector" type="select" label="Set read groups information?" help="Specifying read group information can greatly simplify your downstream analyses by allowing combining multiple datasets.">
<option value="set">Set read groups (SAM/BAM specification)</option>
<option value="set_picard">Set read groups (Picard style)</option>
<option value="set_id_auto">Automatically assign ID</option>
<option value="do_not_set" selected="True">Do not set</option>
</param>
<when value="set_picard">
<expand macro="read_group_inputs_picard" />
</when>
<when value="set">
<expand macro="read_group_inputs_spec" />
</when>
<when value="set_id_auto">
</when>
<when value="do_not_set">
</when>
</conditional>
</xml>
</macros>
11 changes: 11 additions & 0 deletions tools/emboss_5/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
categories:
- Sequence Analysis
- Fasta Manipulation
description: Galaxy wrappers for EMBOSS version 5.0.0 tools
homepage_url: http://emboss.open-bio.org/
long_description: |
The European Molecular Biology Open Software Suite (EMBOSS) is a high quality, well documented package of open source software tools for molecular biology. It includes over 200 applications for molecular sequence analysis and other common tasks in bioinformatics.
name: emboss_5
owner: devteam
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/emboss_5
type: unrestricted
48 changes: 48 additions & 0 deletions tools/emboss_5/emboss_antigenic.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<tool id="EMBOSS: antigenic1" name="antigenic" version="5.0.0.1">
<description>Predicts potentially antigenic regions of a protein sequence, using the method of Kolaskar and Tongaonkar.</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<code file="emboss_format_corrector.py" />
<command>antigenic -sequence '$input1' -outfile '$out_file1' -minlen $minlen -rformat2 $out_format1 -auto</command>
<inputs>
<param name="input1" type="data" format="data" label="On query" />
<param name="minlen" type="integer" value="6" label="Minimum length of region" />
<param name="out_format1" type="select" label="Output format">
<option value="gff">GFF</option>
<option value="pir">PIR</option>
<option value="swiss">SwissProt</option>
<option value="dbmotif">DbMotif</option>
<option value="diffseq">diffseq</option>
<option value="excel">Excel (TAB Delimited)</option>
<option value="feattable">FeatTable</option>
<option value="motif">Motif</option>
<option value="nametable">NameTable</option>
<option value="regions">Regions</option>
<option value="seqtable">SeqTable</option>
<option value="simple">SRS simple</option>
<option value="srs">SRS</option>
<option value="table">Table</option>
<option value="tagseq">Tagseq</option>
<option value="antigenic">Antigenic Output File</option>
</param>
</inputs>
<outputs>
<data name="out_file1" format="antigenic" />
</outputs>
<tests>
<test>
<param name="input1" value="2.fasta"/>
<param name="minlen" value="6"/>
<param name="out_format1" value="excel"/>
<output name="out_file1" file="emboss_antigenic_out.tabular"/>
</test>
</tests>
<help>
You can view the original documentation here_.

.. _here: http://galaxy-iuc.github.io/emboss-5.0-docs/antigenic.html
</help>
<expand macro="citations" />
</tool>
Loading