Skip to content

Commit

Permalink
Release/1.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
twestbrookunh committed Feb 7, 2017
1 parent c1181ba commit afb3976
Show file tree
Hide file tree
Showing 15 changed files with 69 additions and 79 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Change Log

## [1.3.2] - 2017-02-07
### Added
- Alignment command can now directly take a protein multi-FASTA and skip ORF detection (-p option)
- Prepare and alignment commands can now make use of a proxy server (HTTP/SOCKS) for contacting UniProt (-P option)

## [1.3.1] - 2017-01-01
### Added
- UniProt report now includes max mapping quality for each protein
Expand Down
30 changes: 21 additions & 9 deletions align.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,17 @@ int command_align(int argc, char *argv[]) {
FILE * reportPriStream = 0, * reportSecStream = 0;
char * readsProName = 0, * indexProName = 0, * prefixName = 0;
char * samName = 0, * reportPriName = 0, * reportSecName = 0;
const char * proxyAddress;

memset(&aux, 0, sizeof(ktp_aux_t));
memset(pes, 0, VALUE_DOMAIN * sizeof(mem_pestat_t));
for (i = 0; i < VALUE_DOMAIN; ++i) pes[i].failed = 1;

aux.opt = opt = mem_opt_init();
memset(&opt0, 0, sizeof(mem_opt_t));
proxyAddress = NULL;

while ((c = getopt(argc, argv, "1epabgnMCSPVYJjf:F:u:k:o:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) {
while ((c = getopt(argc, argv, "1epabgnMCSVYJjf:F:u:k:o:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:P:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
else if (c == 'u') opt->outputType = atoi(optarg);
else if (c == 'f') opt->min_orf_len = atoi(optarg);
Expand All @@ -129,9 +131,9 @@ int command_align(int argc, char *argv[]) {
else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1;
else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1;
else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1;
else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
//else if (c == 'P') opt->flag |= MEM_F_NOPAIRING;
else if (c == 'a') opt->flag |= MEM_F_ALL;
else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
//else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
Expand All @@ -142,6 +144,7 @@ int command_align(int argc, char *argv[]) {
else if (c == 'g') opt->proteinFlag |= ALIGN_FLAG_GEN_NT;
else if (c == 'n') opt->proteinFlag |= ALIGN_FLAG_KEEP_PRO;
else if (c == 'J') opt->proteinFlag &= ~ALIGN_FLAG_ADJUST_ORF;
else if (c == 'p') opt->proteinFlag |= ALIGN_FLAG_MANUAL_PRO;
else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
else if (c == 'v') bwa_verbose = atoi(optarg);
Expand All @@ -157,6 +160,7 @@ int command_align(int argc, char *argv[]) {
else if (c == 'C') aux.copy_comment = 1;
else if (c == 'K') fixed_chunk_size = atoi(optarg);
else if (c == 'X') opt->mask_level = atof(optarg);
else if (c == 'P') proxyAddress = optarg;
else if (c == 'h') {
opt0.max_XA_hits = opt0.max_XA_hits_alt = 1;
opt->max_XA_hits = opt->max_XA_hits_alt = strtol(optarg, &p, 10);
Expand Down Expand Up @@ -344,8 +348,14 @@ int command_align(int argc, char *argv[]) {

}

// Detect ORFs and write protein file
writeReadsProtein(argv[optind + 1], readsProName, opt);
if (opt->proteinFlag & ALIGN_FLAG_MANUAL_PRO) {
    // Protein input given, skip ORF detection and align the input file directly.
    // The path comes from the command line, so it is copied through an explicit
    // "%s" instead of being used as the format string itself: a '%' in the file
    // name would otherwise be parsed as a conversion specifier.
    // NOTE(review): the allocation of readsProName is not visible in this hunk;
    // confirm it is large enough to hold the full input path.
    sprintf(readsProName, "%s", argv[optind + 1]);
}
else {
    // Detect ORFs and write protein file
    writeReadsProtein(argv[optind + 1], readsProName, opt);
}

// Open ORFs sequence
ko = kopen(readsProName, &fd);
Expand Down Expand Up @@ -385,14 +395,14 @@ int command_align(int argc, char *argv[]) {

// Generate UniProt report if requested
if (prefixName != NULL) {
renderUniprotReport(opt->outputType, 1, reportPriStream);
renderUniprotReport(opt->outputType, 1, reportPriStream, proxyAddress);
if (opt->flag & MEM_F_ALL) {
renderUniprotReport(opt->outputType, 0, reportSecStream);
renderUniprotReport(opt->outputType, 0, reportSecStream, proxyAddress);
}
}

// Delete protein file unless requested otherwise
if (!(opt->proteinFlag & ALIGN_FLAG_KEEP_PRO)) {
// Delete generated protein file unless requested otherwise
if (!(opt->proteinFlag & ALIGN_FLAG_KEEP_PRO) && !(opt->proteinFlag & ALIGN_FLAG_MANUAL_PRO)) {
remove(readsProName);
}

Expand Down Expand Up @@ -425,6 +435,7 @@ int renderAlignUsage(const mem_opt_t * passOptions) {
fprintf(stderr, "Usage: paladin align [options] <idxbase> <in.fq>\n\n");

fprintf(stderr, "Gene detection options:\n\n");
fprintf(stderr, " -p disable ORF detection and treat input as protein sequence\n");
fprintf(stderr, " -b disable brute force ORF detection\n");
fprintf(stderr, " -J do not adjust minimum ORF length (constant value) for shorter read lengths\n");
fprintf(stderr, " -f INT minimum ORF length accepted (as constant value) [%d]\n", passOptions->min_orf_len);
Expand Down Expand Up @@ -467,6 +478,7 @@ int renderAlignUsage(const mem_opt_t * passOptions) {
fprintf(stderr, " -u INT report type generated when using reporting and a UniProt reference [%d]\n", passOptions->outputType);
fprintf(stderr, " 0: Simple ID summary report\n");
fprintf(stderr, " 1: Detailed report (Contacts uniprot.org)\n\n");
fprintf(stderr, " -P STR HTTP or SOCKS proxy address\n");
fprintf(stderr, " -g generate detected ORF nucleotide sequence FASTA\n");
fprintf(stderr, " -n keep protein sequence after alignment\n");
//fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n");
Expand Down
27 changes: 0 additions & 27 deletions bntseq.c
Original file line number Diff line number Diff line change
@@ -1,30 +1,3 @@
/* The MIT License
Copyright (c) 2008 Genome Research Ltd (GRL).
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down
23 changes: 14 additions & 9 deletions bwamem.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ mem_opt_t *mem_opt_init()
return o;
}

void filterCompetingAln(worker_t * passWorker, int passCount) {
void filterCompetingAln(worker_t * passWorker, int passCount, int passDisable) {
int seqIdx, alnIdx, bestIdx;
int currentSeq, readSeq;
int seqTotal, bestTotal;
Expand All @@ -101,6 +101,12 @@ void filterCompetingAln(worker_t * passWorker, int passCount) {

// Iterate through each sequence and alignment
for (seqIdx = 0 ; seqIdx < passCount - 1 ; seqIdx++) {
// If filtering disabled, simply mark sequence as best
if (passDisable) {
passWorker->regs[seqIdx].active = 1;
continue;
}

// Check if we're in a new sequence or in an alternate frame
sscanf(passWorker->seqs[seqIdx].name, "%d:", &readSeq);
if (readSeq != currentSeq) {
Expand All @@ -113,12 +119,11 @@ void filterCompetingAln(worker_t * passWorker, int passCount) {
bestIdx = seqIdx;
}

// Aggregate all score totals for this sequence
seqTotal = 0;
for (alnIdx = 0 ; alnIdx < passWorker->regs[seqIdx].n ; alnIdx++) {
seqTotal += passWorker->regs[seqIdx].a[alnIdx].score;
}

// Aggregate all score totals for this sequence
seqTotal = 0;
for (alnIdx = 0 ; alnIdx < passWorker->regs[seqIdx].n ; alnIdx++) {
seqTotal += passWorker->regs[seqIdx].a[alnIdx].score;
}

// Check if current alignment is best so far
if (seqTotal > bestTotal) {
Expand All @@ -128,7 +133,7 @@ void filterCompetingAln(worker_t * passWorker, int passCount) {
}

// Filter final sequence
passWorker->regs[bestIdx].active = 1;
if (!passDisable) passWorker->regs[bestIdx].active = 1;
}

int getAlignmentType(worker_t * passWorker, int passEntry, int passAlignment) {
Expand Down Expand Up @@ -1300,7 +1305,7 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
}

// Filter competing alignments from multi-frame encoding during ORF detection process
filterCompetingAln(&w, n);
filterCompetingAln(&w, n, opt->proteinFlag & ALIGN_FLAG_MANUAL_PRO);

kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n);

Expand Down
2 changes: 1 addition & 1 deletion bwamem.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ typedef struct {
extern "C" {
#endif

void filterCompetingAln(worker_t * passWorker, int passCount);
void filterCompetingAln(worker_t * passWorker, int passCount, int passDisable);
int getAlignmentType(worker_t * passWorker, int passEntry, int passAlignment);

smem_i *smem_itr_init(const bwt_t *bwt);
Expand Down
2 changes: 0 additions & 2 deletions bwt.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
Expand Down
2 changes: 0 additions & 2 deletions bwt.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#ifndef BWA_BWT_H
#define BWA_BWT_H

Expand Down
13 changes: 7 additions & 6 deletions bwtindex.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -393,7 +391,7 @@ int command_index(int argc, char *argv[]) {
int command_prepare(int argc, char *argv[]) {
int c; // getopt() returns int; a plain char may be unsigned, making the -1 end marker compare >= 0 forever
char refArg[] = "-p0";
const char * refName;
const char * refName, * proxyAddress;
int refType, valid;

// Fixed passthrough arguments
Expand All @@ -404,10 +402,12 @@ int command_prepare(int argc, char *argv[]) {
valid = 1;
refType = -1;
refName = NULL;
proxyAddress = NULL;

while ((c = getopt(argc, argv, "r:f:")) >= 0) {
while ((c = getopt(argc, argv, "r:f:P:")) >= 0) {
if (c == 'r') refType = atoi(optarg);
if (c == 'f') refName = optarg;
if (c == 'P') proxyAddress = optarg;
if (c == '?') valid = 0;
}

Expand All @@ -420,7 +420,8 @@ int command_prepare(int argc, char *argv[]) {
fprintf(stderr, " -r <#> Reference Database:\n");
fprintf(stderr, " 1: UniProtKB Reviewed (Swiss-Prot)\n");
fprintf(stderr, " 2: UniProtKB Clustered 90%% (UniRef90)\n\n");
fprintf(stderr, " -f <ref.fasta> Skip download, use local copy of reference database (may be indexed)\n\n");
fprintf(stderr, " -f <ref.fasta> Skip download, use local copy of reference database (may be indexed)\n");
fprintf(stderr, " -P <address> HTTP or SOCKS proxy address\n\n");
fprintf(stderr, "Examples:\n\n");
fprintf(stderr, " paladin prepare -r2\n");
fprintf(stderr, " paladin prepare -r1 -f uniprot_sprot.fasta.gz\n");
Expand All @@ -432,7 +433,7 @@ int command_prepare(int argc, char *argv[]) {

// We can generalize this in the future to include other reference types
if (!refName) {
if ((refName = downloadUniprotReference(refType))[0] == 0) {
if ((refName = downloadUniprotReference(refType, proxyAddress))[0] == 0) {
return 1;
}
}
Expand Down
1 change: 0 additions & 1 deletion bwtindex.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#ifndef BWTINDEX_H_
#define BWTINDEX_H_

Expand Down
6 changes: 3 additions & 3 deletions main.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
The MIT License
Copyright (c) 2015 by Anthony Westbrook, University of New Hampshire <anthonyw@wildcats.unh.edu>
Copyright (c) 2015 by Anthony Westbrook, University of New Hampshire <anthony.westbrook@unh.edu>
Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>
Permission is hereby granted, free of charge, to any person obtaining
Expand Down Expand Up @@ -51,7 +51,7 @@
UniProt-generated functional profile. This text file may be used for all
downstream characterizations.
Contact: Toni Westbrook <anthonyw@wildcats.unh.edu>
Contact: Toni Westbrook <anthony.westbrook@unh.edu>
For information regarding BWA, contact Heng Li <lh3@sanger.ac.uk>
*/

Expand Down Expand Up @@ -135,7 +135,7 @@ int renderVersion() {
fprintf(stderr, "Program: PALADIN (Protein Alignment and Detection Interface)\n");

fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
fprintf(stderr, "Contact: Toni Westbrook (UNH) <anthonyw@wildcats.unh.edu>\n");
fprintf(stderr, "Contact: Toni Westbrook (UNH) <anthony.westbrook@unh.edu>\n");
fprintf(stderr, "Based on: BWA by Heng Li <lh3@sanger.ac.uk>\n\n");

return 1;
Expand Down
6 changes: 3 additions & 3 deletions main.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
The MIT License
Copyright (c) 2015 by Anthony Westbrook, University of New Hampshire <anthonyw@wildcats.unh.edu>
Copyright (c) 2015 by Anthony Westbrook, University of New Hampshire <anthony.westbrook@unh.edu>
Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>
Permission is hereby granted, free of charge, to any person obtaining
Expand Down Expand Up @@ -51,7 +51,7 @@
UniProt-generated functional profile. This text file may be used for all
downstream characterizations.
Contact: Toni Westbrook <anthonyw@wildcats.unh.edu>
Contact: Toni Westbrook <anthony.westbrook@unh.edu>
For information regarding BWA, contact Heng Li <lh3@sanger.ac.uk>
*/

Expand All @@ -66,7 +66,7 @@
#define PACKAGE_VERSION STR(PACKAGE_VERSION_MAJOR) "." STR(PACKAGE_VERSION_MINOR) "." STR(PACKAGE_VERSION_REV)
#define PACKAGE_VERSION_MAJOR 1
#define PACKAGE_VERSION_MINOR 3
#define PACKAGE_VERSION_REV 1
#define PACKAGE_VERSION_REV 2
#endif

// Render usage and version details
Expand Down
2 changes: 0 additions & 2 deletions protein.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
Expand Down
5 changes: 2 additions & 3 deletions protein.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
/* Contact: Toni Westbrook <anthonyw@wildcats.unh.edu> */

#ifndef PROTEIN_H_
#define PROTEIN_H_

Expand All @@ -11,10 +9,11 @@
#define OUTPUT_TYPE_UNIPROT_SIMPLE 0
#define OUTPUT_TYPE_UNIPROT_FULL 1

#define ALIGN_FLAG_BRUTE_ORF 0x0001
#define ALIGN_FLAG_BRUTE_ORF 0x0001
#define ALIGN_FLAG_GEN_NT 0x0002
#define ALIGN_FLAG_KEEP_PRO 0x0004
#define ALIGN_FLAG_ADJUST_ORF 0x0008
#define ALIGN_FLAG_MANUAL_PRO 0x0010

extern unsigned char codon_aa_hash[64];

Expand Down
Loading

0 comments on commit afb3976

Please sign in to comment.