Skip to content

Commit 6069a2c

Browse files
author
Stefan Washietl
committed
Removed unnecessary function call in inner loop which made this cubic;
Scan both forward and reverse complement.
1 parent 3be960f commit 6069a2c

File tree

7 files changed

+229
-198
lines changed

7 files changed

+229
-198
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@ depcomp
1010
install-sh
1111
missing
1212
src/*ps
13+
src/scratch.c
14+
src/RNAcode
1315
stamp-h1
1416
RNAcode*.tar.gz

src/RNAcode

-1.35 MB
Binary file not shown.

src/RNAcode.c

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ void usage(void);
2020
void help(void);
2121
void version(void);
2222

23+
double sigma=+4.0;
24+
double omega=-2.0;
25+
double Omega=-4.0;
26+
double Delta=-10.0;
27+
2328
int main(int argc, char *argv[]){
2429

2530
int i,j,k,L,N,hssCount;
@@ -28,7 +33,7 @@ int main(int argc, char *argv[]){
2833
char *tmpSeq, *treeString;
2934
double kappa, parMu, parLambda;
3035
bgModel* models;
31-
segmentStats *results;
36+
segmentStats *results, *resultsRev, *allResults;
3237
TTree* tree;
3338
char debugFileName[1024]="";
3439
FILE *inputFile=stdin;
@@ -42,6 +47,7 @@ int main(int argc, char *argv[]){
4247

4348
struct gengetopt_args_info args;
4449
struct aln *inputAln[MAX_NUM_NAMES];
50+
struct aln *inputAlnRev[MAX_NUM_NAMES];
4551

4652
int sampleN=1000;
4753
int sampleMode=1;
@@ -185,19 +191,38 @@ int main(int argc, char *argv[]){
185191
}
186192

187193
tree=string2tree(treeString);
188-
189194
models=getModels(tree,inputAln,kappa);
190195

196+
copyAln((struct aln**)inputAln,(struct aln**)inputAlnRev);
197+
revAln((struct aln**)inputAlnRev);
198+
191199
Sk=getPairwiseScoreMatrix(models,(const struct aln**)inputAln);
192200
S=getMultipleScoreMatrix(Sk,models,(const struct aln**)inputAln);
201+
getExtremeValuePars(tree, models, (const struct aln**)inputAln, sampleN, &parMu, &parLambda);
202+
results=getHSS(S, (const struct aln**)inputAln, '+', parMu, parLambda, cutoff);
203+
204+
Sk=getPairwiseScoreMatrix(models,(const struct aln**)inputAlnRev);
205+
S=getMultipleScoreMatrix(Sk,models,(const struct aln**)inputAlnRev);
206+
resultsRev=getHSS(S, (const struct aln**)inputAln, '-', parMu, parLambda, cutoff);
207+
208+
hssCount=0;
209+
210+
i=0;
211+
while (results[i].score > 0.0){
212+
allResults=(segmentStats*)realloc(allResults,sizeof(segmentStats)*(hssCount+2));
213+
allResults[hssCount++]=results[i++];
214+
}
193215

194-
//backtrack(Sk, 1, (const struct aln**)inputAln);
216+
i=0;
217+
while (resultsRev[i].score > 0.0){
218+
allResults=(segmentStats*)realloc(allResults,sizeof(segmentStats)*(hssCount+2));
219+
allResults[hssCount++]=resultsRev[i++];
220+
}
195221

196-
getExtremeValuePars(tree, models, (const struct aln**)inputAln, sampleN, &parMu, &parLambda);
197-
198-
results=getHSS(S, (const struct aln**)inputAln, parMu, parLambda, cutoff);
199-
printResults(outputFile,outputFormat,results);
222+
printResults(outputFile,outputFormat,allResults);
200223

224+
//backtrack(Sk, 1, (const struct aln**)inputAln);
225+
201226
}
202227

203228
//results=getHSS(models, (const struct aln**)inputAln, parMu, parLambda,cutoff);
@@ -223,11 +248,8 @@ int main(int argc, char *argv[]){
223248
*/
224249

225250
freeModels(models,N);
226-
227251
free(treeString);
228-
229252
freeSeqgenTree(tree);
230-
231253
freeAln((struct aln**)inputAln);
232254

233255

src/misc.c

Lines changed: 142 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ z ... difference of gaps between reference and k in the block
6363
6464
*/
6565

66-
void getBlock(const char* seq_0, const char* seq_k, int i, char* block_0, char* block_k, int* z ){
66+
void getBlock(int i, const char* seq_0, const char* seq_k, const int* map_0, const int* map_k, char* block_0, char* block_k, int* z ){
6767

6868
int start,end;
6969
int gap_0, gap_k;
@@ -76,14 +76,16 @@ void getBlock(const char* seq_0, const char* seq_k, int i, char* block_0, char*
7676
}
7777

7878
if (i>3){
79-
start=pos2col(seq_0,i-3)+1;
79+
//start=pos2col(seq_0,i-3)+1;
80+
start=map_0[i-3]+1;
8081
}
8182

8283
if (i==3){
8384
start=1;
8485
}
8586

86-
end=pos2col(seq_0,i);
87+
//end=pos2col(seq_0,i);
88+
end=map_0[i];
8789

8890
strncpy(block_0, seq_0+start-1,end-start+1);
8991
strncpy(block_k, seq_k+start-1,end-start+1);
@@ -230,3 +232,140 @@ double stddev(double* data, int N){
230232
return sqrt(sum/(double)(N-1));
231233
}
232234

235+
void copyAln(struct aln *src[],struct aln *dest[]){
236+
237+
int i,j,L;
238+
char* seq;
239+
240+
L=strlen(src[0]->seq);
241+
242+
for (i=0;src[i]!=NULL;i++){
243+
244+
dest[i]=createAlnEntry(strdup(src[i]->name),
245+
strdup(src[i]->seq),
246+
src[i]->start,
247+
src[i]->length,
248+
src[i]->fullLength,
249+
src[i]->strand);
250+
}
251+
252+
dest[i]=NULL;
253+
}
254+
255+
256+
257+
258+
259+
260+
void printAlnClustal(FILE *out, const struct aln* AS[]){
261+
262+
int i;
263+
264+
fprintf(out,"CLUSTAL W\n\n");
265+
266+
for (i=0;AS[i]!=NULL;i++){
267+
fprintf(out, "%s %s\n",AS[i]->name,AS[i]->seq);
268+
}
269+
270+
fprintf(out, "\n");
271+
272+
}
273+
274+
275+
void printResults(FILE* outfile, int outputFormat, segmentStats results[]){
276+
277+
int i,k;
278+
char c;
279+
char name[1024]="";
280+
char prefix[1024]="";
281+
char suffix[1024]="";
282+
283+
if (outputFormat==0){
284+
285+
if (results[0].score<0.0){
286+
fprintf(outfile,"No significant coding regions found.\n");
287+
} else {
288+
289+
fprintf(outfile, "\n%5s%7s%6s%6s%12s%12s%12s%9s%9s\n",
290+
"Frame","Length","From","To","Name","Start","End", "Score","P");
291+
292+
fprintf(outfile, "==============================================================================\n");
293+
294+
i=0;
295+
296+
while (results[i].score>0){
297+
298+
fprintf(outfile, "%4c%i%7i%6i%6i%12s%12i%12i%9.2f",
299+
results[i].strand, results[i].frame,
300+
results[i].endSite-results[i].startSite+1,
301+
results[i].startSite,results[i].endSite,
302+
results[i].name,
303+
results[i].start,results[i].end,
304+
results[i].score);
305+
306+
if (results[i].pvalue < 0.001){
307+
fprintf(outfile, "% 9.1e\n",results[i].pvalue);
308+
} else {
309+
fprintf(outfile, "% 9.2f\n",results[i].pvalue);
310+
}
311+
312+
i++;
313+
}
314+
}
315+
}
316+
317+
318+
if (outputFormat==1){
319+
i=0;
320+
while (results[i].score>0){
321+
/* if name is of the form hg18.chromX than only display chromX */
322+
k=0;
323+
while (1){
324+
if (results[i].name[k]=='\0' || results[i].name[k]=='.'){
325+
break;
326+
}
327+
k++;
328+
}
329+
330+
if (k==strlen((char*)results[i].name)){
331+
strcpy(name,results[i].name);
332+
} else {
333+
strcpy(name,results[i].name+k+1);
334+
}
335+
336+
fprintf(outfile,"%s\t%s\t%s\t%i\t%i\t%.2f|%.2e\t%c\t%c\t%s\n",
337+
name, "RNAcode","CDS",
338+
results[i].start+1,results[i].end+1,
339+
results[i].score,
340+
results[i].pvalue,
341+
results[i].strand, '.',"gene_id \"Gene 0\"; transcript_id \"transcript 0\";");
342+
i++;
343+
344+
/* GTF, currently only outputs highest scoring hit */
345+
if (i > 0) break;
346+
}
347+
}
348+
349+
if (outputFormat==2){
350+
351+
i=0;
352+
353+
while (results[i].score>0){
354+
fprintf(outfile, "%c\t%i\t%i\t%i\t%i\t%s\t%i\t%i\t%7.3f",
355+
results[i].strand, results[i].frame,
356+
results[i].endSite-results[i].startSite+1,
357+
results[i].startSite,results[i].endSite,
358+
results[i].name,
359+
results[i].start,results[i].end,
360+
results[i].score);
361+
362+
if (results[i].pvalue < 0.001){
363+
fprintf(outfile, "% 9.3e\n",results[i].pvalue);
364+
} else {
365+
fprintf(outfile, "% 9.3f\n",results[i].pvalue);
366+
}
367+
break;
368+
i++;
369+
}
370+
}
371+
}

src/misc.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ void sortAln(const struct aln* origAln[], struct aln* sampledAln[]);
1111

1212
void freeResults(segmentStats results[]);
1313

14-
void getBlock(const char* seq_0, const char* seq_k, int i, char* block_0, char* block_k, int* z );
14+
//void getBlock(const char* seq_0, const char* seq_k, int i, char* block_0, char* block_k, int* z );
15+
void getBlock(int i, const char* seq_0, const char* seq_k, const int* map_0, const int* map_k, char* block_0, char* block_k, int* z );
1516

1617
int pos2col(const char* seq, int pos);
1718

@@ -25,5 +26,10 @@ double stddev(double* data, int N);
2526

2627
double gaussian (const double sigma);
2728

29+
void copyAln(struct aln *src[],struct aln *dest[]);
30+
31+
void printAlnClustal(FILE *out, const struct aln* AS[]);
32+
33+
void printResults(FILE* outfile, int outputFormat, segmentStats results[]);
2834

2935
#endif

0 commit comments

Comments
 (0)