Skip to content

Commit

Permalink
ENH Normalize output format
Browse files (browse the repository at this point in the history)
This makes the result more reproducible (and should fix test failures)
  • Loading branch information
luispedro committed Apr 25, 2024
1 parent 0c1bf81 commit 4acde50
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 29 deletions.
5 changes: 3 additions & 2 deletions macrel/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,15 +287,16 @@ def do_predict(args, tdir):
def do_density(args, clen, prediction):
tpred = prediction.reset_index()
tpred['contig'] = tpred['index'].apply(lambda x: '_'.join(x.split('_')[:-1]))
tpred = tpred[tpred['AMP_probability'] > 0.5]
tpred = tpred.query('AMP_probability > 0.5')
tpred = tpred.groupby('contig').agg('size')
tpred = tpred.reset_index()
tpred = tpred.rename({0: 'AMPs'}, axis=1)
clen = clen.merge(on='contig', right=tpred, how='outer').fillna(0)
clen[clen.columns[1:]] = clen[clen.columns[1:]].astype(int)
clen[clen.columns[1:]] = clen[clen.columns[1:]].astype(int)
ofile = path.join(args.output, args.outtag + '.percontigs.gz')
sample = clen.set_index('contig').sum(axis=0).tolist()
sample_density = sample[-1] * 1e6 / sample[0]
clen.sort_values('contig', inplace=True)
with open_output(ofile, mode='wb') as raw_out:
with gzip.open(raw_out, 'wt') as out:
from .macrel_version import __version__
Expand Down
38 changes: 19 additions & 19 deletions tests/contigs/expected.percontigs
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
# Prediction from macrel v1.3.0
# Macrel calculated for the sample a density of 45.062 AMPs / Mbp.
contig length ORFs smORFs AMPs
scaffold2530_2_MH0058 717 2 2 0
scaffold75334_1_MH0058 3424 1 1 1
scaffold54112_5_MH0058 728 1 1 0
scaffold24504_2_MH0058 505 1 1 0
scaffold95393_2_MH0058 995 2 1 0
scaffold8449_1_MH0058 1037 2 1 0
C4060843_1_MH0058 518 1 1 0
C4067509_1_MH0058 534 1 1 0
C4177507_1_MH0058 1322 2 1 0
C4193751_1_MH0058 1820 3 1 0
scaffold10455_3_MH0058 808 3 3 0
scaffold98598_5_MH0058 3426 4 2 0
scaffold76045_5_MH0058 960 3 3 0
scaffold106190_2_MH0058 688 1 1 0
C4067509_1_MH0058 534 1 1 0
scaffold90770_1_MH0058 1031 2 2 0
scaffold33693_17_MH0058 3481 2 1 1
scaffold77554_3_MH0058 6086 9 4 0
scaffold34596_7_MH0058 1345 1 0 0
scaffold75223_9_MH0058 3597 2 2 0
scaffold30291_4_MH0058 824 2 1 0
scaffold107406_2_MH0058 1401 2 1 0
scaffold16564_1_MH0058 992 2 2 0
scaffold7019_2_MH0058 5218 6 3 0
C4060843_1_MH0058 518 1 1 0
scaffold20234_2_MH0058 2926 4 1 0
C4193751_1_MH0058 1820 3 1 0
C4177507_1_MH0058 1322 2 1 0
scaffold24504_2_MH0058 505 1 1 0
scaffold2530_2_MH0058 717 2 2 0
scaffold30291_4_MH0058 824 2 1 0
scaffold33693_17_MH0058 3481 2 1 1
scaffold34596_7_MH0058 1345 1 0 0
scaffold54112_5_MH0058 728 1 1 0
scaffold7019_2_MH0058 5218 6 3 0
scaffold75223_9_MH0058 3597 2 2 0
scaffold75334_1_MH0058 3424 1 1 1
scaffold76045_5_MH0058 960 3 3 0
scaffold77554_3_MH0058 6086 9 4 0
scaffold8449_1_MH0058 1037 2 1 0
scaffold90770_1_MH0058 1031 2 2 0
scaffold95393_2_MH0058 995 2 1 0
scaffold98598_5_MH0058 3426 4 2 0
6 changes: 3 additions & 3 deletions tests/reads.se/expected.percontigs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
contig length ORFs smORFs AMPs
k47_0 3379 4 2 0
k47_1 6046 9 4 0
k47_3 5202 6 3 0
k47_7 3483 2 2 0
k47_8 2877 3 1 0
k47_10 3374 1 1 1
k47_11 3415 2 1 1
k47_12 1307 1 0 0
k47_16 1790 3 1 0
k47_17 1376 2 1 0
k47_24 1228 2 1 0
k47_3 5202 6 3 0
k47_7 3483 2 2 0
k47_8 2877 3 1 0
10 changes: 5 additions & 5 deletions tests/reads/expected.percontigs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Prediction from macrel v1.3.0
# Macrel calculated for the sample a density of 57.627 AMPs / Mbp.
contig length ORFs smORFs AMPs
k77_3 1270 1 0 0
k77_5 5202 6 3 0
k77_6 3527 2 2 0
k77_7 6058 9 4 0
k77_8 1009 2 1 0
k77_11 1303 2 1 0
k77_12 3374 1 1 1
k77_13 2920 4 1 0
k77_15 3457 2 1 1
k77_16 1790 3 1 0
k77_20 1380 2 1 0
k77_23 3416 4 2 0
k77_3 1270 1 0 0
k77_5 5202 6 3 0
k77_6 3527 2 2 0
k77_7 6058 9 4 0
k77_8 1009 2 1 0

0 comments on commit 4acde50

Please sign in to comment.