From 4acde508688c6efda00201b861c8c248a65c5315 Mon Sep 17 00:00:00 2001 From: Luis Pedro Coelho Date: Thu, 25 Apr 2024 17:30:03 +1000 Subject: [PATCH] ENH Normalize output format This makes the result more reproducible (and should fix test failures) --- macrel/main.py | 5 ++-- tests/contigs/expected.percontigs | 38 +++++++++++++++--------------- tests/reads.se/expected.percontigs | 6 ++--- tests/reads/expected.percontigs | 10 ++++---- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/macrel/main.py b/macrel/main.py index 5e2f979..ad4970f 100644 --- a/macrel/main.py +++ b/macrel/main.py @@ -287,15 +287,16 @@ def do_predict(args, tdir): def do_density(args, clen, prediction): tpred = prediction.reset_index() tpred['contig'] = tpred['index'].apply(lambda x: '_'.join(x.split('_')[:-1])) - tpred = tpred[tpred['AMP_probability'] > 0.5] + tpred = tpred.query('AMP_probability > 0.5') tpred = tpred.groupby('contig').agg('size') tpred = tpred.reset_index() tpred = tpred.rename({0: 'AMPs'}, axis=1) clen = clen.merge(on='contig', right=tpred, how='outer').fillna(0) - clen[clen.columns[1:]] = clen[clen.columns[1:]].astype(int) + clen[clen.columns[1:]] = clen[clen.columns[1:]].astype(int) ofile = path.join(args.output, args.outtag + '.percontigs.gz') sample = clen.set_index('contig').sum(axis=0).tolist() sample_density = sample[-1] * 1e6 / sample[0] + clen.sort_values('contig', inplace=True) with open_output(ofile, mode='wb') as raw_out: with gzip.open(raw_out, 'wt') as out: from .macrel_version import __version__ diff --git a/tests/contigs/expected.percontigs b/tests/contigs/expected.percontigs index f0612a6..11603cf 100644 --- a/tests/contigs/expected.percontigs +++ b/tests/contigs/expected.percontigs @@ -1,27 +1,27 @@ # Prediction from macrel v1.3.0 # Macrel calculated for the sample a density of 45.062 AMPs / Mbp. contig length ORFs smORFs AMPs -scaffold2530_2_MH0058 717 2 2 0 -scaffold75334_1_MH0058 3424 1 1 1 -scaffold54112_5_MH0058 728 1 1 0 -scaffold24504_2_MH0058 505 1 1 0 -scaffold95393_2_MH0058 995 2 1 0 -scaffold8449_1_MH0058 1037 2 1 0 +C4060843_1_MH0058 518 1 1 0 +C4067509_1_MH0058 534 1 1 0 +C4177507_1_MH0058 1322 2 1 0 +C4193751_1_MH0058 1820 3 1 0 scaffold10455_3_MH0058 808 3 3 0 -scaffold98598_5_MH0058 3426 4 2 0 -scaffold76045_5_MH0058 960 3 3 0 scaffold106190_2_MH0058 688 1 1 0 -C4067509_1_MH0058 534 1 1 0 -scaffold90770_1_MH0058 1031 2 2 0 -scaffold33693_17_MH0058 3481 2 1 1 -scaffold77554_3_MH0058 6086 9 4 0 -scaffold34596_7_MH0058 1345 1 0 0 -scaffold75223_9_MH0058 3597 2 2 0 -scaffold30291_4_MH0058 824 2 1 0 scaffold107406_2_MH0058 1401 2 1 0 scaffold16564_1_MH0058 992 2 2 0 -scaffold7019_2_MH0058 5218 6 3 0 -C4060843_1_MH0058 518 1 1 0 scaffold20234_2_MH0058 2926 4 1 0 -C4193751_1_MH0058 1820 3 1 0 -C4177507_1_MH0058 1322 2 1 0 +scaffold24504_2_MH0058 505 1 1 0 +scaffold2530_2_MH0058 717 2 2 0 +scaffold30291_4_MH0058 824 2 1 0 +scaffold33693_17_MH0058 3481 2 1 1 +scaffold34596_7_MH0058 1345 1 0 0 +scaffold54112_5_MH0058 728 1 1 0 +scaffold7019_2_MH0058 5218 6 3 0 +scaffold75223_9_MH0058 3597 2 2 0 +scaffold75334_1_MH0058 3424 1 1 1 +scaffold76045_5_MH0058 960 3 3 0 +scaffold77554_3_MH0058 6086 9 4 0 +scaffold8449_1_MH0058 1037 2 1 0 +scaffold90770_1_MH0058 1031 2 2 0 +scaffold95393_2_MH0058 995 2 1 0 +scaffold98598_5_MH0058 3426 4 2 0 diff --git a/tests/reads.se/expected.percontigs b/tests/reads.se/expected.percontigs index e668cb8..744ef17 100644 --- a/tests/reads.se/expected.percontigs +++ b/tests/reads.se/expected.percontigs @@ -3,12 +3,12 @@ contig length ORFs smORFs AMPs k47_0 3379 4 2 0 k47_1 6046 9 4 0 -k47_3 5202 6 3 0 -k47_7 3483 2 2 0 -k47_8 2877 3 1 0 k47_10 3374 1 1 1 k47_11 3415 2 1 1 k47_12 1307 1 0 0 k47_16 1790 3 1 0 k47_17 1376 2 1 0 k47_24 1228 2 1 0 +k47_3 5202 6 3 0 +k47_7 3483 2 2 0 +k47_8 2877 3 1 0 diff --git a/tests/reads/expected.percontigs b/tests/reads/expected.percontigs index a33fd26..e3ef230 100644 --- a/tests/reads/expected.percontigs +++ b/tests/reads/expected.percontigs @@ -1,11 +1,6 @@ # Prediction from macrel v1.3.0 # Macrel calculated for the sample a density of 57.627 AMPs / Mbp. contig length ORFs smORFs AMPs -k77_3 1270 1 0 0 -k77_5 5202 6 3 0 -k77_6 3527 2 2 0 -k77_7 6058 9 4 0 -k77_8 1009 2 1 0 k77_11 1303 2 1 0 k77_12 3374 1 1 1 k77_13 2920 4 1 0 @@ -13,3 +8,8 @@ k77_15 3457 2 1 1 k77_16 1790 3 1 0 k77_20 1380 2 1 0 k77_23 3416 4 2 0 +k77_3 1270 1 0 0 +k77_5 5202 6 3 0 +k77_6 3527 2 2 0 +k77_7 6058 9 4 0 +k77_8 1009 2 1 0