Skip to content

Commit 22aa33c

Browse files
Merge pull request #103 from shandley/dev
Dev
2 parents ca0ff32 + a35b26f commit 22aa33c

25 files changed

+93
-64
lines changed

hecatomb/hecatomb.VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
v1.3.0
1+
v1.3.1

hecatomb/snakemake/workflow/AddHost.smk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ rule minimap_viral_refseq:
4444
ref = config["args"]["hostFa"],
4545
vir = config["virRefSeq"]
4646
output:
47-
temp(os.path.join(dir["out"]["temp"], f'{config["args"]["hostName"]}.bed'))
47+
temp(os.path.join(dir["out"]["temp"], config["args"]["hostName"] + ".bed"))
4848
params:
4949
config["addHost"]["minViralAlnLen"]
5050
conda:
@@ -73,9 +73,9 @@ rule mask_fasta:
7373
"""Mask the host genome using bedtools"""
7474
input:
7575
fa = config["args"]["hostFa"],
76-
aln = os.path.join(dir["out"]["temp"], f'{config["args"]["hostName"]}.bed')
76+
aln = os.path.join(dir["out"]["temp"], config["args"]["hostName"] + ".bed")
7777
output:
78-
mask = temp(os.path.join(dir["out"]["temp"], f'{config["args"]["hostName"]}.mask.bed')),
78+
mask = temp(os.path.join(dir["out"]["temp"], config["args"]["hostName"] + ".mask.bed")),
7979
fa = config["outFasta"]
8080
params:
8181
fa = os.path.join(dir["dbs"]["hostBase"],config["args"]["hostName"],"masked_ref.fa"),

hecatomb/snakemake/workflow/combineOutputs.smk

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ for f in config["args"]["combineRuns"]:
2929
]:
3030
if not is_non_zero_file(assemblyFile):
3131
sys.stderr.write(
32-
f"No/missing assembly files for {f}, skipping assembly files.\n"
32+
"No/missing assembly files for " + f + ", skipping assembly files.\n"
3333
)
3434
assemblyFiles = False
3535
with open(os.path.join(f, "results", "sampleSeqCounts.tsv"), "r") as t:
@@ -43,7 +43,7 @@ for f in config["args"]["combineRuns"]:
4343
allDirSmplLen[f] = {}
4444
allDirSmplLen[f][l[0]] = l[1]
4545
else:
46-
sys.stderr.write(f"Ignoring duplicated sample {l[0]} in {f}\n")
46+
sys.stderr.write("Ignoring duplicated sample " + l[0] + " in " + f + "\n")
4747

4848

4949
# hijack contig_mapping.smk for remaking the contigSeqTable
@@ -97,10 +97,10 @@ rule combineSampleSeqCounts:
9797
output:
9898
os.path.join(dir["out"]["results"], 'sampleSeqCounts.tsv')
9999
run:
100-
with open(output[0],'w') as o:
100+
with open(output[0], 'w') as o:
101101
for oDir in allDirSmplLen.keys():
102102
for smpl in allDirSmplLen[oDir].keys():
103-
o.write(f'{smpl}\t{allDirSmplLen[oDir][smpl]}\n')
103+
o.write(smpl + "\t" + allDirSmplLen[oDir][smpl] + "\n")
104104

105105

106106
rule combineBigtables:

hecatomb/snakemake/workflow/envs/koverage.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- koverage>=0.1.9
7+
- koverage>=0.1.10

hecatomb/snakemake/workflow/envs/trimnami.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- trimnami>=0.1.0
7+
- trimnami>=0.1.2

hecatomb/snakemake/workflow/rules/annotation/read_annotation.smk

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ rule primary_aa_search:
3434
os.path.join(dir["env"], "mmseqs2.yaml")
3535
shell:
3636
"mmseqs easy-search {input.seqs} {input.db} {output} {params.alnRes} "
37-
" {params.filtaa} {params.sensaa} "
38-
" --threads {threads} --split-memory-limit {params.memsplit} &> {log} "
37+
"{params.filtaa} {params.sensaa} "
38+
"--threads {threads} --split-memory-limit {params.memsplit} &> {log}; "
3939

4040

4141

@@ -105,7 +105,8 @@ rule secondary_aa_tophit_lineage:
105105
os.path.join(dir["env"], "seqkit.yaml")
106106
resources:
107107
time = resources["sml"]["time"],
108-
mem = resources["ram"]["mem"]
108+
mem_mb=resources["ram"]["mem"],
109+
mem=str(resources["ram"]["mem"]) + "MB",
109110
params:
110111
taxonFormat = lambda wildcards: config["immutable"]["taxonkitReformat"]
111112
benchmark:
@@ -134,7 +135,8 @@ rule secondary_aa_refactor_finalize:
134135
os.path.join(dir["env"], "seqkit.yaml")
135136
resources:
136137
time=resources["sml"]["time"],
137-
mem=resources["ram"]["mem"]
138+
mem_mb=resources["ram"]["mem"],
139+
mem=str(resources["ram"]["mem"]) + "MB",
138140
params:
139141
taxonFormat = lambda wildcards: config["immutable"]["taxonkitReformat"]
140142
benchmark:
@@ -164,7 +166,8 @@ rule secondary_aa_output_table:
164166
nonvir = os.path.join(dir["out"]["secondaryAA"], "AA_bigtable.nonviral.tsv")
165167
resources:
166168
time=resources["sml"]["time"],
167-
mem=resources["ram"]["mem"]
169+
mem_mb=resources["ram"]["mem"],
170+
mem=str(resources["ram"]["mem"]) + "MB",
168171
benchmark:
169172
os.path.join(dir["out"]["bench"], "secondary_aa_generate_output_table.txt")
170173
log:
@@ -187,7 +190,8 @@ rule secondary_aa_parsing:
187190
unclass_seqs = os.path.join(dir["out"]["primaryAA"], "primary.aa.unclassified.fasta")
188191
resources:
189192
time=resources["sml"]["time"],
190-
mem=resources["ram"]["mem"]
193+
mem_mb=resources["ram"]["mem"],
194+
mem=str(resources["ram"]["mem"]) + "MB",
191195
benchmark:
192196
os.path.join(dir["out"]["bench"], "secondary_aa_parsing.txt")
193197
log:
@@ -301,7 +305,8 @@ rule secondary_nt_lca_table:
301305
os.path.join(dir["out"]["stderr"], "secondary_nt_lca_table.log")
302306
resources:
303307
time = resources["sml"]["time"],
304-
mem = resources["ram"]["mem"]
308+
mem_mb=resources["ram"]["mem"],
309+
mem=str(resources["ram"]["mem"]) + "MB",
305310
group:
306311
"secondary_nt_parsing"
307312
script:
@@ -319,7 +324,8 @@ rule secondary_nt_calc_lca:
319324
top_lineage = os.path.join(dir["out"]["secondaryNT"], "top_lineage.tsv"),
320325
resources:
321326
time=resources["sml"]["time"],
322-
mem=resources["ram"]["mem"]
327+
mem_mb=resources["ram"]["mem"],
328+
mem=str(resources["ram"]["mem"]) + "MB",
323329
params:
324330
taxonFormat = lambda wildcards: config["immutable"]["taxonkitReformat"],
325331
conda:
@@ -357,7 +363,8 @@ rule secondary_nt_generate_output_table:
357363
nonvir = os.path.join(dir["out"]["secondaryNT"], "NT_bigtable.nonviral.tsv")
358364
resources:
359365
time=resources["sml"]["time"],
360-
mem=resources["ram"]["mem"]
366+
mem_mb=resources["ram"]["mem"],
367+
mem=str(resources["ram"]["mem"]) + "MB",
361368
params:
362369
taxIdIgnore = config["mmseqs"]["taxIdIgnore"].split(),
363370
bigtableHeader = config["immutable"]["bigtableHeader"]
@@ -384,7 +391,8 @@ rule combine_aa_nt:
384391
os.path.join(dir["out"]["stderr"], "combine_AA_NT.log")
385392
resources:
386393
time=resources["sml"]["time"],
387-
mem=resources["ram"]["mem"]
394+
mem_mb=resources["ram"]["mem"],
395+
mem=str(resources["ram"]["mem"]) + "MB",
388396
group:
389397
"secondary_nt_parsing"
390398
shell:

hecatomb/snakemake/workflow/rules/assembly/shortreads.smk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ rule cross_assembly:
3434
os.path.join(dir["env"],"megahit.yaml")
3535
shell:
3636
"if [[ -d {params.mh_dir} ]]; "
37-
"then; "
37+
"then "
3838
"rm -rf {params.mh_dir}; "
3939
"fi; "
4040
"megahit "
@@ -90,7 +90,7 @@ rule megahit_sample_paired:
9090
"assembly"
9191
shell:
9292
"if [[ -d {params.mh_dir} ]]; "
93-
"then; "
93+
"then "
9494
"rm -rf {params.mh_dir}; "
9595
"fi; "
9696
"megahit "
@@ -132,7 +132,7 @@ rule megahit_sample_unpaired:
132132
"assembly"
133133
shell:
134134
"if [[ -d {params.mh_dir} ]]; "
135-
"then; "
135+
"then "
136136
"rm -rf {params.mh_dir}; "
137137
"fi; "
138138
"megahit "

hecatomb/snakemake/workflow/rules/preflight/targets.smk

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,10 @@ targets["summary"] = [
105105

106106
# BUILD ENV TARGETS
107107
targets["envs"] = []
108-
108+
# Envs in the envs/ directory
109109
for filename in os.listdir(dir["env"]):
110110
if filename.endswith(".yaml") or filename.endswith(".yml"):
111-
targets["envs"].append(os.path.join(dir["out"]["temp"], filename + ".done"))
111+
targets["envs"].append(os.path.join(dir["out"]["temp"], filename + ".done"))
112+
# Envs built by sub-snaketools
113+
targets["envs"].append(os.path.join(dir["out"]["temp"], "subenvs.trimnami"))
114+
targets["envs"].append(os.path.join(dir["out"]["temp"], "subenvs.koverage"))

hecatomb/snakemake/workflow/rules/preflight/validate.smk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ for f in config["dbs"]["files"] + config["dbtax"]["files"]:
2626
dbFile = os.path.join(dir["dbs"]["base"], f)
2727
if not os.path.isfile(dbFile):
2828
dbFail = True
29-
sys.stderr.write(f" ERROR: missing database file {dbFile}\n")
29+
sys.stderr.write(" ERROR: missing database file " + dbFile + "\n")
3030
if dbFail:
3131
sys.stderr.write("\n"
3232
" FATAL: One or more database files is missing.\n"
@@ -39,7 +39,7 @@ onstart:
3939
if os.path.isdir(dir["out"]["stderr"]):
4040
oldLogs = filter(re.compile(r'^(?!old_).*').match, os.listdir(dir["out"]["stderr"]))
4141
for logfile in oldLogs:
42-
os.rename(os.path.join(dir["out"]["stderr"], logfile), os.path.join(dir["out"]["stderr"], f'old_{logfile}'))
42+
os.rename(os.path.join(dir["out"]["stderr"], logfile), os.path.join(dir["out"]["stderr"], "old_" + logfile))
4343

4444
# Success message
4545
onsuccess:
@@ -50,7 +50,7 @@ onsuccess:
5050
onerror:
5151
copy_log()
5252
sys.stderr.write('\n\n FATAL: Hecatomb encountered an error.\n\n')
53-
sys.stderr.write(f'Check the Hecatomb logs directory for command-related errors:\n\n{dir["out"]["stderr"]}\n\n')
53+
sys.stderr.write("Check the Hecatomb logs directory for command-related errors:\n\n" + dir["out"]["stderr"] + "\n\n")
5454
if config["args"]["profile"]:
5555
sys.stderr.write(
5656
'Also check your scheduler logs for sheduler-related errors. Your profile determins where these are saved'

hecatomb/snakemake/workflow/rules/preprocessing/preprocessing.smk

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@ rule buildEnv:
77
"touch {output}"
88

99

10+
rule subsnake_build_envs:
11+
output:
12+
touch(os.path.join(dir["out"]["temp"], "subenvs.{env}"))
13+
conda:
14+
lambda wildcards: os.path.join(dir["env"], wildcards.env + ".yaml")
15+
shell:
16+
"{wildcards.env} test build_envs"
17+
18+
1019
rule trimnami_config:
1120
output:
1221
os.path.join(dir["out"]["temp"], "trimnami.config.yaml")
@@ -63,7 +72,7 @@ rule cluster_sequences:
6372
temp(os.path.join(dir["out"]["temp"],"{sample}_R1_all_seqs.fasta"))
6473
params:
6574
respath=lambda wildcards, output: os.path.split(output[0])[0],
66-
tmppath=lambda wildcards, output: os.path.join(os.path.split(output[0])[0],f"{wildcards.sample}_TMP"),
75+
tmppath=lambda wildcards, output: os.path.join(os.path.split(output[0])[0], wildcards.sample + "_TMP"),
6776
prefix="{sample}_R1",
6877
config=config["mmseqs"]["linclustParams"]
6978
benchmark:
@@ -78,9 +87,12 @@ rule cluster_sequences:
7887
conda:
7988
os.path.join(dir["env"],"mmseqs2.yaml")
8089
shell:
81-
"mmseqs easy-linclust {input.fq} {params.respath}/{params.prefix} {params.tmppath} "
90+
"mmseqs easy-linclust {input.fq} "
91+
"{params.respath}/{params.prefix} "
92+
"{params.tmppath} "
8293
"{params.config} "
83-
"--threads {threads} &> {log}; "
94+
"--threads {threads} "
95+
"&> {log}; "
8496

8597

8698
rule create_individual_seqtables:

hecatomb/snakemake/workflow/rules/reports/summaries_optional.smk

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ rule trimmedHostRemovedCounts:
3434
run:
3535
out_counts = dict()
3636
for sample in params.samples["names"]:
37-
out_counts[sample]["s2_host_removed_R1"] = file_len(os.path.join(dir["out"]["temp"], "p04", f"{sample}_R1.all.fastq"))
37+
out_counts[sample]["s2_host_removed_R1"] = file_len(os.path.join(dir["out"]["temp"], "p04", sample + "_R1.all.fastq"))
3838
try:
39-
out_counts[sample]["s2_host_removed_R2"] = file_len(os.path.join(dir["out"]["temp"],"p04", f"{sample}_R2.all.fastq"))
39+
out_counts[sample]["s2_host_removed_R2"] = file_len(os.path.join(dir["out"]["temp"],"p04", sample + "_R2.all.fastq"))
4040
except FileNotFoundError:
4141
pass
4242
with open(output[0],"w") as stream:
@@ -134,9 +134,9 @@ rule unclassifiedSeqs:
134134
try:
135135
classSeq[id]
136136
except KeyError:
137-
out_fh.write(f">{id}\n{seq}\n")
137+
out_fh.write(">" + id + "\n" + seq + "\n")
138138
else:
139-
sys.stderr.write(f"malformed {input.fa} file? expecting {line} to be fasta header, complain to Mike")
139+
sys.stderr.write("malformed " + input.fa + " file? expecting " + line + " to be fasta header, complain to Mike")
140140
exit(1)
141141

142142

hecatomb/snakemake/workflow/scripts/aaBigtable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
for line in baltfh:
1616
l = line.strip().split("\t")
1717
if len(l) == 3:
18-
balt[l[0]] = f"{l[1]}\t{l[2]}"
18+
balt[l[0]] = l[1] + "\t" + l[2]
1919

2020
logging.debug("Reading in Taxon info for LCA seqs")
2121
lcaLin = {}

hecatomb/snakemake/workflow/scripts/aaPrimaryParse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
id = line.strip().replace(">", "")
2323
seq = inFa.readline().strip()
2424
if id in topHit:
25-
outClass.write(f">{id}\n{seq}\n")
25+
outClass.write(">" + id + "\n" + seq + "\n")
2626
else:
2727
sys.stderr.write(
28-
f"malformed {snakemake.input.seqs} file, or something, complain to Mike."
28+
"malformed " + snakemake.input.seqs + " file, or something, complain to Mike."
2929
)
3030
exit(1)
3131

hecatomb/snakemake/workflow/scripts/aaSecondaryParse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
id = line.strip().replace(">", "")
2323
seq = inFa.readline().strip()
2424
if not id in virSeqs:
25-
outFa.write(f">{id}\n{seq}\n")
25+
outFa.write(">" + id + "\n" + seq + "\n")
2626
else:
2727
logging.error(
28-
f"malformed {snakemake.input.seqs} file, or something, complain to Mike"
28+
"malformed " + snakemake.input.seqs + " file, or something, complain to Mike"
2929
)
3030
exit(1)
3131

hecatomb/snakemake/workflow/scripts/contigCountTable.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
logging.basicConfig(filename=snakemake.log[0], filemode="w", level=logging.DEBUG)
99

1010
covStat = {}
11-
logging.debug(f"Reading {snakemake.input.covstats}")
11+
logging.debug("Reading " + snakemake.input.covstats)
1212
with open(snakemake.input.covstats, "r") as f:
1313
for line in f:
1414
if not line.startswith("#"):
@@ -18,15 +18,15 @@
1818
covStat[l[0]] = [l[1], l[3], l[4], l[5], l[9]]
1919

2020
total = 0
21-
logging.debug(f"Reading {snakemake.input.rpkm}")
21+
logging.debug("Reading " + snakemake.input.rpkm)
2222
with open(snakemake.input.rpkm, "r") as f:
2323
for line in f:
2424
if not line.startswith("#"):
2525
l = line.split("\t")
2626
rpk = int(l[2]) / (int(l[1]) / 1000) # RPK
2727
total += rpk / 1000000 # size factor per million
2828

29-
logging.debug(f"Writing to {snakemake.output.count_tbl}")
29+
logging.debug("Writing to " + snakemake.output.count_tbl)
3030
with open(snakemake.output.count_tbl, "w") as o:
3131
o.write(
3232
"\t".join(

hecatomb/snakemake/workflow/scripts/contigKronaText.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,5 @@
2323
logging.debug("Sorting and writing contig taxon info")
2424
outFH = open(snakemake.output[0], "w")
2525
for k in sorted(counts.keys()):
26-
outFH.write(f"{counts[k]}\t{k}\n")
26+
outFH.write(str(counts[k]) + "\t" + k + "\n")
2727
outFH.close()

hecatomb/snakemake/workflow/scripts/contigReadTaxon.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
taxOut = "\t".join(
4747
([c[1], str((int(c[1]) / smplCounts[c[0]]) * 1000000)] + ["NA"] * 11)
4848
)
49-
outFH.write(f"{infOut}\t{taxOut}\n")
49+
outFH.write(infOut + "\t" + taxOut + "\n")
5050
bam.close()
5151

5252
logging.debug("Done")

hecatomb/snakemake/workflow/scripts/kronaText.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,5 @@
2222
logging.debug("Sorting, counting, and writing tax assignments")
2323
outFH = open(snakemake.output[0], "w")
2424
for k in sorted(counts.keys()):
25-
outFH.write(f"{counts[k]}\t{k}\n")
25+
outFH.write(str(counts[k]) + "\t" + k + "\n")
2626
outFH.close()

0 commit comments

Comments
 (0)