-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22 from PathoGenOmics-Lab/feature-minorrev
Minor workflow revision
- Loading branch information
Showing
15 changed files
with
266 additions
and
202 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import argparse | ||
from pathlib import Path | ||
from typing import List | ||
|
||
import yaml | ||
|
||
|
||
def find_file_with_extension(directory: Path, prefix: str, extensions: List[str]) -> str:
    """Return the only file under *directory* matching a name prefix and suffix.

    The directory is searched recursively. An entry matches when its file name
    starts with ``prefix`` and ends with any of ``extensions``. Only regular
    files are considered, so a directory whose name happens to match is never
    reported as a sequencing file.

    Args:
        directory: Root directory to search.
        prefix: Required start of the file name.
        extensions: Accepted file name endings, e.g. ``[".fa", ".fasta"]``.

    Returns:
        The POSIX-style path of the single matching file.

    Raises:
        SystemExit: If zero or more than one file matches; the message lists
            every candidate that was found.
    """
    # str.endswith accepts a tuple, which replaces the per-extension loop.
    suffixes = tuple(extensions)
    # Sorting makes the candidate list in the error message independent of the
    # order in which the filesystem happens to return entries.
    candidate_files = sorted(
        path
        for path in directory.rglob("*")
        if path.is_file()
        and path.name.startswith(prefix)
        and path.name.endswith(suffixes)
    )
    # NOTE: matching is by prefix, so prefix "S1" also matches "S10.fa"; such
    # collisions are reported as an error rather than resolved silently.
    if len(candidate_files) != 1:
        sys.exit(f"ERROR: {len(candidate_files)} candidates found in '{directory}' for prefix '{prefix}' with extensions {extensions}: {candidate_files}")
    return candidate_files[0].as_posix()
|
||
|
||
if __name__ == "__main__":
    # Command-line interface. Options are registered in the same order as
    # they should appear in the generated --help text.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i", "--sample-directory",
        type=Path,
        required=True,
        help="Directory containing sample sequencing data"
    )
    cli.add_argument(
        "-s", "--sample-names",
        required=True,
        nargs="+",
        help="Sample names to look for in the sample directory"
    )
    cli.add_argument(
        "-b", "--bam-extensions",
        required=False,
        nargs="+",
        default=[".trim.sort.bam"],
        help="File extensions for BAM files"
    )
    cli.add_argument(
        "-f", "--fasta-extensions",
        required=False,
        nargs="+",
        default=[".fa", ".fasta"],
        help="File extensions for FASTA files"
    )
    cli.add_argument(
        "-m", "--metadata-csv",
        type=Path,
        required=True,
        help="Metadata CSV file"
    )
    cli.add_argument(
        "-o", "--output-yaml",
        required=True,
        help="Output YAML file"
    )
    args = cli.parse_args()

    # Resolve the BAM and FASTA path of every requested sample. Each lookup
    # terminates the script unless exactly one file matches.
    samples = {}
    for name in args.sample_names:
        samples[name] = {
            "bam": find_file_with_extension(args.sample_directory, name, args.bam_extensions),
            "fasta": find_file_with_extension(args.sample_directory, name, args.fasta_extensions),
        }
    targets = {"SAMPLES": samples}

    # The metadata table must already exist; abort otherwise.
    if not args.metadata_csv.is_file():
        sys.exit(f"ERROR: metadata file '{args.metadata_csv}' does not exist")
    targets["METADATA"] = args.metadata_csv.as_posix()

    # Remaining keys: a fixed output directory name and two entries that are
    # written as null.
    targets.update({
        "OUTPUT_DIRECTORY": "output",
        "CONTEXT_FASTA": None,
        "MAPPING_REFERENCES_FASTA": None,
    })

    # Serialize the targets mapping to the requested YAML file.
    with open(args.output_yaml, "w") as handle:
        yaml.dump(targets, handle, indent=2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"ORF1ab polyprotein": "orf1ab", | ||
"ORF1a polyprotein": "orf1ab", | ||
"surface glycoprotein": "S", | ||
"ORF3a protein": "ORF3a", | ||
"envelope protein": "E", | ||
"membrane glycoprotein": "M", | ||
"ORF6 protein": "ORF6", | ||
"ORF7a protein": "ORF7", | ||
"ORF7b": "ORF7", | ||
"ORF8 protein": "ORF8", | ||
"nucleocapsid phosphoprotein": "N", | ||
"ORF10 protein": "ORF10" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
{ | ||
"AAA": "K", | ||
"AAC": "N", | ||
"AAG": "K", | ||
"AAT": "N", | ||
"ACA": "T", | ||
"ACC": "T", | ||
"ACG": "T", | ||
"ACT": "T", | ||
"AGA": "R", | ||
"AGC": "S", | ||
"AGG": "R", | ||
"AGT": "S", | ||
"ATA": "I", | ||
"ATC": "I", | ||
"ATG": "M", | ||
"ATT": "I", | ||
"CAA": "Q", | ||
"CAC": "H", | ||
"CAG": "Q", | ||
"CAT": "H", | ||
"CCA": "P", | ||
"CCC": "P", | ||
"CCG": "P", | ||
"CCT": "P", | ||
"CGA": "R", | ||
"CGC": "R", | ||
"CGG": "R", | ||
"CGT": "R", | ||
"CTA": "L", | ||
"CTC": "L", | ||
"CTG": "L", | ||
"CTT": "L", | ||
"GAA": "E", | ||
"GAC": "D", | ||
"GAG": "E", | ||
"GAT": "D", | ||
"GCA": "A", | ||
"GCC": "A", | ||
"GCG": "A", | ||
"GCT": "A", | ||
"GGA": "G", | ||
"GGC": "G", | ||
"GGG": "G", | ||
"GGT": "G", | ||
"GTA": "V", | ||
"GTC": "V", | ||
"GTG": "V", | ||
"GTT": "V", | ||
"TAA": "*", | ||
"TAC": "Y", | ||
"TAG": "*", | ||
"TAT": "Y", | ||
"TCA": "S", | ||
"TCC": "S", | ||
"TCG": "S", | ||
"TCT": "S", | ||
"TGA": "*", | ||
"TGC": "C", | ||
"TGG": "W", | ||
"TGT": "C", | ||
"TTA": "L", | ||
"TTC": "F", | ||
"TTG": "L", | ||
"TTT": "F" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.