Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update species probes #177

Merged
merged 29 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b9f184a
Add option --ncbi_names; handle new tb probes/hierarchy
martinghunt Jul 7, 2023
47057be
Remove hard-coded panel fix (crashes mykrobe predict)
martinghunt Jul 18, 2023
7af1192
Merge branch 'master' into update_species_probes
martinghunt Sep 22, 2023
47996b0
Add option --dump_species_covgs
martinghunt Sep 25, 2023
b0988d5
Remove commented out code
martinghunt Sep 25, 2023
1da8c8e
Run black
martinghunt Sep 25, 2023
0a56caf
Update changelog
martinghunt Sep 25, 2023
583dd46
Try numpy from pip install on windows
martinghunt Sep 25, 2023
587290f
Uninstall numpy to force install again
martinghunt Sep 25, 2023
fe0572b
Uninstall numpy to force install again
martinghunt Sep 25, 2023
db54fc4
Try pip install numpy --upgrade
martinghunt Sep 25, 2023
3cc4f9b
Revert to pacman install numpy; echo PATH to see wassup
martinghunt Sep 25, 2023
8783c75
Fix typo in echo
martinghunt Sep 25, 2023
676a656
windows build debugging
martinghunt Sep 25, 2023
72e0b74
Remove numpy dependency
martinghunt Sep 26, 2023
701d599
Revert "Remove numpy dependency"
martinghunt Sep 27, 2023
8ecab6f
Pin windows numpy to 1.25.0
martinghunt Sep 27, 2023
00c3a22
Revert "Pin windows numpy to 1.25.0"
martinghunt Sep 27, 2023
c0f9741
Revert "Revert "Pin windows numpy to 1.25.0""
martinghunt Sep 27, 2023
0a9ae33
Revert "Pin windows numpy to 1.25.0"
martinghunt Sep 27, 2023
007d3b6
Revert "Revert "Remove numpy dependency""
martinghunt Sep 27, 2023
620eae1
Revert "Remove numpy dependency"
martinghunt Sep 27, 2023
d9234bc
Revert "windows build debugging"
martinghunt Sep 27, 2023
80e3782
Revert "Fix typo in echo"
martinghunt Sep 27, 2023
4498642
Revert "Revert to pacman install numpy; echo PATH to see wassup"
martinghunt Sep 27, 2023
8c19d35
Revert "Try pip install numpy --upgrade"
martinghunt Sep 27, 2023
dd23e31
Revert "Uninstall numpy to force install again"
martinghunt Sep 27, 2023
75f60c5
Revert "Uninstall numpy to force install again"
martinghunt Sep 27, 2023
ad1c2bf
Revert "Try numpy from pip install on windows"
martinghunt Sep 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,24 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- When using `--debug` flag, log species probe coverage/depth info and also each
time a probe is rejected due to low coverage and/or depth.

- Added flag `--ncbi_names`. This is for the soon-to-be-updated tb probes for
species calling, where the JSON file will also report alternative NCBI taxon
names, as well as the default GTDB names.

- Added option `--dump_species_covgs`, to dump a JSON file of all the species
probe coverage information. This includes the raw coverage info from mccortex,
before it is aggregated into a single call that you see in the usual
mykrobe output.

### Changed

- The tb species probes are going to be updated after the next release of
mykrobe. Code changed to handle these new probes, specifically where a node in
the taxon tree has no probes. If a child node is called as present from the
reads, then push that call up to the parent node. Code still works as normal
on the existing (soon to be old) panels.


## [0.12.2]

### Fixed
Expand Down
11 changes: 11 additions & 0 deletions src/mykrobe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def __call__(self, parser, namespace, values, option_string=None):
action="store_true",
default=False,
)
# Optional output path for species probe coverage details, written as JSON
# according to the help text. No action or default is given, so argparse stores
# the supplied value as a string and leaves the attribute as None when the
# option is omitted.
genotyping_mixin.add_argument(
"--dump_species_covgs",
help="Dump species probes coverage information to a JSON file",
metavar="FILENAME",
)
genotyping_mixin.add_argument(
"-e",
"--expected_error_rate",
Expand Down Expand Up @@ -174,6 +179,12 @@ def __call__(self, parser, namespace, values, option_string=None):
help="File path to save output file as. Default is to stdout",
default="",
)
# Opt-in boolean flag (False unless given) requesting that NCBI species names
# be reported alongside the usual species names in the JSON output.
genotyping_mixin.add_argument(
    "--ncbi_names",
    help="Report NCBI species names in addition to the usual species names in the JSON output. Only applies when the species is tb",
    action="store_true",
    default=False,
)


panels_mixin = argparse.ArgumentParser(add_help=False)
Expand Down
31 changes: 25 additions & 6 deletions src/mykrobe/cmds/amr.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,20 +167,24 @@ def ref_data_from_args(args):
"var_to_res_json": species_dir.json_file("amr"),
"hierarchy_json": species_dir.json_file("hierarchy"),
"lineage_json": species_dir.json_file("lineage"),
"ncbi_names_json": species_dir.json_file("ncbi_names"),
"kmer": species_dir.kmer(),
"version": species_dir.version(),
"species_phylo_group": species_dir.species_phylo_group(),
}

if ref_data["lineage_json"] is None:
ref_data["lineage_dict"] = None
else:
ref_data["lineage_dict"] = load_json(ref_data["lineage_json"])
for key in ["lineage", "ncbi_names"]:
if ref_data[f"{key}_json"] is None:
ref_data[f"{key}_dict"] = None
else:
ref_data[f"{key}_dict"] = load_json(ref_data[f"{key}_json"])

return ref_data


def detect_species_and_get_depths(cov_parser, hierarchy_json, wanted_phylo_group):
def detect_species_and_get_depths(
cov_parser, hierarchy_json, wanted_phylo_group, probe_cov_json=None
):
depths = []
if wanted_phylo_group is None:
return {}, depths
Expand All @@ -193,6 +197,7 @@ def detect_species_and_get_depths(cov_parser, hierarchy_json, wanted_phylo_group
species_covgs=cov_parser.covgs["species"],
lineage_covgs=cov_parser.covgs.get("sub-species", {}),
hierarchy_json_file=hierarchy_json,
probe_cov_json_file=probe_cov_json,
)
phylogenetics = species_predictor.run()

Expand Down Expand Up @@ -249,6 +254,13 @@ def fix_X_amino_acid_variants(sample_json):
fix_amino_acid_X_variants_keys(sample_json["variant_calls"])


def add_ncbi_species_names_to_phylo_dict(phylo, ncbi_names):
    """Attach NCBI names to every species entry of ``phylo``, in place.

    Each value under ``phylo["species"]`` gains an ``"ncbi_names"`` key whose
    value is looked up in ``ncbi_names`` by the species key; species missing
    from the lookup get the placeholder ``"UNKNOWN"``.

    Nothing is modified when ``phylo`` has no ``"species"`` key or when
    ``ncbi_names`` is None. Always returns None.
    """
    if "species" in phylo and ncbi_names is not None:
        for name, entry in phylo["species"].items():
            entry["ncbi_names"] = ncbi_names.get(name, "UNKNOWN")


def run(parser, args):
logger.info(f"Start runnning mykrobe predict. Command line: {' '.join(sys.argv)}")
base_json = {args.sample: {}}
Expand Down Expand Up @@ -298,7 +310,10 @@ def run(parser, args):
depths = [cp.estimate_depth()]
else:
phylogenetics, depths = detect_species_and_get_depths(
cp, ref_data["hierarchy_json"], ref_data["species_phylo_group"]
cp,
ref_data["hierarchy_json"],
ref_data["species_phylo_group"],
probe_cov_json=args.dump_species_covgs,
)

# Genotype
Expand Down Expand Up @@ -450,5 +465,9 @@ def run(parser, args):

logger.info("Progress: writing output")
fix_X_amino_acid_variants(base_json[args.sample])
if args.ncbi_names and ref_data["ncbi_names_dict"] is not None:
add_ncbi_species_names_to_phylo_dict(
base_json[args.sample]["phylogenetics"], ref_data["ncbi_names_dict"]
)
write_outputs(args, base_json)
logger.info("Progress: finished")
Loading
Loading