From 090d74dfb6a650cee8a0a206b9bb08a5764f7f2d Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 7 Mar 2024 15:10:35 -0500 Subject: [PATCH 1/7] Added --keep_invalid LCA flag --- environment.yaml | 2 +- pytaxonkit.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/environment.yaml b/environment.yaml index 0c08f4e..67a0e96 100644 --- a/environment.yaml +++ b/environment.yaml @@ -10,5 +10,5 @@ dependencies: - pytest>=5.4 - pytest-cov>=2.8 - pytest-xdist>=1.31 - - taxonkit>=0.8 + - taxonkit>=0.16.0 - wget>=1.20 diff --git a/pytaxonkit.py b/pytaxonkit.py index 2b94b20..3afa7e3 100644 --- a/pytaxonkit.py +++ b/pytaxonkit.py @@ -1046,6 +1046,7 @@ def lca( multi=False, skip_deleted=False, skip_unfound=False, + keep_invalid=False, threads=None, data_dir=None, debug=False, @@ -1063,6 +1064,8 @@ def lca( Ignore deleted taxids and compute LCA with the remaining taxa skip_unfound : bool, default False Ignore taxids not found in the taxonomy database and compute LCA with the remaining taxa + keep_invalid: bool, default False + Returns 0 when all taxids have been skipped from `skip_deleted` or `skip_unfound` threads : int Override the default taxonkit threads setting data_dir : str, default None @@ -1096,6 +1099,8 @@ def lca( arglist.append("--skip-deleted") if skip_unfound: arglist.append("--skip-unfound") + if keep_invalid: + arglist.append("--keep-invalid") if threads: arglist.extend(("--threads", validate_threads(threads))) if data_dir: @@ -1134,6 +1139,15 @@ def test_lca_unfound(capsys): assert "taxonkit lca --skip-unfound" in terminal.err +def test_lca_keep_invalid(capsys): + assert lca([11111111], skip_deleted=True, skip_unfound=True) == None + assert lca([22222222], skip_deleted=True, skip_unfound=True) == None + assert lca([11111111], skip_deleted=True, skip_unfound=True, keep_invalid=True) == 0 + assert lca([11111111, 22222222], skip_deleted=True, skip_unfound=True, keep_invalid=True, debug=True) == 0 + terminal = capsys.readouterr() + assert "taxonkit lca --skip-deleted --skip-unfound --keep-invalid" in terminal.err + + @pytest.mark.parametrize( "domulti, ids,result", [ From 1f45c32cbd4c3b5c3bfeb50335a0bb33d75d201b Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 14:52:29 -0500 Subject: [PATCH 2/7] Add another test --- .github/workflows/cibuild.yml | 6 +++--- pytaxonkit.py | 26 ++++++++++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index 2470518..f274f33 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -17,14 +17,14 @@ jobs: max-parallel: 4 matrix: include: - - os: ubuntu-latest - python-version: 3.7 - os: ubuntu-latest python-version: 3.8 - os: ubuntu-latest python-version: 3.9 + - os: ubuntu-latest + python-version: 3.10 - os: macos-latest - python-version: 3.8 + python-version: 3.9 steps: - uses: actions/checkout@v1 - uses: conda-incubator/setup-miniconda@v2 diff --git a/pytaxonkit.py b/pytaxonkit.py index 3afa7e3..be7f3d8 100644 --- a/pytaxonkit.py +++ b/pytaxonkit.py @@ -339,10 +339,10 @@ def lineage( >>> result.columns Index(['TaxID', 'Code', 'Name', 'Lineage', 'LineageTaxIDs', 'Rank', 'FullLineage', 'FullLineageTaxIDs', 'FullLineageRanks'], dtype='object') >>> result[["TaxID", "Lineage", "LineageTaxIDs"]] - TaxID Lineage LineageTaxIDs - 0 1325911 Eukaryota;Arthropoda;Insecta;Hymenoptera;Eucharitidae;Pogonocharis; 2759;6656;50557;7399;216140;1325911; - 1 1649473 Bacteria;Bacteroidota;Cytophagia;Cytophagales;Spirosomaceae;Nibrella; 2;976;768503;768507;2896860;1649473; - 2 1401311 Eukaryota;Arthropoda;Insecta;Coleoptera;Staphylinidae;Styngetus; 2759;6656;50557;7041;29026;1401311; + TaxID Lineage LineageTaxIDs + 0 1325911 Eukaryota;Arthropoda;Insecta;Hymenoptera;Eucharitidae;Pogonocharis; 2759;6656;50557;7399;216140;1325911; + 1 1649473 Bacteria;Bacteroidota;Cytophagia;Cytophagales;Spirosomataceae;Nibrella; 2;976;768503;768507;2896860;1649473; + 2 1401311 Eukaryota;Arthropoda;Insecta;Coleoptera;Staphylinidae;Styngetus; 2759;6656;50557;7041;29026;1401311; >>> result = pytaxonkit.lineage(["1382510", "929505", "390333"], formatstr="{f};{g};{s};{S}") >>> result[["TaxID", "Lineage", "LineageTaxIDs"]] TaxID Lineage LineageTaxIDs @@ -1140,14 +1140,28 @@ def test_lca_unfound(capsys): def test_lca_keep_invalid(capsys): - assert lca([11111111], skip_deleted=True, skip_unfound=True) == None - assert lca([22222222], skip_deleted=True, skip_unfound=True) == None + assert lca([11111111], skip_deleted=True, skip_unfound=True) is None + assert lca([22222222], skip_deleted=True, skip_unfound=True) is None assert lca([11111111], skip_deleted=True, skip_unfound=True, keep_invalid=True) == 0 assert lca([11111111, 22222222], skip_deleted=True, skip_unfound=True, keep_invalid=True, debug=True) == 0 terminal = capsys.readouterr() assert "taxonkit lca --skip-deleted --skip-unfound --keep-invalid" in terminal.err +def test_lca_keep_invalid_multi(): + query = [ + [743375], + [123456789], + [987654321], + [743375, 123456789], + [743375, 987654321], + [123456789, 987654321] + ] + observed = lca(query, skip_deleted=True, skip_unfound=True, keep_invalid=True, multi=True) + expected = [743375, 0, 0, 743375, 743375, 0] + assert expected == observed + + @pytest.mark.parametrize( "domulti, ids,result", [ From 47e9c4978acdfc7583e7b75c56772ed801c80f2c Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 14:54:34 -0500 Subject: [PATCH 3/7] Minor change to trigger CI build --- pytaxonkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytaxonkit.py b/pytaxonkit.py index be7f3d8..237d3ab 100644 --- a/pytaxonkit.py +++ b/pytaxonkit.py @@ -1139,7 +1139,7 @@ def test_lca_unfound(capsys): assert "taxonkit lca --skip-unfound" in terminal.err -def test_lca_keep_invalid(capsys): +def test_lca_keep_invalid_single(capsys): assert lca([11111111], skip_deleted=True, skip_unfound=True) is None assert lca([22222222], skip_deleted=True, skip_unfound=True) is None assert lca([11111111], skip_deleted=True, skip_unfound=True, keep_invalid=True) == 0 From 6a8795a2f27670d77691e5e2d104a811dceed0da Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 14:56:26 -0500 Subject: [PATCH 4/7] Fix CI config --- .github/workflows/cibuild.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index f274f33..a6faff7 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -18,13 +18,13 @@ jobs: matrix: include: - os: ubuntu-latest - python-version: 3.8 + python-version: "3.8" - os: ubuntu-latest - python-version: 3.9 + python-version: "3.9" - os: ubuntu-latest - python-version: 3.10 + python-version: "3.10" - os: macos-latest - python-version: 3.9 + python-version: "3.9" steps: - uses: actions/checkout@v1 - uses: conda-incubator/setup-miniconda@v2 From 5280a755508070ee7857259c9be3f9f200c3f8c1 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 15:02:18 -0500 Subject: [PATCH 5/7] Fix code style --- pytaxonkit.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pytaxonkit.py b/pytaxonkit.py index 237d3ab..88bf65d 100644 --- a/pytaxonkit.py +++ b/pytaxonkit.py @@ -1143,7 +1143,14 @@ def test_lca_keep_invalid_single(capsys): assert lca([11111111], skip_deleted=True, skip_unfound=True) is None assert lca([22222222], skip_deleted=True, skip_unfound=True) is None assert lca([11111111], skip_deleted=True, skip_unfound=True, keep_invalid=True) == 0 - assert lca([11111111, 22222222], skip_deleted=True, skip_unfound=True, keep_invalid=True, debug=True) == 0 + result = lca( + [11111111, 22222222], + skip_deleted=True, + skip_unfound=True, + keep_invalid=True, + debug=True, + ) + assert result == 0 terminal = capsys.readouterr() assert "taxonkit lca --skip-deleted --skip-unfound --keep-invalid" in terminal.err @@ -1155,7 +1162,7 @@ def test_lca_keep_invalid_multi(): [987654321], [743375, 123456789], [743375, 987654321], - [123456789, 987654321] + [123456789, 987654321], ] observed = lca(query, skip_deleted=True, skip_unfound=True, keep_invalid=True, multi=True) expected = [743375, 0, 0, 743375, 743375, 0] From 9b1f07e7e0aa21f4e51c1af2347a3201942bd438 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 15:08:48 -0500 Subject: [PATCH 6/7] Troubleshoot CI --- .github/workflows/cibuild.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index a6faff7..9b6fe62 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -36,7 +36,7 @@ jobs: run: | mkdir ~/.taxonkit pushd ~/.taxonkit - curl -L -O ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz + curl -L -O https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz tar -xzf taxdump.tar.gz popd - name: Install From 16aea6b5de4aa929309d0c332952675cc971018d Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 8 Mar 2024 15:11:51 -0500 Subject: [PATCH 7/7] Troubleshoot CI --- .github/workflows/cibuild.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cibuild.yml b/.github/workflows/cibuild.yml index 9b6fe62..e6e1aae 100644 --- a/.github/workflows/cibuild.yml +++ b/.github/workflows/cibuild.yml @@ -23,8 +23,8 @@ jobs: python-version: "3.9" - os: ubuntu-latest python-version: "3.10" - - os: macos-latest - python-version: "3.9" +# - os: macos-latest +# python-version: "3.9" steps: - uses: actions/checkout@v1 - uses: conda-incubator/setup-miniconda@v2