diff --git a/.gitignore b/.gitignore index d42b330..34c79cb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,136 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Output data from example script +output/ + build/ .vscode/ __pycache__/ diff --git a/README.md b/README.md index 1a57153..fd19ef8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ # Caribou Alignment-free bacterial identification and classification in metagenomics sequencing data using machine learning. +## Proof of Concept +The Jupyter notebook `workflow_example.ipynb` shows the workflow and its output using example data. In this notebook, the steps are identified for better understanding. + +Data used in the `workflow_example.ipynb` is located in the `example_data/` folder. + +This data was also used to test and debug the Caribou analysis pipeline. + ## Installation The Caribou analysis pipeline was developped in python3 and can be easily installed through the python wheel. 
The repo must be cloned first and then the package can be installed using the following commands lines in the desired folder : ``` diff --git a/example_data/30_genomes.csv b/example_data/30_genomes.csv new file mode 100644 index 0000000..1652712 --- /dev/null +++ b/example_data/30_genomes.csv @@ -0,0 +1,31 @@ +id,species,genus,family,order,class,phylum,domain +VBOR01000009.1,WS-7 sp005893165,WS-7,SZUA-252,SZUA-252,RBG-16-71-46,Eisenbacteria,Bacteria +PMOP01000016.1,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +DHUT01000069.1,Sedimentibacter sp002409285,Sedimentibacter,Sedimentibacteraceae,Tissierellales,Clostridia,Firmicutes_A,Bacteria +JAAZAC010000025.1,Actinotalea sp012514545,Actinotalea,Cellulomonadaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +URUG01000300.1,Faecalimonas sp900550975,Faecalimonas,Lachnospiraceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +CAJCBY010000033.1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +JABXKY010000147.1,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABBOX010000109.1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JAABRC010000419.1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +CAAFRK010000216.1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +UQEY01000009.1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +NZ_QEST01000278.1,Streptomyces sp003311645,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1,Streptomyces 
europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +JACMKV010000045.1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +WLHF01000026.1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +DKBA01000026.1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +WBXD01000017.1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +PBSX01000072.1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +JAAYQI010000217.1,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +PMSQ01000054.1,Sulfotelmatobacter sp003168355,Sulfotelmatobacter,Koribacteraceae,Acidobacteriales,Acidobacteriae,Acidobacteriota,Bacteria +NZ_LCZE01000023.1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +CAIXRL010000197.1,CAIXRL01 sp903921835,CAIXRL01,RBG-16-71-46,RBG-16-71-46,RBG-16-71-46,Eisenbacteria,Bacteria +JAAYXU010000041.1,JAAYXU01 sp012515725,JAAYXU01,UMGS416,Christensenellales,Clostridia_A,Firmicutes_A,Bacteria +DHMB01000127.1,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +JACNFQ010000081.1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +CAJCHR010000269.1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +QMMC01000579.1,B10-G4 
sp003647065,B10-G4,SG8-38,Polyangiales,Polyangia,Myxococcota,Bacteria diff --git a/example_data/30_genomes.fna.gz b/example_data/30_genomes.fna.gz new file mode 100644 index 0000000..30087a2 Binary files /dev/null and b/example_data/30_genomes.fna.gz differ diff --git a/example_data/cucurbita_sample_3.csv b/example_data/cucurbita_sample_3.csv new file mode 100644 index 0000000..6a700de --- /dev/null +++ b/example_data/cucurbita_sample_3.csv @@ -0,0 +1,4 @@ +id,species,domain +NW_019663258,Cucurbita,host +NW_019657536,Cucurbita,host +NEWN01002765,Cucurbita,host diff --git a/example_data/cucurbita_sample_3.fna.gz b/example_data/cucurbita_sample_3.fna.gz new file mode 100644 index 0000000..fe8a36a Binary files /dev/null and b/example_data/cucurbita_sample_3.fna.gz differ diff --git a/example_data/metagenome.csv b/example_data/metagenome.csv new file mode 100644 index 0000000..ce6d1c8 --- /dev/null +++ b/example_data/metagenome.csv @@ -0,0 +1,299 @@ +id,species,genus,family,order,class,phylum,domain +VBOR01000009.1_0_7_1,WS-7 sp005893165,WS-7,SZUA-252,SZUA-252,RBG-16-71-46,Eisenbacteria,Bacteria +VBOR01000009.1_0_7_2,WS-7 sp005893165,WS-7,SZUA-252,SZUA-252,RBG-16-71-46,Eisenbacteria,Bacteria +VBOR01000009.1_1_7_1,WS-7 sp005893165,WS-7,SZUA-252,SZUA-252,RBG-16-71-46,Eisenbacteria,Bacteria +VBOR01000009.1_1_7_2,WS-7 sp005893165,WS-7,SZUA-252,SZUA-252,RBG-16-71-46,Eisenbacteria,Bacteria +PMOP01000016.1_0_7_1,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +PMOP01000016.1_0_7_2,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +PMOP01000016.1_1_7_1,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +PMOP01000016.1_1_7_2,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +PMOP01000016.1_2_7_1,Palsa-360 sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +PMOP01000016.1_2_7_2,Palsa-360 
sp003161495,Palsa-360,UBA7541,UBA7541,Acidobacteriae,Acidobacteriota,Bacteria +DHUT01000069.1_0_7_1,Sedimentibacter sp002409285,Sedimentibacter,Sedimentibacteraceae,Tissierellales,Clostridia,Firmicutes_A,Bacteria +DHUT01000069.1_0_7_2,Sedimentibacter sp002409285,Sedimentibacter,Sedimentibacteraceae,Tissierellales,Clostridia,Firmicutes_A,Bacteria +JAAZAC010000025.1_0_7_1,Actinotalea sp012514545,Actinotalea,Cellulomonadaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JAAZAC010000025.1_0_7_2,Actinotalea sp012514545,Actinotalea,Cellulomonadaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +CAJCBY010000033.1_0_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_0_7_2,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_1_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_1_7_2,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_2_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_2_7_2,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_3_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_3_7_2,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_4_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_4_7_2,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_5_7_1,Aquirufa sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +CAJCBY010000033.1_5_7_2,Aquirufa 
sp903960725,Aquirufa,Spirosomaceae,Cytophagales,Bacteroidia,Bacteroidota,Bacteria +JABXKY010000147.1_0_7_1,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABXKY010000147.1_0_7_2,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABXKY010000147.1_1_7_1,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABXKY010000147.1_1_7_2,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABXKY010000147.1_2_7_1,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABXKY010000147.1_2_7_2,RBG-16-57-9 sp013619005,RBG-16-57-9,TCS64,TCS64,Bathyarchaeia,Thermoproteota,Archaea +JABBOX010000109.1_0_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_0_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_1_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_1_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_2_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_2_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_3_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_3_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_4_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_4_7_2,UBA4719 
sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_5_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_5_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_6_7_1,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JABBOX010000109.1_6_7_2,UBA4719 sp012927555,UBA4719,Dermatophilaceae,Actinomycetales,Actinomycetia,Actinobacteriota,Bacteria +JAABRC010000419.1_0_0_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_0_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_1_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_1_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_2_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_2_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_3_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_3_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_4_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_4_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_5_1,JAABRC01 
sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_5_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_6_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_6_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_7_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_0_7_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_1_7_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_1_7_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_2_7_1,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JAABRC010000419.1_2_7_2,JAABRC01 sp011391115,JAABRC01,Burkholderiaceae,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +CAAFRK010000216.1_0_0_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_0_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_1_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_1_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_2_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_2_2,Veillonella 
sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_3_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_3_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_4_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_4_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_5_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_5_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_6_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_6_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_7_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_0_7_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_1_7_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_1_7_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_2_7_1,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_2_7_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_3_7_1,Veillonella 
sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +CAAFRK010000216.1_3_7_2,Veillonella sp900765235,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_0_7_1,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_0_7_2,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_1_7_1,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_1_7_2,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_2_7_1,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +URSE01000035.1_2_7_2,Veillonella sp900550455,Veillonella,Veillonellaceae,Veillonellales,Negativicutes,Firmicutes_C,Bacteria +UQEY01000009.1_0_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_0_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_1_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_1_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_2_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_2_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_3_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_3_7_2,Eubacterium_R 
sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_4_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_4_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_5_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_5_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_6_7_1,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +UQEY01000009.1_6_7_2,Eubacterium_R sp900540235,Eubacterium_R,Acutalibacteraceae,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +NZ_LRTR01000260.1_0_7_1,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_0_7_2,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_1_7_1,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_1_7_2,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_2_7_1,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_2_7_2,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_3_7_1,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_3_7_2,Streptomyces 
europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_4_7_1,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +NZ_LRTR01000260.1_4_7_2,Streptomyces europaeiscabiei,Streptomyces,Streptomycetaceae,Streptomycetales,Actinomycetia,Actinobacteriota,Bacteria +JACMKV010000045.1_0_7_1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_0_7_2,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_1_7_1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_1_7_2,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_2_7_1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_2_7_2,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_3_7_1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_3_7_2,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_4_7_1,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +JACMKV010000045.1_4_7_2,JACMKV01 sp014379915,JACMKV01,JACMKV01,Burkholderiales,Gammaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_0_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_0_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_1_1,Pelagibacter 
sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_1_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_2_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_2_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_3_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_3_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_4_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_4_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_5_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_5_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_6_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_6_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_7_1,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_0_7_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_1_7_1,Pelagibacter 
sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +CACNVV010000042.1_1_7_2,Pelagibacter sp902624015,Pelagibacter,Pelagibacteraceae,Pelagibacterales,Alphaproteobacteria,Proteobacteria,Bacteria +WLHF01000026.1_0_0_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_0_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_1_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_1_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_2_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_2_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_3_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_3_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_4_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_4_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_5_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_5_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_6_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria 
+WLHF01000026.1_0_6_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_0_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_1_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_1_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_2_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_2_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_3_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_3_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_4_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_4_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_5_7_1,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +WLHF01000026.1_5_7_2,Planktophila sp009702835,Planktophila,Nanopelagicaceae,Nanopelagicales,Actinomycetia,Actinobacteriota,Bacteria +DKBA01000026.1_0_0_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_0_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_1_1,UBA6912 
sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_1_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_2_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_2_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_3_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_3_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_4_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_4_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_5_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_5_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_6_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_6_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_7_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_0_7_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_1_7_1,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +DKBA01000026.1_1_7_2,UBA6912 sp002450985,UBA6912,UBA5794,UBA5794,Acidimicrobiia,Actinobacteriota,Bacteria +WBXD01000017.1_0_0_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_0_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_1_1,UBA1315 
sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_1_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_2_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_2_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_3_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_3_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_4_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_4_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_5_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_5_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_6_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_6_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_7_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_0_7_2,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_1_7_1,UBA1315 sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +WBXD01000017.1_1_7_2,UBA1315 
sp008932935,UBA1315,Akkermansiaceae,Verrucomicrobiales,Verrucomicrobiae,Verrucomicrobiota,Bacteria +PBSX01000072.1_0_0_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_0_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_1_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_1_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_2_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_2_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_3_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_3_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_4_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_4_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_5_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_5_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_6_1,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_6_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_7_1,CABZJG01 
sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +PBSX01000072.1_0_7_2,CABZJG01 sp002726375,CABZJG01,Rhodobacteraceae,Rhodobacterales,Alphaproteobacteria,Proteobacteria,Bacteria +JAAYQI010000217.1_0_7_1,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +JAAYQI010000217.1_0_7_2,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +JAAYQI010000217.1_1_7_1,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +JAAYQI010000217.1_1_7_2,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +JAAYQI010000217.1_2_7_1,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +JAAYQI010000217.1_2_7_2,JAAYQI01 sp012519385,JAAYQI01,Anaerotignaceae,Lachnospirales,Clostridia,Firmicutes_A,Bacteria +NZ_LCZE01000023.1_0_0_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_0_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_1_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_1_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_2_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_2_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_3_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_3_2,Pseudomonas_E 
fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_4_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_4_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_5_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_5_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_6_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_6_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_7_1,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +NZ_LCZE01000023.1_0_7_2,Pseudomonas_E fluorescens_N,Pseudomonas_E,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_0_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_0_7_2,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_1_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_1_7_2,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_2_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_2_7_2,Pseudomonas_A 
sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_3_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_3_7_2,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_4_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_4_7_2,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_5_7_1,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +DNMQ01000225.1_5_7_2,Pseudomonas_A sp003488145,Pseudomonas_A,Pseudomonadaceae,Pseudomonadales,Gammaproteobacteria,Proteobacteria,Bacteria +CAIXRL010000197.1_0_7_1,CAIXRL01 sp903921835,CAIXRL01,RBG-16-71-46,RBG-16-71-46,RBG-16-71-46,Eisenbacteria,Bacteria +CAIXRL010000197.1_0_7_2,CAIXRL01 sp903921835,CAIXRL01,RBG-16-71-46,RBG-16-71-46,RBG-16-71-46,Eisenbacteria,Bacteria +CAIXRL010000197.1_1_7_1,CAIXRL01 sp903921835,CAIXRL01,RBG-16-71-46,RBG-16-71-46,RBG-16-71-46,Eisenbacteria,Bacteria +CAIXRL010000197.1_1_7_2,CAIXRL01 sp903921835,CAIXRL01,RBG-16-71-46,RBG-16-71-46,RBG-16-71-46,Eisenbacteria,Bacteria +JAAYXU010000041.1_0_7_1,JAAYXU01 sp012515725,JAAYXU01,UMGS416,Christensenellales,Clostridia_A,Firmicutes_A,Bacteria +JAAYXU010000041.1_0_7_2,JAAYXU01 sp012515725,JAAYXU01,UMGS416,Christensenellales,Clostridia_A,Firmicutes_A,Bacteria +DHMB01000127.1_0_7_1,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +DHMB01000127.1_0_7_2,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +DHMB01000127.1_1_7_1,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +DHMB01000127.1_1_7_2,CAG-841 
sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +DHMB01000127.1_2_7_1,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +DHMB01000127.1_2_7_2,CAG-841 sp002405565,CAG-841,CAG-272,Oscillospirales,Clostridia,Firmicutes_A,Bacteria +JACNFQ010000081.1_0_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_0_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_1_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_1_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_2_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_2_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_3_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_3_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_4_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_4_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_5_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_5_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_6_7_1,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +JACNFQ010000081.1_6_7_2,NIOZ-UU106 sp014384545,NIOZ-UU106,UBA6624,UBA6624,UBA6624,UBP7,Bacteria +CAJCHR010000269.1_0_7_1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_0_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_1_7_1,Novosphingobium 
sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_1_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_2_7_1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_2_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_3_7_1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_3_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_4_7_1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_4_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_5_7_1,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +CAJCHR010000269.1_5_7_2,Novosphingobium sp903970225,Novosphingobium,Sphingomonadaceae,Sphingomonadales,Alphaproteobacteria,Proteobacteria,Bacteria +QMMC01000579.1_0_7_1,B10-G4 sp003647065,B10-G4,SG8-38,Polyangiales,Polyangia,Myxococcota,Bacteria +QMMC01000579.1_0_7_2,B10-G4 sp003647065,B10-G4,SG8-38,Polyangiales,Polyangia,Myxococcota,Bacteria diff --git a/example_data/metagenome.fna.gz b/example_data/metagenome.fna.gz new file mode 100644 index 0000000..6dec987 Binary files /dev/null and b/example_data/metagenome.fna.gz differ diff --git a/setup.cfg b/setup.cfg index b9acfc7..65ba271 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = Caribou -version = 1.0.6 +version = 1.1.0 
url = https://github.com/bioinfoUQAM/Caribou/wiki author = Nicolas de Montigny author_email = de_montigny.nicolas@courrier.uqam.ca diff --git a/src/Caribou_pipeline.py b/src/Caribou_pipeline.py index cc1f3ac..299df92 100644 --- a/src/Caribou_pipeline.py +++ b/src/Caribou_pipeline.py @@ -90,8 +90,8 @@ def caribou(opt): verify_boolean(report, 'output in abundance report form') # Check batch_size - if multi_classifier in ['cnn','widecnn'] and training_batch_size < 20: - training_batch_size = 20 + # if multi_classifier in ['cnn','widecnn'] and training_batch_size < 20: + # training_batch_size = 20 # Folders creation for output outdirs = define_create_outdirs(outdir) @@ -101,7 +101,7 @@ def caribou(opt): _system_config = { 'object_spilling_config': json.dumps( - {'type': 'filesystem', 'params': {'directory_path': str(opt['workdir'])}}) + {'type': 'filesystem', 'params': {'directory_path': str(workdir)}}) } ) @@ -204,7 +204,6 @@ def caribou(opt): metagenome ) - # Part 4 - Outputs for biological analysis of bacterial population ################################################################################ diff --git a/src/data/build_data.py b/src/data/build_data.py index 7b6c32e..022d8ed 100644 --- a/src/data/build_data.py +++ b/src/data/build_data.py @@ -28,7 +28,7 @@ def build_load_save_data(file, hostfile, prefix, dataset, host, kmers_list=None, if os.path.isfile(data_file) and os.path.isfile(data_file_host) and isinstance(hostfile, tuple): data = load_Xy_data(data_file) data_host = load_Xy_data(data_file_host) - elif os.path.isfile(data_file): + elif os.path.isfile(data_file) : data = load_Xy_data(data_file) else: # Build Xy_data of database @@ -53,7 +53,7 @@ def build_load_save_data(file, hostfile, prefix, dataset, host, kmers_list=None, nb_features_keep = nb_features_keep) save_Xy_data(data, data_file) - # Assign kmers_list to variable ater extracting database data + # Assign kmers_list to variable after extracting database data if kmers_list is None: kmers_list = 
data['kmers'] diff --git a/src/models/classification.py b/src/models/classification.py index ec7593e..5cf1bd1 100644 --- a/src/models/classification.py +++ b/src/models/classification.py @@ -137,12 +137,11 @@ def _train_model(self, taxa): self._binary_training(taxa) else: self._multiclass_training(taxa) - if isinstance(self.models[taxa], KerasTFModel): - for file in glob(os.path.join(self._outdirs['data_dir'], '*sim*')): - if os.path.isdir(file): - rmtree(file) - else: - os.remove(file) + for file in glob(os.path.join(self._outdirs['data_dir'], '*sim*')): + if os.path.isdir(file): + rmtree(file) + else: + os.remove(file) def _binary_training(self, taxa): print('_binary_training') @@ -516,7 +515,6 @@ def _sim_4_cv(self, df, kmers_ds, name): cv_sim = readsSimulation(kmers_ds['fasta'], cls, sim_cls_dct['id'], 'miseq', sim_outdir, name) sim_data = cv_sim.simulation(self._k, self._database_data['kmers']) sim_ids = sim_data['ids'] - sim_ids = sim_data['ids'] sim_cls = pd.DataFrame({'sim_id':sim_ids}, dtype = object) sim_cls['id'] = sim_cls['sim_id'].str.replace('_[0-9]+_[0-9]+_[0-9]+', '', regex=True) sim_cls = sim_cls.set_index('id').join(cls.set_index('id')) diff --git a/src/models/kerasTF/ray_keras_tf.py b/src/models/kerasTF/ray_keras_tf.py index ec8617c..4924f2e 100644 --- a/src/models/kerasTF/ray_keras_tf.py +++ b/src/models/kerasTF/ray_keras_tf.py @@ -191,7 +191,6 @@ def _sim_4_val(self, df, kmers_ds, name): cv_sim = readsSimulation(kmers_ds['fasta'], cls, sim_genomes, 'miseq', sim_outdir, name) sim_data = cv_sim.simulation(self.k, self.kmers) sim_ids = sim_data['ids'] - sim_ids = sim_data['ids'] sim_cls = pd.DataFrame({'sim_id':sim_ids}, dtype = object) sim_cls['id'] = sim_cls['sim_id'].str.replace('_[0-9]+_[0-9]+_[0-9]+', '', regex=True) sim_cls = sim_cls.set_index('id').join(cls.set_index('id')) @@ -247,13 +246,6 @@ def _fit_model(self, datasets): 'model': self.classifier } - print(f'num_workers : {self._n_workers}') - print(f'nb_CPU_data : {self._nb_CPU_data}') 
- print(f'nb_CPU_training : {self._nb_CPU_training}') - print(f'nb_GPU : {self._nb_GPU}') - print(f'nb_CPU_per_worker : {self._nb_CPU_per_worker}') - print(f'nb_GPU_per_worker : {self._nb_GPU_per_worker}') - # Define trainer / tuner self._trainer = TensorflowTrainer( train_loop_per_worker = train_func, @@ -313,13 +305,9 @@ def predict(self, df, threshold=0.8, cv=False): len(self.kmers) ) - print('predictions after batch_prediction :', predictions.to_pandas()) - # Convert predictions to labels predictions = self._prob_2_cls(predictions, threshold) - print('predictions after probs_2_cls :', predictions) - return self._label_decode(predictions) else: raise ValueError('No data to predict') @@ -440,6 +428,7 @@ def build_model(classifier, nb_cls, nb_kmers): model = build_wideCNN(nb_kmers, nb_cls) return model +""" def batch_predict_val(checkpoint, batch, clf, batch_size, nb_classes, nb_kmers): def convert_logits_to_classes(df): best_class = df["predictions"].map(lambda x: np.array(x).argmax()) @@ -470,6 +459,7 @@ def calculate_prediction_scores(df): ) return correct_dataset +""" def batch_prediction(checkpoint, batch, clf, batch_size, nb_classes, nb_kmers): predictor = BatchPredictor.from_checkpoint( diff --git a/workflow_example.ipynb b/workflow_example.ipynb new file mode 100644 index 0000000..69f1606 --- /dev/null +++ b/workflow_example.ipynb @@ -0,0 +1,1672 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "rm -r output/" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import gc\n", + "import ray\n", + "import json\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "from pathlib import Path\n", + "from src.utils 
import *\n", + "\n", + "# K-mers\n", + "from src.data.seq_collections import SeqCollection\n", + "from src.data.kmers_collection import KmersCollection\n", + "\n", + "# Preprocessing\n", + "from src.models.kerasTF.ray_one_hot_tensor import OneHotTensorEncoder\n", + "from ray.data.preprocessors import Chain, LabelEncoder\n", + "from src.models.ray_tensor_min_max import TensorMinMaxScaler\n", + "from src.models.sklearn.ray_sklearn_onesvm_encoder import OneClassSVMLabelEncoder\n", + "\n", + "# Training\n", + "import tensorflow as tf\n", + "from ray.air import session\n", + "from sklearn.linear_model import SGDOneClassSVM\n", + "from ray.air.integrations.keras import Callback\n", + "from src.models.kerasTF.build_neural_networks import *\n", + "from src.models.sklearn.ray_sklearn_partial_trainer import SklearnPartialTrainer\n", + "from ray.train.tensorflow import TensorflowTrainer, TensorflowCheckpoint, prepare_dataset_shard\n", + "\n", + "# Tuning\n", + "from ray.air.config import RunConfig, ScalingConfig, DatasetConfig\n", + "\n", + "# Predicting\n", + "from ray.train.sklearn import SklearnPredictor\n", + "from ray.train.tensorflow import TensorflowPredictor\n", + "from ray.train.batch_predictor import BatchPredictor\n", + "from joblib import Parallel, delayed, parallel_backend\n", + "\n", + "# Simulation\n", + "from src.models.reads_simulation import readsSimulation\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parameters / global variables" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Definition" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Names\n", + "db_name = '30_genomes'\n", + "host_name = 'cucurbita_3'\n", + "metagenome_name = 'metagenome'\n", + "# IO\n", + "db_fasta = 'example_data/30_genomes.fna.gz'\n", + "db_cls = 'example_data/30_genomes.csv'\n", + "host_fasta = 
'example_data/cucurbita_sample_3.fna.gz'\n", + "host_cls = 'example_data/cucurbita_sample_3.csv'\n", + "metagenome_fasta = 'example_data/metagenome.fna.gz'\n", + "metagenome_cls = 'example_data/metagenome.csv'\n", + "outdir = 'output/'\n", + "workdir = '/tmp/spill'\n", + "# Settings\n", + "klen = 5\n", + "batch_size = 1\n", + "epochs = 10\n", + "classif_threshold = 0.8\n", + "features_threshold = np.inf\n", + "nb_features = np.inf" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Verification" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# IO\n", + "verify_seqfiles(db_fasta,host_fasta)\n", + "verify_fasta(metagenome_fasta)\n", + "verify_file(db_cls)\n", + "verify_file(host_cls)\n", + "verify_file(metagenome_cls)\n", + "# Settings\n", + "verify_positive_int(klen, 'kmers length')\n", + "verify_positive_int(batch_size, 'training batch size')\n", + "verify_positive_int(epochs, 'number of iterations in neural networks training')\n", + "verify_0_1(classif_threshold, 'classification threshold')\n", + "# Folders creation for output\n", + "if not os.path.isdir(outdir):\n", + " os.makedirs(outdir)\n", + "outdirs = define_create_outdirs(outdir)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cluster initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 15:50:18,083\tINFO worker.py:1529 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + "

Ray

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "
Python version:3.8.10
Ray version: 2.2.0
Dashboard:http://127.0.0.1:8265
\n", + "
\n", + "
\n" + ], + "text/plain": [ + "RayContext(dashboard_url='127.0.0.1:8265', python_version='3.8.10', ray_version='2.2.0', ray_commit='b6af0887ee5f2e460202133791ad941a41f15beb', address_info={'node_ip_address': '192.168.65.207', 'raylet_ip_address': '192.168.65.207', 'redis_address': None, 'object_store_address': '/tmp/ray/session_2023-03-13_15-50-16_029330_21098/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2023-03-13_15-50-16_029330_21098/sockets/raylet', 'webui_url': '127.0.0.1:8265', 'session_dir': '/tmp/ray/session_2023-03-13_15-50-16_029330_21098', 'metrics_export_port': 63058, 'gcs_address': '192.168.65.207:65125', 'address': '192.168.65.207:65125', 'dashboard_agent_listen_port': 52365, 'node_id': 'd571557535c8ecc41d103c8ee18b50f1f1cdf8deedd4632d2bd18525'})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ray.init(\n", + " _system_config={\n", + " 'object_spilling_config': json.dumps(\n", + " {'type': 'filesystem', 'params': {'directory_path': str(workdir)}})\n", + " }\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# K-mers" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Local variables" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "db_kmers_data = {}\n", + "db_seqdata = None\n", + "db_kmers_collection = None\n", + "host_kmers_data = {}\n", + "host_seqdata = None\n", + "host_kmers_collection = None\n", + "metagenome_kmers_data = {}\n", + "metagenome_seqdata = None\n", + "metagenome_kmers_collection = None" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filenames" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Filenames\n", + "db_profile_file = os.path.join(outdirs['data_dir'], 
f'profile_genome_{db_name}_data_K{klen}')\n", + "host_profile_file = os.path.join(outdirs['data_dir'], f'profile_genome_{host_name}_data_K{klen}')\n", + "metagenome_profile_file = os.path.join(outdirs['data_dir'], f'profile_genome_{metagenome_name}_data_K{klen}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Seqdata" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "db_seqdata = SeqCollection((db_fasta, db_cls))\n", + "host_seqdata = SeqCollection((host_fasta, host_cls))\n", + "metagenome_seqdata = SeqCollection(metagenome_fasta)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### K-mers collections" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "seen_kmers\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 30 out of 30 | elapsed: 1.7s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_batch_read_write_seen\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Map_Batches: 100%|██████████| 29/29 [00:00<00:00, 102.58it/s]\n", + "Repartition: 100%|██████████| 29/29 [00:00<00:00, 746.53it/s]\n", + "Repartition: 100%|██████████| 29/29 [00:00<00:00, 768.93it/s]\n", + "Repartition: 100%|██████████| 29/29 [00:00<00:00, 686.86it/s]\n", + "Write Progress: 100%|██████████| 29/29 [00:00<00:00, 348.20it/s]\n", + "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "given_kmers\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Done 3 out of 3 | 
elapsed: 0.4s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_batch_read_write_given\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Repartition: 100%|██████████| 3/3 [00:00<00:00, 351.92it/s]\n", + "Repartition: 100%|██████████| 3/3 [00:00<00:00, 462.91it/s]\n", + "Repartition: 100%|██████████| 3/3 [00:00<00:00, 410.64it/s]\n", + "Write Progress: 100%|██████████| 3/3 [00:00<00:00, 162.88it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "given_kmers\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 2.5s\n", + "[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 10.6s\n", + "[Parallel(n_jobs=-1)]: Done 298 out of 298 | elapsed: 17.6s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_batch_read_write_given\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Repartition: 100%|██████████| 298/298 [00:00<00:00, 1246.26it/s]\n", + "Repartition: 100%|██████████| 298/298 [00:00<00:00, 1496.60it/s]\n", + "Repartition: 100%|██████████| 298/298 [00:00<00:00, 1387.83it/s]\n", + "Write Progress: 100%|██████████| 298/298 [00:01<00:00, 246.96it/s]\n" + ] + } + ], + "source": [ + "db_kmers_collection = KmersCollection(\n", + " db_seqdata,\n", + " db_profile_file,\n", + " klen,\n", + " db_name,\n", + " None,\n", + " features_threshold,\n", + " nb_features\n", + ")\n", + "kmers_list = db_kmers_collection.kmers_list\n", + "host_kmers_collection = KmersCollection(\n", + " host_seqdata,\n", + " host_profile_file,\n", + " klen,\n", + " host_name,\n", + " kmers_list,\n", + " np.inf,\n", + " np.inf\n", + ")\n", + "metagenome_kmers_collection = KmersCollection(\n", + " metagenome_seqdata,\n", + " metagenome_profile_file,\n", + " klen,\n", + " metagenome_name,\n", + " 
kmers_list\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### K-mers data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "db_kmers_data['profile'] = db_kmers_collection.Xy_file\n", + "db_kmers_data['ids'] = db_kmers_collection.ids\n", + "db_kmers_data['classes'] = db_kmers_collection.classes\n", + "db_kmers_data['kmers'] = kmers_list\n", + "db_kmers_data['taxas'] = db_kmers_collection.taxas\n", + "db_kmers_data['fasta'] = db_fasta\n", + "#\n", + "host_kmers_data['profile'] = host_kmers_collection.Xy_file\n", + "host_kmers_data['ids'] = host_kmers_collection.ids\n", + "host_kmers_data['classes'] = host_kmers_collection.classes\n", + "host_kmers_data['kmers'] = kmers_list\n", + "host_kmers_data['taxas'] = host_kmers_collection.taxas\n", + "host_kmers_data['fasta'] = host_fasta\n", + "#\n", + "metagenome_kmers_data['profile'] = metagenome_kmers_collection.Xy_file\n", + "metagenome_kmers_data['ids'] = metagenome_kmers_collection.ids\n", + "metagenome_kmers_data['kmers'] = kmers_list" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bacteria isolation - OneClassSVM" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DB + Host merging" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Read progress: 100%|██████████| 19/19 [00:00<00:00, 2026.08it/s]\n", + "Write Progress: 100%|██████████| 19/19 [00:00<00:00, 39.44it/s]\n" + ] + } + ], + "source": [ + "merged_kmers_data = {}\n", + "\n", + "merged_kmers_data['profile'] = \"{}_host_merged\".format(os.path.splitext(db_kmers_data[\"profile\"])[0]) # Kmers profile\n", + "\n", + "merged_cls = pd.DataFrame(db_kmers_data[\"classes\"], columns=db_kmers_data[\"taxas\"])\n", + "df_cls_host = 
pd.DataFrame(host_kmers_data[\"classes\"], columns=host_kmers_data[\"taxas\"])\n", + "\n", + "if len(np.unique(merged_cls['domain'])) != 1:\n", + " merged_cls[merged_cls != 'bacteria'] = 'bacteria'\n", + "if len(df_cls_host) > len(host_kmers_data['ids']):\n", + " to_remove = np.arange(len(df_cls_host) - len(host_kmers_data['ids']))\n", + " df_cls_host.drop(to_remove, axis=0, inplace=True)\n", + "elif len(df_cls_host) < len(host_kmers_data['ids']):\n", + " diff = len(host_kmers_data['ids']) - len(df_cls_host)\n", + " row = df_cls_host.iloc[0]\n", + " for i in range(diff):\n", + " df_cls_host = pd.concat([df_cls_host, row.to_frame().T], ignore_index=True)\n", + "\n", + "merged_cls = pd.concat([merged_cls, df_cls_host], ignore_index=True)\n", + "merged_kmers_data['classes'] = np.array(merged_cls) # Class labels\n", + "merged_kmers_data['ids'] = np.concatenate((db_kmers_data[\"ids\"], host_kmers_data[\"ids\"])) # IDs\n", + "merged_kmers_data['kmers'] = db_kmers_data[\"kmers\"] # Features\n", + "merged_kmers_data['taxas'] = db_kmers_data[\"taxas\"] # Known taxas for classification\n", + "merged_kmers_data['fasta'] = (db_kmers_data['fasta'], host_kmers_data['fasta']) # Fasta file needed for reads simulation\n", + "\n", + "merged_df = db_df.union(host_df)\n", + "merged_df.write_parquet(merged_kmers_data['profile'])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Read: 100%|██████████| 19/19 [00:00<00:00, 245.11it/s]\n", + "Repartition: 100%|██████████| 32/32 [00:00<00:00, 959.07it/s]\n", + "Repartition: 100%|██████████| 32/32 [00:00<00:00, 901.95it/s]\n", + "Repartition: 100%|██████████| 19/19 [00:00<00:00, 478.89it/s]\n", + "Repartition: 100%|██████████| 19/19 [00:00<00:00, 555.11it/s]\n" + ] + } + ], + "source": [ + "merged_df = zip_X_y(merged_df, merged_cls)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "encoded = []\n", + "labels = []" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "preprocessor = Chain(\n", + " TensorMinMaxScaler(db_kmers_data['kmers']),\n", + " OneClassSVMLabelEncoder('domain')\n", + ")\n", + "encoded = np.array([1,-1], dtype = np.int32)\n", + "labels = np.array(['bacteria','unknown'],dtype = object)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Read: 100%|██████████| 16/16 [00:00<00:00, 234.67it/s]\n", + "Repartition: 100%|██████████| 29/29 [00:00<00:00, 894.83it/s]\n", + "Repartition: 100%|██████████| 29/29 [00:00<00:00, 754.86it/s]\n", + "Repartition: 100%|██████████| 16/16 [00:00<00:00, 399.82it/s]\n", + "Repartition: 100%|██████████| 16/16 [00:00<00:00, 451.36it/s]\n" + ] + } + ], + "source": [ + "db_cls = pd.read_csv(db_cls)\n", + "if db_df.count() != len(db_cls):\n", + " db_ids = []\n", + " for row in db_df.iter_rows():\n", + " db_ids.append(row['id'])\n", + " db_cls = db_cls[db_cls['id'].isin(db_ids)]\n", + "for col in db_cls.columns:\n", + " db_cls[col] = db_cls[col].str.lower()\n", + "db_df = zip_X_y(db_df, db_cls)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 61.55it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 77.11it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 113.17it/s]\n" + ] + } + ], + "source": [ + "preprocessor.fit(db_df)\n", + "db_df = preprocessor.transform(db_df)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training" + ] + }, + { + "cell_type": "code", + "execution_count": 
18, + "metadata": {}, + "outputs": [], + "source": [ + "clf = SGDOneClassSVM()\n", + "train_params = {\n", + " 'nu': 0.1,\n", + " 'learning_rate': 'invscaling',\n", + " 'eta0': 1000,\n", + " 'tol': 1e-4\n", + "}\n", + "trainer = SklearnPartialTrainer(\n", + " estimator = clf,\n", + " label_column = 'domain',\n", + " labels_list = np.array([0,1], dtype = np.int32),\n", + " features_list = db_kmers_data['kmers'],\n", + " params = train_params,\n", + " datasets = {'train' : ray.put(db_df)},\n", + " batch_size = batch_size,\n", + " set_estimator_cpus = True,\n", + " scaling_config = ScalingConfig(\n", + " trainer_resources={\n", + " 'CPU': int(os.cpu_count()*0.8)\n", + " }\n", + " ),\n", + " run_config = RunConfig(\n", + " name = 'OneClassSVM',\n", + " local_dir = workdir\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2023-03-13 15:50:53
Running for: 00:00:02.02
Memory: 6.4/12.4 GiB
\n", + "
\n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/4.19 GiB heap, 0.0/2.1 GiB objects\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc
SklearnPartialTrainer_5b9c2_00000RUNNING 192.168.65.207:28446
\n", + "
\n", + "
\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 15:50:55,186\tINFO tune.py:762 -- Total run time: 3.97 seconds (3.81 seconds for the tuning loop).\n" + ] + } + ], + "source": [ + "training_result = trainer.fit()\n", + "model_ckpt = training_result.checkpoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predicting" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Read->Map_Batches: 100%|██████████| 16/16 [00:01<00:00, 9.14it/s]\n", + "Map Progress (1 actors 1 pending): 100%|██████████| 16/16 [00:02<00:00, 6.15it/s]\n" + ] + } + ], + "source": [ + "metagenome_df = preprocessor.preprocessors[0].transform(metagenome_df)\n", + "predictor = BatchPredictor.from_checkpoint(model_ckpt, SklearnPredictor)\n", + "predictions = predictor.predict(metagenome_df, batch_size = batch_size)\n", + "predictions = np.array(predictions.to_pandas()).reshape(-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "predictions_onesvm = pd.Series(np.empty(len(predictions), dtype=object))\n", + "predictions_onesvm[predictions == 1] = 'bacteria'\n", + "predictions_onesvm[predictions == -1] = 'unknown'\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bacteria classification - WideCNN" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metadata Fetch Progress: 0%| | 0/4 [00:00 0:\n", + " use_gpu = True\n", + " n_workers = nb_GPU\n", + " nb_CPU_per_worker = 
int(nb_CPU_training / n_workers)\n", + " nb_GPU_per_worker = 1\n", + "else:\n", + " use_gpu = False\n", + " n_workers = int(nb_CPU_training * 0.2)\n", + " nb_CPU_per_worker = int(int(nb_CPU_training * 0.8) / n_workers)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 136.66it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:01<00:00, 14.63it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 114.35it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 85.52it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 123.06it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 16.76it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 144.19it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 130.22it/s]\n" + ] + } + ], + "source": [ + "db_df = db_df.drop_columns(['id'])\n", + "db_df = preprocessor.transform(db_df)\n", + "val_df = val_df.drop_columns(['id'])\n", + "val_df = preprocessor.transform(val_df)\n", + "datasets = {'train': db_df, 'validation': val_df}" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "train_params = {\n", + " 'batch_size': batch_size,\n", + " 'epochs': epochs,\n", + " 'size': len(db_kmers_data['kmers']),\n", + " 'nb_cls': nb_cls,\n", + " 'model': 'widecnn'\n", + "}\n", + "trainer = TensorflowTrainer(\n", + " train_loop_per_worker = train_func,\n", + " train_loop_config = train_params,\n", + " scaling_config = ScalingConfig(\n", + " trainer_resources={'CPU': nb_CPU_data},\n", + " num_workers = n_workers,\n", + " use_gpu = use_gpu,\n", + " resources_per_worker={\n", + " 'CPU': nb_CPU_per_worker,\n", + " 'GPU': nb_GPU_per_worker\n", + " }\n", + " ),\n", + " dataset_config = {\n", + " 'train': DatasetConfig(\n", + " fit = False,\n", + " transform = False,\n", + " split = 
True,\n", + " use_stream_api = True\n", + " ),\n", + " 'validation': DatasetConfig(\n", + " fit = False,\n", + " transform = False,\n", + " split = True,\n", + " use_stream_api = False\n", + " )\n", + " },\n", + " run_config = RunConfig(\n", + " name = 'WideCNN',\n", + " local_dir = workdir,\n", + " ),\n", + " datasets = datasets,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "
\n", + "

Tune Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Current time:2023-03-13 15:51:19
Running for: 00:00:03.36
Memory: 6.4/12.4 GiB
\n", + "
\n", + "
\n", + "
\n", + "

System Info

\n", + " Using FIFO scheduling algorithm.
Resources requested: 6.0/8 CPUs, 0/0 GPUs, 0.0/4.19 GiB heap, 0.0/2.1 GiB objects\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + "

Trial Status

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name status loc
TensorflowTrainer_6ab93_00000RUNNING 192.168.65.207:29527
\n", + "
\n", + "
\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(TensorflowTrainer pid=29527)\u001b[0m 2023-03-13 15:51:22,846\tINFO dataset.py:3693 -- Created DatasetPipeline with 1 windows: 0.13MiB min, 0.13MiB max, 0.13MiB mean\n", + "\u001b[2m\u001b[36m(TensorflowTrainer pid=29527)\u001b[0m 2023-03-13 15:51:22,846\tINFO dataset.py:3703 -- Blocks per window: 16 min, 16 max, 16 mean\n", + "\u001b[2m\u001b[36m(TensorflowTrainer pid=29527)\u001b[0m 2023-03-13 15:51:22,848\tINFO dataset.py:3725 -- ✔️ This pipeline's per-window parallelism is high enough to fully utilize the cluster.\n", + "\u001b[2m\u001b[36m(TensorflowTrainer pid=29527)\u001b[0m 2023-03-13 15:51:22,848\tINFO dataset.py:3742 -- ✔️ This pipeline's windows likely fit in object store memory without spilling.\n", + "Stage 0: 0%| | 0/1 [00:00\n", + "

Trial Progress

\n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Trial name _time_this_iter_s _timestamp _training_iteration accuracydate done episodes_total experiment_id hostname iterations_since_restore lossnode_ip pidshould_checkpoint time_since_restore time_this_iter_s time_total_s timestamp timesteps_since_restoretimesteps_total training_iterationtrial_id val_accuracy val_loss warmup_time
TensorflowTrainer_6ab93_00000 12.5321 1678737113 3 0.06896552023-03-13_15-51-53False c4fd5b07b4aa4d6481b9e7439f6334fbDESKTOP-TM4J0AE 33.33265192.168.65.20729527True 33.5966 12.5317 33.5966 1678737113 0 36ab93_00000 0 3.33878 0.258061
\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 5it [00:17, 4.29s/it]\u001b[Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 6it [00:29, 6.58s/it]\u001b[Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 7it [00:42, 8.31s/it]\u001b[Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 8it [00:55, 9.67s/it]\u001b[Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 9it [01:09, 10.72s/it]\u001b[Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 10it [01:21, 11.36s/it][Ainator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 11it [01:35, 12.05s/it]\u001b[Anator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 12it [01:49, 12.47s/it]\u001b[Anator pid=29591)\u001b[0m \n", + "\u001b[2m\u001b[36m(PipelineSplitExecutorCoordinator pid=29591)\u001b[0m \n", + "Stage 0: : 13it [02:02, 12.89s/it]\u001b[Anator pid=29591)\u001b[0m \n", + "2023-03-13 15:53:43,102\tINFO tune.py:762 -- Total run time: 146.51 seconds (146.39 seconds for the tuning loop).\n" + ] + } + ], + "source": [ + "training_result = trainer.fit()\n", + "model_ckpt = training_result.best_checkpoints[0][0]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predicting" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + 
"metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Map_Batches: 100%|██████████| 16/16 [00:00<00:00, 110.38it/s]\n", + "Map_Batches: 100%|██████████| 16/16 [00:02<00:00, 6.19it/s]\n", + "Map Progress (7 actors 2 pending): 100%|██████████| 16/16 [00:39<00:00, 2.44s/it]\n" + ] + } + ], + "source": [ + "if len(metagenome_df.schema().names) > 1:\n", + " col_2_drop = [col for col in metagenome_df.schema().names if col != '__value__']\n", + " metagenome_df = metagenome_df.drop_columns(col_2_drop)\n", + "\n", + "metagenome_df = preprocessor.preprocessors[0].transform(metagenome_df)\n", + "\n", + "predictor = BatchPredictor.from_checkpoint(\n", + " model_ckpt,\n", + " TensorflowPredictor,\n", + " model_definition = lambda: build_wideCNN(len(db_kmers_data['kmers']), nb_cls)\n", + ")\n", + "predictions = predictor.predict(\n", + " data = metagenome_df,\n", + " batch_size = batch_size\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "def map_predicted_label_multiclass(df, threshold):\n", + " predict = pd.DataFrame({\n", + " 'best_proba': [df['predictions'][i][np.argmax(df['predictions'][i])] for i in range(len(df))],\n", + " 'predicted_label': df[\"predictions\"].map(lambda x: np.array(x).argmax())\n", + " })\n", + " predict.loc[predict['best_proba'] < threshold, 'predicted_label'] = -1\n", + " return predict['predicted_label'].to_numpy(dtype = np.int32)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.\n", + "[Parallel(n_jobs=-1)]: Done 34 tasks | elapsed: 0.3s\n", + "[Parallel(n_jobs=-1)]: Done 184 tasks | elapsed: 0.9s\n", + "[Parallel(n_jobs=-1)]: Done 298 out of 298 | elapsed: 1.3s finished\n" + ] + } + ], + "source": [ + "with parallel_backend('threading'):\n", + " 
predict = Parallel(n_jobs=-1, prefer='threads', verbose=1)(\n", + " delayed(map_predicted_label_multiclass)(batch, classif_threshold) for batch in predictions.iter_batches(batch_size = batch_size))\n", + "\n", + "predictions = np.concatenate(predict)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "encoded = []\n", + "encoded.append(-1)\n", + "labels = ['unknown']\n", + "for k, v in preprocessor.preprocessors[1].stats_['unique_values({})'.format('genus')].items():\n", + " encoded.append(v)\n", + " labels.append(k)\n", + "predictions_widecnn = pd.Series(np.empty(len(predictions), dtype=object))\n", + "for label, coded in zip(labels, encoded):\n", + " predictions_widecnn[predictions == coded] = label" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
classificationabundance
0bacteria298
\n", + "
" + ], + "text/plain": [ + " classification abundance\n", + "0 bacteria 298" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf_onesvm = pd.DataFrame(\n", + " predictions_onesvm.value_counts(),\n", + " columns = ['abundance']\n", + ")\n", + "clf_onesvm = clf_onesvm.reset_index(names = 'classification')\n", + "clf_onesvm\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
classificationabundance
0unknown298
\n", + "
" + ], + "text/plain": [ + " classification abundance\n", + "0 unknown 298" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf_widecnn = pd.DataFrame(\n", + " predictions_widecnn.value_counts(),\n", + " columns = ['abundance']\n", + ")\n", + "clf_widecnn = clf_widecnn.reset_index(names = 'classification')\n", + "clf_widecnn" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAts0lEQVR4nO3de3RTdb7//1coNLYlLfRCLxJKGQsHbPFSHLCjAnKTAQFhhjp45CKOuriMHUAULyMelQIeCiIjHj0cynWAcQRdXikiRURGKDBcREQsAtJaqbVpsZNCu39/+DW/iQXENJD0M8/HWlnL7P1J8t74R59rZyexWZZlCQAAwFBNAj0AAADAxUTsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoTQM9QDCoq6vTiRMn5HA4ZLPZAj0OAAC4AJZlqbKyUklJSWrS5Nznb4gdSSdOnJDT6Qz0GAAAwAfHjh1T69atz7mf2JHkcDgkff+PFRkZGeBpAADAhXC5XHI6nZ6/4+dC7Eiet64iIyOJHQAAGpmfugSFC5QBAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0QIaOwsXLlTnzp0931x8/fXX66233vLstyxL06dPV1JSksLCwtSjRw/t37/f6zncbrcmTpyo2NhYRUREaNCgQTp+/PilPhQAABCkAho7rVu31syZM7Vjxw7t2LFDN998swYPHuwJmtmzZys3N1cLFizQ9u3blZCQoD59+qiystLzHNnZ2Vq7dq1WrVqlLVu2qKqqSgMHDlRtbW2gDgsAAAQRm2VZVqCH+FfR0dF65plndNdddykpKUnZ2dl68MEHJX1/Fic+Pl6zZs3Svffeq4qKCsXFxWnZsmXKysqS9P//gvmbb76pfv36nfU13G633G635/4PPyRWUVHBb2MBANBIuFwuRUVF/eTf76C5Zqe2tlarVq3SqVOndP3116uoqEglJSXq27evZ43dblf37t21detWSVJhYaFOnz7ttSYpKUlpaWmeNWeTk5OjqKgoz83pdF68AwMAAAEV8NjZu3evmjdvLrvdrvvuu09r165Vp06dVFJSIkmKj4/3Wh8fH+/ZV1JSotDQULVs2fKca85m2rRpqqio8NyOHTvm56MCAADBommgB+jQoYN2796tb7/9Vn/72980atQoFRQUePb/+GfbLcv6yZ9y/6k1drtddru9YYMDAIBGIeCxExoaqiuuuEKS1KVLF23fvl3PPvus5zqdkpISJSYmetaXlpZ6zvYkJCSopqZG5eXlXmd3SktLlZmZeQmPInhkPLA00CMAABqB
wmdGBnqESybgb2P9mGVZcrvdSklJUUJCgvLz8z37ampqVFBQ4AmZjIwMNWvWzGtNcXGx9u3b928bOwAAwFtAz+w8/PDD6t+/v5xOpyorK7Vq1Spt2rRJb7/9tmw2m7KzszVjxgylpqYqNTVVM2bMUHh4uEaMGCFJioqK0tixYzV58mTFxMQoOjpaU6ZMUXp6unr37h3IQwMAAEEioLHz1Vdf6c4771RxcbGioqLUuXNnvf322+rTp48kaerUqaqurta4ceNUXl6url27av369XI4HJ7nmDt3rpo2barhw4erurpavXr1Ul5enkJCQgJ1WAAAIIgE3ffsBMKFfk6/MeCaHQDAhTDhmp1G9z07AAAAFwOxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGgBjZ2cnBxdd911cjgcatWqlYYMGaKDBw96rRk9erRsNpvXrVu3bl5r3G63Jk6cqNjYWEVERGjQoEE6fvz4pTwUAAAQpAIaOwUFBRo/fry2bdum/Px8nTlzRn379tWpU6e81t1yyy0qLi723N58802v/dnZ2Vq7dq1WrVqlLVu2qKqqSgMHDlRtbe2lPBwAABCEmgbyxd9++22v+4sXL1arVq1UWFiom266ybPdbrcrISHhrM9RUVGhRYsWadmyZerdu7ckafny5XI6ndqwYYP69etX7zFut1tut9tz3+Vy+eNwAABAEAqqa3YqKiokSdHR0V7bN23apFatWql9+/b6/e9/r9LSUs++wsJCnT59Wn379vVsS0pKUlpamrZu3XrW18nJyVFUVJTn5nQ6L8LRAACAYBA0sWNZliZNmqQbbrhBaWlpnu39+/fXihUrtHHjRs2ZM0fbt2/XzTff7DkzU1JSotDQULVs2dLr+eLj41VSUnLW15o2bZoqKio8t2PHjl28AwMAAAEV0Lex/tWECRO0Z88ebdmyxWt7VlaW57/T0tLUpUsXJScn64033tDQoUPP+XyWZclms511n91ul91u98/gAAAgqAXFmZ2JEyfqtdde03vvvafWrVufd21iYqKSk5N16NAhSVJCQoJqampUXl7uta60tFTx8fEXbWYAANA4BDR2LMvShAkT9Morr2jjxo1KSUn5yceUlZXp2LFjSkxMlCRlZGSoWbNmys/P96wpLi7Wvn37lJmZedFmBwAAjUNA38YaP368Vq5cqVdffVUOh8NzjU1UVJTCwsJUVVWl6dOna9iwYUpMTNSRI0f08MMPKzY2Vrfddptn7dixYzV58mTFxMQoOjpaU6ZMUXp6uufTWQAA4N9XQGNn4cKFkqQePXp4bV+8eLFGjx6tkJAQ7d27V0uXLtW3336rxMRE9ezZU6tXr5bD4fCsnzt3rpo2barhw4erurpavXr1Ul5enkJCQi7l4QAAgCBksyzLCvQQgeZyuRQVFaWKigpFRkYGepwGyXhgaaBHAAA0AoXPjAz0CA12oX+/g+ICZQAAgIuF2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAA
gNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGC0gMZOTk6OrrvuOjkcDrVq1UpDhgzRwYMHvdZYlqXp06crKSlJYWFh6tGjh/bv3++1xu12a+LEiYqNjVVERIQGDRqk48ePX8pDAQAAQSqgsVNQUKDx48dr27Ztys/P15kzZ9S3b1+dOnXKs2b27NnKzc3VggULtH37diUkJKhPnz6qrKz0rMnOztbatWu1atUqbdmyRVVVVRo4cKBqa2sDcVgAACCI2CzLsgI9xA++/vprtWrVSgUFBbrppptkWZaSkpKUnZ2tBx98UNL3Z3Hi4+M1a9Ys3XvvvaqoqFBcXJyWLVumrKwsSdKJEyfkdDr15ptvql+/fvVex+12y+12e+67XC45nU5VVFQoMjLy0hzsRZLxwNJAjwAAaAQKnxkZ6BEazOVyKSoq6if/fgfVNTsVFRWSpOjoaElSUVGRSkpK1LdvX88au92u7t27a+vWrZKkwsJCnT592mtNUlKS0tLSPGt+LCcnR1FRUZ6b0+m8WIcEAAACLGhix7IsTZo0STfccIPS0tIkSSUlJZKk+Ph4r7Xx8fGefSUlJQoNDVXLli3PuebHpk2bpoqKCs/t2LFj/j4cAAAQJJoGeoAfTJgwQXv27NGWLVvq7bPZbF73Lcuqt+3HzrfGbrfLbrf7PiwAAGg0guLMzsSJE/Xaa6/pvffeU+vWrT3bExISJKneGZrS0lLP2Z6EhATV1NSovLz8nGsAAMC/r4DGjmVZmjBhgl555RVt3LhRKSkpXvtTUlKUkJCg/Px8z7aamhoVFBQoMzNTkpSRkaFmzZp5rSkuLta+ffs8awAAwL+vgL6NNX78eK1cuVKvvvqqHA6H5wxOVFSUwsLCZLPZlJ2drRkzZig1NVWpqamaMWOGwsPDNWLECM/asWPHavLkyYqJiVF0dLSmTJmi9PR09e7dO5CHBwAAgkBAY2fhwoWSpB49enhtX7x4sUaPHi1Jmjp1qqqrqzVu3DiVl5era9euWr9+vRwOh2f93Llz1bRpUw0fPlzV1dXq1auX8vLyFBIScqkOBQAABKmg+p6dQLnQz+k3BnzPDgDgQvA9OwAAAIYgdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYzefYOXz4sB599FH97ne/U2lpqSTp7bff1v79+/02HAAAQEP5FDsFBQVKT0/X3//+d73yyiuqqqqSJO3Zs0ePP/64XwcEAABoCJ9i56GHHtJTTz2l/Px8hYaGerb37NlTH374od+GAwAAaCifYmfv3r267bbb6m2Pi4tTWVlZg4cCAADwF59ip0WLFiouLq63fdeuXbr88ssbPBQAAIC/+BQ7I0aM0IMPPqiSkhLZbDbV1dXpgw8+0JQpUzRyZOP/FVUAAGAOn2Ln6aefVps2bXT55ZerqqpKnTp10k033aTMzEw9+uij/p4RAADAZ019eVCzZs20YsUKPfnkk9q5c6fq6up0zTXXKDU11d/zAQAANIhPsfODdu3aqV27dv6aBQAAwO98ehvrN7/5jWbOnFlv+zPPPKPf/va3DR4KAADAX3z+UsEBAwbU237LLbdo8+bNDR4KAADAX3yKnaqqKq8vE/xBs2bN5HK5GjwUAACAv/gUO2lpaVq9enW97atWrVKnTp0aPBQAAIC/+HSB8mOPPaZhw4bp8OHDuvnmmyVJ7777rv7yl7/or3/9q18HBAAAaAifYmfQoEFat26dZsyYoZdffllhYWHq3LmzNmzYoO7du/t7RgAAAJ/5/NHzAQMGnPUiZQAAgGDSoO/ZqampUWlpqerq6ry2t2nTpkFDAQAA+ItPsXPo0CHddddd2rp1q9d2y7Jk
s9lUW1vrl+EAAAAayqfYGT16tJo2barXX39diYmJstls/p4LAADAL3yKnd27d6uwsFD/8R//4e95AAAA/Mqn79np1KmTTp486e9ZAAAA/M6n2Jk1a5amTp2qTZs2qaysTC6Xy+sGAAAQLHx6G6t3796SpF69enlt5wJlAAAQbHyKnffee8/fcwAAAFwUPsUO35IMAAAaiwZ9qeB3332no0ePqqamxmt7586dGzQUAACAv/gUO19//bXGjBmjt95666z7uWYHAAAEC58+jZWdna3y8nJt27ZNYWFhevvtt7VkyRKlpqbqtdde8/eMAAAAPvPpzM7GjRv16quv6rrrrlOTJk2UnJysPn36KDIyUjk5OfxAKAAACBo+ndk5deqUWrVqJUmKjo7W119/LUlKT0/Xzp07/TcdAABAA/kUOx06dNDBgwclSVdffbX+53/+R19++aVeeOEFJSYm+nVAAACAhvDpbazs7GwVFxdLkh5//HH169dPK1asUGhoqPLy8vw5HwAAQIP4FDt33HGH57+vueYaHTlyRJ988onatGmj2NhYvw0HAADQUA36np0fhIeH69prr/XHUwEAAPjVBcfOpEmTLvhJc3NzfRoGAADA3y44dnbt2uV1v7CwULW1terQoYMk6dNPP1VISIgyMjL8OyEAAEADXHDs/OuPf+bm5srhcGjJkiVq2bKlJKm8vFxjxozRjTfe6P8pAQAAfOTTR8/nzJmjnJwcT+hIUsuWLfXUU09pzpw5fhsOAACgoXyKHZfLpa+++qre9tLSUlVWVjZ4KAAAAH/xKXZuu+02jRkzRi+//LKOHz+u48eP6+WXX9bYsWM1dOhQf88IAADgM59i54UXXtCAAQP0n//5n0pOTlZycrLuuOMO9e/fX88///wFP8/mzZt16623KikpSTabTevWrfPaP3r0aNlsNq9bt27dvNa43W5NnDhRsbGxioiI0KBBg3T8+HFfDgsAABjIp9gJDw/X888/r7KyMu3atUs7d+7UN998o+eff14REREX/DynTp3SVVddpQULFpxzzS233KLi4mLP7c033/Tan52drbVr12rVqlXasmWLqqqqNHDgQNXW1vpyaAAAwDAN+lLBiIgIde7c2efH9+/fX/379z/vGrvdroSEhLPuq6io0KJFi7Rs2TL17t1bkrR8+XI5nU5t2LBB/fr183k2AABgBp9i59SpU5o5c6beffddlZaWqq6uzmv/559/7pfhJGnTpk1q1aqVWrRooe7du+vpp5/2/OJ6YWGhTp8+rb59+3rWJyUlKS0tTVu3bj1n7Ljdbrndbs99l8vlt3kBAEBw8Sl27r77bhUUFOjOO+9UYmKibDabv+eS9P2Zn9/+9rdKTk5WUVGRHnvsMd18880qLCyU3W5XSUmJQkNDvT4CL0nx8fEqKSk55/Pm5OToiSeeuCgzAwCA4OJT7Lz11lt644039Ktf/crf83jJysry/HdaWpq6dOmi5ORkvfHGG+f91JdlWecNsGnTpnn9/IXL5ZLT6fTP0AAAIKj4dIFyy5YtFR0d7e9ZflJiYqKSk5N16NAhSVJCQoJqampUXl7uta60tFTx8fHnfB673a7IyEivGwAAMJNPsfPkk0/qT3/6k7777jt/z3NeZWVlOnbsmBITEyVJGRkZatasmfLz8z1riouLtW/fPmVmZl7S2QAAQHDy6W2sOXPm6PDhw4qPj1fbtm3VrFkzr/07d+68oOepqqrSZ5995rlfVFSk3bt3Kzo6WtHR0Zo+fbqGDRumxMREHTlyRA8//LBiY2N12223SZKioqI0duxYTZ48WTExMYqOjtaUKVOUnp7u+XQWAAD49+ZT7AwZMsQvL75jxw717NnTc/+H62hGjRqlhQsXau/evVq6dKm+/fZbJSYmqmfPnlq9erUcDofnMXPnzlXTpk01fPhwVVdXq1evXsrLy1NISIhfZgQAAI2bzbIsK9BDBJrL5VJUVJQqKioa/fU7GQ8s
DfQIAIBGoPCZkYEeocEu9O+3T9fsAAAANBY+vY3VpEmT8360m59qAAAAwcKn2Fm7dq3X/dOnT2vXrl1asmQJX9YHAACCik+xM3jw4HrbfvOb3+jKK6/U6tWrNXbs2AYPBgAA4A9+vWana9eu2rBhgz+fEgAAoEH8FjvV1dV67rnn1Lp1a389JQAAQIP59DZWy5YtvS5QtixLlZWVCg8P1/Lly/02HAAAQEP5FDvz5s3zut+kSRPFxcWpa9eu9X6BHAAAIJB8ip1Ro0b5ew4AAICLwqfYkaTy8nItWrRIBw4ckM1mU8eOHTVmzJiA/Bo6AADAufh0gXJBQYHatm2r+fPnq7y8XN98843mz5+vlJQUFRQU+HtGAAAAn/l0Zmf8+PHKysrSwoULPT+4WVtbq3Hjxmn8+PHat2+fX4cEAADwlU9ndg4fPqzJkyd7/bJ4SEiIJk2apMOHD/ttOAAAgIbyKXauvfZaHThwoN72AwcO6Oqrr27oTAAAAH5zwW9j7dmzx/Pff/jDH3T//ffrs88+U7du3SRJ27Zt05///GfNnDnT/1MCAAD4yGZZlnUhC3/4pfOfWm6z2Rrdr567XC5FRUWpoqJCkZGRgR6nQTIeWBroEQAAjUDhMyMDPUKDXejf7ws+s1NUVOSXwQAAAC6lC46d5OTkets+/vhjHT16VDU1NZ5tNpvtrGsBAAACwaePnn/++ee67bbbtHfvXq+3tn74vazG9jYWAAAwl0+fxrr//vuVkpKir776SuHh4dq3b582b96sLl26aNOmTX4eEQAAwHc+ndn58MMPtXHjRsXFxalJkyYKCQnRDTfcoJycHP3hD3/Qrl27/D0nAACAT3w6s1NbW6vmzZtLkmJjY3XixAlJ31/Xc/DgQf9NBwAA0EA+ndlJS0vTnj171K5dO3Xt2lWzZ89WaGioXnzxRbVr187fMwIAAPjMp9h59NFHderUKUnSU089pYEDB+rGG29UTEyMVq9e7dcBAQAAGsKn2OnXr5/nv9u1a6ePP/5Y33zzjVq2bOn5RBYAAEAw8Cl2ziY6OtpfTwUAAOA3Pl2gDAAA0FgQOwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoAY2dzZs369Zbb1VSUpJsNpvWrVvntd+yLE2fPl1JSUkKCwtTjx49tH//fq81brdbEydOVGxsrCIiIjRo0CAdP378Eh4FAAAIZgGNnVOnTumqq67SggULzrp/9uzZys3N1YIFC7R9+3YlJCSoT58+qqys9KzJzs7W2rVrtWrVKm3ZskVVVVUaOHCgamtrL9VhAACAINY0kC/ev39/9e/f/6z7LMvSvHnz9Mgjj2jo0KGSpCVLlig+Pl4rV67Uvffeq4qKCi1atEjLli1T7969JUnLly+X0+nUhg0b1K9fv0t2LAAAIDgF7TU7RUVFKikpUd++fT3b7Ha7unfvrq1bt0qSCgsLdfr0aa81SUlJSktL86w5G7fbLZfL5XUDAABmCtrYKSkpkSTFx8d7bY+Pj/fsKykpUWhoqFq2bHnONWeTk5OjqKgoz83pdPp5egAAECyCNnZ+YLPZvO5bllVv24/91Jpp06apoqLCczt27JhfZgUAAMEnaGMnISFBkuqdoSktLfWc7UlISFBNTY3Ky8vPueZs7Ha7IiMjvW4AAMBMQRs7KSkpSkhIUH5+vmdbTU2NCgoKlJmZKUnKyMhQs2bNvNYUFxdr3759njUAAODfW0A/jVVVVaXPPvvMc7+oqEi7d+9WdHS02rRpo+zsbM2YMUOpqalKTU3VjBkzFB4erhEjRkiSoqKiNHbsWE2ePFkxMTGKjo7WlClTlJ6e7vl0FgAA+PcW0NjZsWOHevbs6bk/adIkSdKo
UaOUl5enqVOnqrq6WuPGjVN5ebm6du2q9evXy+FweB4zd+5cNW3aVMOHD1d1dbV69eqlvLw8hYSEXPLjAQAAwcdmWZYV6CECzeVyKSoqShUVFY3++p2MB5YGegQAQCNQ+MzIQI/QYBf69ztor9kBAADwB2IHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YI6dqZPny6bzeZ1S0hI8Oy3LEvTp09XUlKSwsLC1KNHD+3fvz+AEwMAgGAT1LEjSVdeeaWKi4s9t71793r2zZ49W7m5uVqwYIG2b9+uhIQE9enTR5WVlQGcGAAABJOgj52mTZsqISHBc4uLi5P0/VmdefPm6ZFHHtHQoUOVlpamJUuW6LvvvtPKlSsDPDUAAAgWQR87hw4dUlJSklJSUnT77bfr888/lyQVFRWppKREffv29ay12+3q3r27tm7det7ndLvdcrlcXjcAAGCmoI6drl27aunSpXrnnXf00ksvqaSkRJmZmSorK1NJSYkkKT4+3usx8fHxnn3nkpOTo6ioKM/N6XRetGMAAACBFdSx079/fw0bNkzp6enq3bu33njjDUnSkiVLPGtsNpvXYyzLqrftx6ZNm6aKigrP7dixY/4fHgAABIWgjp0fi4iIUHp6ug4dOuT5VNaPz+KUlpbWO9vzY3a7XZGRkV43AABgpkYVO263WwcOHFBiYqJSUlKUkJCg/Px8z/6amhoVFBQoMzMzgFMCAIBg0jTQA5zPlClTdOutt6pNmzYqLS3VU089JZfLpVGjRslmsyk7O1szZsxQamqqUlNTNWPGDIWHh2vEiBGBHh0AAASJoI6d48eP63e/+51OnjypuLg4devWTdu2bVNycrIkaerUqaqurta4ceNUXl6url27av369XI4HAGeHAAABAubZVlWoIcINJfLpaioKFVUVDT663cyHlga6BEAAI1A4TMjAz1Cg13o3+9Gdc0OAADAz0XsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMJoxsfP8888rJSVFl112mTIyMvT+++8HeiQAABAEjIid1atXKzs7W4888oh27dqlG2+8Uf3799fRo0cDPRoAAAgwI2InNzdXY8eO1d13362OHTtq3rx5cjqdWrhwYaBHAwAAAdY00AM0VE1NjQoLC/XQQw95be/bt6+2bt161se43W653W7P/YqKCkmSy+W6eINeIrXu6kCPAABoBEz4m/fDMViWdd51jT52Tp48qdraWsXHx3ttj4+PV0lJyVkfk5OToyeeeKLedqfTeVFmBAAg2EQ9d1+gR/CbyspKRUVFnXN/o4+dH9hsNq/7lmXV2/aDadOmadKkSZ77dXV1+uab
bxQTE3POxwBonFwul5xOp44dO6bIyMhAjwPAjyzLUmVlpZKSks67rtHHTmxsrEJCQuqdxSktLa13tucHdrtddrvda1uLFi0u1ogAgkBkZCSxAxjofGd0ftDoL1AODQ1VRkaG8vPzvbbn5+crMzMzQFMBAIBg0ejP7EjSpEmTdOedd6pLly66/vrr9eKLL+ro0aO67z5z3o8EAAC+MSJ2srKyVFZWpv/6r/9ScXGx0tLS9Oabbyo5OTnQowEIMLvdrscff7zeW9cA/n3YrJ/6vBYAAEAj1uiv2QEAADgfYgcAABiN2AEAAEYjdgBccj169FB2dnagx7hgbdu21bx58wI9BgAfGfFpLAD4V6NHj9a3336rdevW+eX5tm/froiICL88F4BLj9gBgHOoqalRaGio4uLiAj0KgAbgbSwAAXHmzBlNmDBBLVq0UExMjB599FHPLxcvX75cXbp0kcPhUEJCgkaMGKHS0lKvx+/fv18DBgxQZGSkHA6HbrzxRh0+fFjTp0/XkiVL9Oqrr8pms8lms2nTpk2SpC+//FJZWVlq2bKlYmJiNHjwYB05csTznKNHj9aQIUOUk5OjpKQktW/fXlL9t7Fyc3OVnp6uiIgIOZ1OjRs3TlVVVRf13wuA74gdAAGxZMkSNW3aVH//+981f/58zZ07V//7v/8r6fszKk8++aT+8Y9/aN26dSoqKtLo0aM9j/3yyy9100036bLLLtPGjRtVWFiou+66S2fOnNGUKVM0fPhw3XLLLSouLlZxcbEyMzP13XffqWfPnmrevLk2b96sLVu2qHnz5rrllltUU1Pjee53331XBw4cUH5+vl5//fWzzt6kSRPNnz9f+/bt05IlS7Rx40ZNnTr1ov57AWgACwAuse7du1sdO3a06urqPNsefPBBq2PHjmdd/9FHH1mSrMrKSsuyLGvatGlWSkqKVVNTc9b1o0aNsgYPHuy1bdGiRVaHDh28XtPtdlthYWHWO++843lcfHy85Xa7vR6bnJxszZ0795zHs2bNGismJuac+wEEFmd2AAREt27dZLPZPPevv/56HTp0SLW1tdq1a5cGDx6s5ORkORwO9ejRQ5J09OhRSdLu3bt14403qlmzZhf8eoWFhfrss8/kcDjUvHlzNW/eXNHR0frnP/+pw4cPe9alp6crNDT0vM/13nvvqU+fPrr88svlcDg0cuRIlZWV6dSpUz/jXwDApcIFygCCyj//+U/17dtXffv21fLlyxUXF6ejR4+qX79+nrebwsLCfvbz1tXVKSMjQytWrKi3718vQP6pT1198cUX+vWvf6377rtPTz75pKKjo7VlyxaNHTtWp0+f/tlzAbj4iB0AAbFt27Z691NTU/XJJ5/o5MmTmjlzppxOpyRpx44dXms7d+6sJUuW6PTp02c9uxMaGqra2lqvbddee61Wr16tVq1aKTIy0ue5d+zYoTNnzmjOnDlq0uT7k+Nr1qzx+fkAXHy8jQUgII4dO6ZJkybp4MGD+stf/qLnnntO999/v9q0aaPQ0FA999xz+vzzz/Xaa6/pySef9HrshAkT5HK5dPvtt2vHjh06dOiQli1bpoMHD0r6/tNTe/bs0cGDB3Xy5EmdPn1ad9xxh2JjYzV48GC9//77KioqUkFBge6//34dP378guf+xS9+oTNnznjmW7ZsmV544QW//tsA8C9iB0BAjBw5UtXV1frlL3+p8ePHa+LEibrnnnsUFxenvLw8/fWvf1WnTp00c+ZM/fd//7fXY2NiYrRx40ZVVVWpe/fuysjI0EsvveQ5y/P73/9eHTp0UJcuXRQXF6cPPvhA4eHh2rx5s9q0aaOhQ4eqY8eOuuuuu1RdXf2zzvRcffXVys3N1axZs5SWlqYVK1YoJyfHr/82APzLZln/74stAAAADMSZHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AfnXkyBHZbDbt3r37or9W
Xl6eWrRo4bXtxRdflNPpVJMmTTRv3jxNnz5dV1999UWfpW3btpo3b95Ffx0APx/foAzAr44cOaKUlBTt2rXrokdGdXW1Kisr1apVK0mSy+VSbGyscnNzNWzYMEVFRamurk5ut1sxMTF+ec28vDxlZ2fr22+/9dr+9ddfKyIiQuHh4X55HQD+w6+eA2i0wsLCFBYW5rl/9OhRnT59WgMGDFBiYqJne/PmzS/6LHFxcRf9NQD4hrexAPikrq5Os2bN0hVXXCG73a42bdro6aefrreutrZWY8eOVUpKisLCwtShQwc9++yzXms2bdqkX/7yl4qIiFCLFi30q1/9Sl988YUk6R//+Id69uwph8OhyMhIZWRkaMeOHZK838bKy8tTenq6JKldu3ay2Ww6cuTIWd/G+r//+z9deeWVstvtSkxM1IQJEzz7cnNzlZ6eroiICDmdTo0bN05VVVWeOceMGaOKigrZbDbZbDZNnz5dUv23sY4eParBgwerefPmioyM1PDhw/XVV1959v8w17Jly9S2bVtFRUXp9ttvV2Vl5c//nwHgvIgdAD6ZNm2aZs2apccee0wff/yxVq5cqfj4+Hrr6urq1Lp1a61Zs0Yff/yx/vSnP+nhhx/WmjVrJElnzpzRkCFD1L17d+3Zs0cffvih7rnnHtlsNknSHXfcodatW2v79u0qLCzUQw895Pl183+VlZWlDRs2SJI++ugjFRcXy+l01lu3cOFCjR8/Xvfcc4/27t2r1157TVdccYVnf5MmTTR//nzt27dPS5Ys0caNGzV16lRJUmZmpubNm6fIyEgVFxeruLhYU6ZMqfcalmVpyJAh+uabb1RQUKD8/HwdPnxYWVlZXusOHz6sdevW6fXXX9frr7+ugoICzZw580L/FwC4UBYA/Ewul8uy2+3WSy+9VG9fUVGRJcnatWvXOR8/btw4a9iwYZZlWVZZWZklydq0adNZ1zocDisvL++s+xYvXmxFRUV57u/atcuSZBUVFXm2Pf7449ZVV13luZ+UlGQ98sgj5z64H1mzZo0VExNzztf8QXJysjV37lzLsixr/fr1VkhIiHX06FHP/v3791uSrI8++sgzV3h4uOVyuTxrHnjgAatr164XPBuAC8OZHQA/24EDB+R2u9WrV68LWv/CCy+oS5cuiouLU/PmzfXSSy/p6NGjkqTo6GiNHj1a/fr106233qpnn31WxcXFnsdOmjRJd999t3r37q2ZM2fq8OHDPs9dWlqqEydOnHfu9957T3369NHll18uh8OhkSNHqqysTKdOnbrg1zlw4ICcTqfXmaVOnTqpRYsWOnDggGdb27Zt5XA4PPcTExNVWlr6M48KwE8hdgD8bP96UfBPWbNmjf74xz/qrrvu0vr167V7926NGTNGNTU1njWLFy/Whx9+qMzMTK1evVrt27fXtm3bJH1/bcv+/fs1YMAAbdy4UZ06ddLatWsvytxffPGFfv3rXystLU1/+9vfVFhYqD//+c+SpNOnT1/w61iW5Xkb7nzbf/x2nM1mU11d3QW/DoALQ+wA+NlSU1MVFhamd9999yfXvv/++8rMzNS4ceN0zTXX6Iorrjjr2ZlrrrlG06ZN09atW5WWlqaVK1d69rVv315//OMftX79eg0dOlSLFy/2aW6Hw6G2bduec+4dO3bozJkzmjNnjrp166b27dvrxIkTXmtCQ0NVW1t73tfp1KmTjh49qmPHjnm2ffzxx6qoqFDHjh19mh2A74gdAD/bZZddpgcffFBTp07V0qVLdfjwYW3btk2LFi2qt/aKK67Qjh079M477+jTTz/VY489pu3bt3v2FxUVadq0afrwww/1xRdfaP369fr000/VsWNHVVdXa8KECdq0aZO++OILffDBB9q+fXuDgmH69OmaM2eO5s+fr0OHDmnnzp167rnnJEm/+MUvdObMGT333HP6/PPPtWzZMr3wwgtej2/btq2qqqr07rvv6uTJk/ruu+/qvUbv
3r3VuXNn3XHHHdq5c6c++ugjjRw5Ut27d1eXLl18nh2Ab4gdAD557LHHNHnyZP3pT39Sx44dlZWVddbrTe677z4NHTpUWVlZ6tq1q8rKyjRu3DjP/vDwcH3yyScaNmyY2rdvr3vuuUcTJkzQvffeq5CQEJWVlWnkyJFq3769hg8frv79++uJJ57wee5Ro0Zp3rx5ev7553XllVdq4MCBOnTokCTp6quvVm5urmbNmqW0tDStWLFCOTk5Xo/PzMzUfffdp6ysLMXFxWn27Nn1XsNms2ndunVq2bKlbrrpJvXu3Vvt2rXT6tWrfZ4bgO/4BmUAAGA0zuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAw2v8H2Wb2AyOSoesAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(\n", + " y = 'abundance',\n", + " x = 'classification',\n", + " data = clf_onesvm,\n", + " estimator = sum,\n", + " ci = None,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAtJklEQVR4nO3dfVjU9Z7/8deIMgcQRhHkJhFxxTYDs7DVqMQb1DyapZ6jlZs32VaXN+dw1CzqWHQqUVtvKsu2thW1XHUrrau7I2pQHnRT1PW2MsPUFaIUZ8BYUPj+/uhyfmdEzYbRGT49H9c11+V8v5+ZeX/rD57Xd74zY7MsyxIAAIChmvl7AAAAgMuJ2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0Zr7e4BAUF9fr2PHjik8PFw2m83f4wAAgEtgWZYqKysVHx+vZs0ufP6G2JF07NgxJSQk+HsMAADghSNHjqhdu3YX3E/sSAoPD5f003+siIgIP08DAAAuhcvlUkJCgvvv+IUQO5L7rauIiAhiBwCAJubnLkHhAmUAAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNL/GzuLFi9W1a1f3NxffdNNN+uijj9z7LctSTk6O4uPjFRISot69e2vv3r0ez1FTU6MpU6YoKipKYWFhGjp0qI4ePXqlDwUAAAQov8ZOu3btNHv2bG3btk3btm1T3759dccdd7iDZu7cuZo/f74WLVqkrVu3KjY2Vv3791dlZaX7ObKysrRmzRqtXLlSmzZtUlVVlYYMGaK6ujp/HRYAAAggNsuyLH8P8fciIyP13HPP6b777lN8fLyysrL0yCOPSPrpLE5MTIzmzJmjBx98UE6nU9HR0Vq+fLlGjRol6f//gvmHH36ogQMHnvc1ampqVFNT475/9ofEnE4nv40FAEAT4XK55HA4fvbvd8Bcs1NXV6eVK1fq1KlTuummm1RSUqKysjINGDDAvcZutysjI0NFRUWSpOLiYp0+fdpjTXx8vFJSUtxrzic3N1cOh8N9S0hIuHwHBgAA/MrvsbN79261bNlSdrtdDz30kNasWaMuXbqorKxMkhQTE+OxPiYmxr2vrKxMwcHBat269QXXnE92dracTqf7duTIER8fFQAACBTN/T3A1VdfrZ07d+rkyZN6++23NXbsWBUWFrr3n/uz7ZZl/exPuf/cGrvdLrvd3rjBAQBAk+D32AkODlanTp0kSd27d9fWrVv1/PPPu6/TKSsrU1xcnHt9eXm5+2xPbGysamtrVVFR4XF2p7y8XOnp6VfwKAJH2sPL/D0CAKAJKH5ujL9HuGL8/jbWuSzLUk1NjZKSkhQbG6v8/Hz3vtraWhUWFrpDJi0tTS1atPBYU1paqj179vxqYwcAAHjy65mdxx57TIMGDVJCQoIqKyu1cuVKFRQU6OOPP5bNZlNWVpZmzZql5ORkJScna9asWQoNDdU9
99wjSXI4HJowYYKmTZumNm3aKDIyUtOnT1dqaqoyMzP9eWgAACBA+DV2vvvuO917770qLS2Vw+FQ165d9fHHH6t///6SpBkzZqi6uloTJ05URUWFevTooXXr1ik8PNz9HAsWLFDz5s01cuRIVVdXq1+/fsrLy1NQUJC/DgsAAASQgPueHX+41M/pNwVcswMAuBQmXLPT5L5nBwAA4HIgdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABjNr7GTm5urG2+8UeHh4Wrbtq3uvPNOffnllx5rxo0bJ5vN5nHr2bOnx5qamhpNmTJFUVFRCgsL09ChQ3X06NEreSgAACBA+TV2CgsLNWnSJG3ZskX5+fk6c+aMBgwYoFOnTnmsu+2221RaWuq+ffjhhx77s7KytGbNGq1cuVKbNm1SVVWVhgwZorq6uit5OAAAIAA19+eLf/zxxx73lyxZorZt26q4uFi9evVyb7fb7YqNjT3vczidTr3++utavny5MjMzJUlvvPGGEhIStH79eg0cOLDBY2pqalRTU+O+73K5fHE4AAAgAAXUNTtOp1OSFBkZ6bG9oKBAbdu2VefOnfUv//IvKi8vd+8rLi7W6dOnNWDAAPe2+Ph4paSkqKio6Lyvk5ubK4fD4b4lJCRchqMBAACBIGBix7IsTZ06VbfccotSUlLc2wcNGqQ333xTGzdu1Lx587R161b17dvXfWamrKxMwcHBat26tcfzxcTEqKys7LyvlZ2dLafT6b4dOXLk8h0YAADwK7++jfX3Jk+erF27dmnTpk0e20eNGuX+d0pKirp3767ExER98MEHGj58+AWfz7Is2Wy28+6z2+2y2+2+GRwAAAS0gDizM2XKFL333nv65JNP1K5du4uujYuLU2Jiog4cOCBJio2NVW1trSoqKjzWlZeXKyYm5rLNDAAAmga/xo5lWZo8ebLeeecdbdy4UUlJST/7mOPHj+vIkSOKi4uTJKWlpalFixbKz893ryktLdWePXuUnp5+2WYHAABNg1/fxpo0aZJWrFihd999V+Hh4e5rbBwOh0JCQlRVVaWcnByNGDFCcXFxOnTokB577DFFRUVp2LBh7rUTJkzQtGnT1KZNG0VGRmr69OlKTU11fzoLAAD8evk1dhYvXixJ6t27t8f2JUuWaNy4cQoKCtLu3bu1bNkynTx5UnFxcerTp49WrVql8PBw9/oFCxaoefPmGjlypKqrq9WvXz/l5eUpKCjoSh4OAAAIQDbLsix/D+FvLpdLDodDTqdTERER/h6nUdIeXubvEQAATUDxc2P8PUKjXerf74C4QBkAAOByIXYAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABg
NGIHAAAYza+xk5ubqxtvvFHh4eFq27at7rzzTn355ZceayzLUk5OjuLj4xUSEqLevXtr7969Hmtqamo0ZcoURUVFKSwsTEOHDtXRo0ev5KEAAIAA5dfYKSws1KRJk7Rlyxbl5+frzJkzGjBggE6dOuVeM3fuXM2fP1+LFi3S1q1bFRsbq/79+6uystK9JisrS2vWrNHKlSu1adMmVVVVaciQIaqrq/PHYQEAgABisyzL8vcQZ33//fdq27atCgsL1atXL1mWpfj4eGVlZemRRx6R9NNZnJiYGM2ZM0cPPvignE6noqOjtXz5co0aNUqSdOzYMSUkJOjDDz/UwIEDG7xOTU2Nampq3PddLpcSEhLkdDoVERFxZQ72Mkl7eJm/RwAANAHFz43x9wiN5nK55HA4fvbvd0Bds+N0OiVJkZGRkqSSkhKVlZVpwIAB7jV2u10ZGRkqKiqSJBUXF+v06dMea+Lj45WSkuJec67c3Fw5HA73LSEh4XIdEgAA8LOAiR3LsjR16lTdcsstSklJkSSVlZVJkmJiYjzWxsTEuPeVlZUpODhYrVu3vuCac2VnZ8vpdLpvR44c8fXhAACAANHc3wOcNXnyZO3atUubNm1qsM9ms3nctyyrwbZzXWyN3W6X3W73flgAANBkBMSZnSlTpui9997TJ598onbt2rm3x8bGSlKDMzTl5eXusz2xsbGqra1VRUXFBdcAAIBfL7/GjmVZmjx5st555x1t3LhRSUlJHvuTkpIUGxur/Px897ba2loVFhYqPT1dkpSWlqYWLVp4rCktLdWePXvcawAAwK+XX9/GmjRpklasWKF3331X4eHh7jM4DodDISEhstlsysrK0qxZs5ScnKzk5GTNmjVLoaGhuueee9xrJ0yYoGnTpqlNmzaKjIzU9OnTlZqaqszMTH8eHgAACAB+jZ3FixdLknr37u2xfcmSJRo3bpwkacaMGaqurtbEiRNVUVGhHj16aN26dQoPD3evX7BggZo3b66RI0equrpa/fr1U15enoKCgq7UoQAAgAAVUN+z4y+X+jn9poDv2QEAXAq+ZwcAAMAQxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAo3kdOwcPHtSf//xn3X333SovL5ckffzxx9q7d6/PhgMAAGgsr2KnsLBQqamp+u///m+98847qqqqkiTt2rVLTz75pE8HBAAAaAyvYufRRx/VM888o/z8fAUHB7u39+nTR5s3b/bZcAAAAI3lVezs3r1bw4YNa7A9Ojpax48fb/RQAAAAvuJV7LRq1UqlpaUNtu/YsUNXXXVVo4cCAADwFa9i55577tEjjzyisrIy2Ww21dfX629/+5umT5+uMWOa/q+oAgAAc3gVO88++6zat2+vq666SlVVVerSpYt69eql9PR0/fnPf/b1jAAAAF5r7s2DWrRooTfffFNPP/20tm/frvr6el1//fVKTk729XwAAACN4lXsnNWxY0d17NjRV7MAAAD4nFdvY/3ud7/T7NmzG2x/7rnn9Pvf/77RQwEAAPiK118qOHjw4Abbb7vtNn366aeNHgoAAMBXvIqdqqoqjy8TPKtFixZyuVyNHgoAAMBXvIqdlJQUrVq1qsH2lStXqkuXLo0eCgAAwFe8ukB55syZGjFihA4ePKi+fftKkjZs2KD//M//1H/913/5dEAAAIDG8Cp2hg4dqrVr12rWrFl66623FBISoq5du2r9+vXKyMjw9YwAAABe8/qj54MHDz7vRcoAAACBpFHfs1NbW6vy8nLV19d7bG/fvn2jhgIAAPAVr2LnwIEDuu+++1RUVOSx3bIs2Ww21dXV+WQ4AACAxvIqdsaNG6fmzZvr/fffV1xcnGw2m6/nAgAA8AmvYmfnzp0qLi7WP/7jP/p6HgAAAJ/y6nt2unTpoh9++MHXswAAAPicV7EzZ84czZgxQwUFBTp+/LhcLpfHDQAAIFB49TZWZmamJKlf
v34e27lAGQAABBqvYueTTz7x9RwAAACXhVexw7ckAwCApqJRXyr4448/6vDhw6qtrfXY3rVr10YNBQAA4Ctexc7333+v8ePH66OPPjrvfq7ZAQAAgcKrT2NlZWWpoqJCW7ZsUUhIiD7++GMtXbpUycnJeu+993w9IwAAgNe8OrOzceNGvfvuu7rxxhvVrFkzJSYmqn///oqIiFBubi4/EAoAAAKGV2d2Tp06pbZt20qSIiMj9f3330uSUlNTtX37dt9NBwAA0Ehexc7VV1+tL7/8UpLUrVs3/du//Zv+93//V6+88ori4uJ8OiAAAEBjePU2VlZWlkpLSyVJTz75pAYOHKg333xTwcHBysvL8+V8AAAAjeJV7IwePdr97+uvv16HDh3SF198ofbt2ysqKspnwwEAADRWo75n56zQ0FDdcMMNvngqAAAAn7rk2Jk6deolP+n8+fO9GgYAAMDXLjl2duzY4XG/uLhYdXV1uvrqqyVJX331lYKCgpSWlubbCQEAABrhkmPn73/8c/78+QoPD9fSpUvVunVrSVJFRYXGjx+vW2+91fdTAgAAeMmrj57PmzdPubm57tCRpNatW+uZZ57RvHnzfDYcAABAY3kVOy6XS999912D7eXl5aqsrGz0UAAAAL7iVewMGzZM48eP11tvvaWjR4/q6NGjeuuttzRhwgQNHz7c1zMCAAB4zavYeeWVVzR48GD98z//sxITE5WYmKjRo0dr0KBBevnlly/5eT799FPdfvvtio+Pl81m09q1az32jxs3TjabzePWs2dPjzU1NTWaMmWKoqKiFBYWpqFDh+ro0aPeHBYAADCQV7ETGhqql19+WcePH9eOHTu0fft2nThxQi+//LLCwsIu+XlOnTql6667TosWLbrgmttuu02lpaXu24cffuixPysrS2vWrNHKlSu1adMmVVVVaciQIaqrq/Pm0AAAgGEa9aWCYWFh6tq1q9ePHzRokAYNGnTRNXa7XbGxsefd53Q69frrr2v58uXKzMyUJL3xxhtKSEjQ+vXrNXDgQK9nAwAAZvAqdk6dOqXZs2drw4YNKi8vV319vcf+b775xifDSVJBQYHatm2rVq1aKSMjQ88++6z7F9eLi4t1+vRpDRgwwL0+Pj5eKSkpKioqumDs1NTUqKamxn3f5XL5bF4AABBYvIqd+++/X4WFhbr33nsVFxcnm83m67kk/XTm5/e//70SExNVUlKimTNnqm/fviouLpbdbldZWZmCg4M9PgIvSTExMSorK7vg8+bm5uqpp566LDMDAIDA4lXsfPTRR/rggw908803+3oeD6NGjXL/OyUlRd27d1diYqI++OCDi37qy7KsiwZYdna2x89fuFwuJSQk+GZoAAAQULy6QLl169aKjIz09Sw/Ky4uTomJiTpw4IAkKTY2VrW1taqoqPBYV15erpiYmAs+j91uV0REhMcNAACYyavYefrpp/XEE0/oxx9/9PU8F3X8+HEdOXJEcXFxkqS0tDS1aNFC+fn57jWlpaXas2eP0tPTr+hsAAAgMHn1Nta8efN08OBBxcTEqEOHDmrRooXH/u3bt1/S81RVVenrr7923y8pKdHOnTsVGRmpyMhI5eTkaMSIEYqLi9OhQ4f02GOPKSoqSsOGDZMkORwOTZgwQdOmTVObNm0UGRmp6dOnKzU11f3pLAAA8OvmVezceeedPnnxbdu2qU+fPu77Z6+jGTt2rBYvXqzdu3dr2bJlOnnypOLi4tSnTx+tWrVK4eHh7scsWLBAzZs318iRI1VdXa1+/fopLy9PQUFBPpkRAAA0bTbLsix/D+FvLpdLDodDTqezyV+/k/bwMn+PAABoAoqfG+PvERrtUv9+e3XNDgAAQFPh1dtYzZo1u+hHu/mpBgAAECi8ip01a9Z43D99+rR27NihpUuX8mV9AAAgoHgVO3fccUeDbb/73e907bXXatWqVZowYUKjBwMAAPAFn16z06NHD61fv96XTwkA
ANAoPoud6upqvfjii2rXrp2vnhIAAKDRvHobq3Xr1h4XKFuWpcrKSoWGhuqNN97w2XAAAACN5VXsLFy40ON+s2bNFB0drR49ejT4BXIAAAB/8ip2xo4d6+s5AAAALguvYkeSKioq9Prrr2v//v2y2Wy65pprNH78eL/8GjoAAMCFeHWBcmFhoTp06KAXXnhBFRUVOnHihF544QUlJSWpsLDQ1zMCAAB4zaszO5MmTdKoUaO0ePFi9w9u1tXVaeLEiZo0aZL27Nnj0yEBAAC85dWZnYMHD2ratGkevyweFBSkqVOn6uDBgz4bDgAAoLG8ip0bbrhB+/fvb7B9//796tatW2NnAgAA8JlLfhtr165d7n//4Q9/0B//+Ed9/fXX6tmzpyRpy5YteumllzR79mzfTwkAAOAlm2VZ1qUsPPtL5z+33GazNblfPXe5XHI4HHI6nYqIiPD3OI2S9vAyf48AAGgCip8b4+8RGu1S/35f8pmdkpISnwwGAABwJV1y7CQmJjbYtm/fPh0+fFi1tbXubTab7bxrAQAA/MGrj55/8803GjZsmHbv3u3x1tbZ38tqam9jAQAAc3n1aaw//vGPSkpK0nfffafQ0FDt2bNHn376qbp3766CggIfjwgAAOA9r87sbN68WRs3blR0dLSaNWumoKAg3XLLLcrNzdUf/vAH7dixw9dzAgAAeMWrMzt1dXVq2bKlJCkqKkrHjh2T9NN1PV9++aXvpgMAAGgkr87spKSkaNeuXerYsaN69OihuXPnKjg4WK+++qo6duzo6xkBAAC85lXs/PnPf9apU6ckSc8884yGDBmiW2+9VW3atNGqVat8OiAAAEBjeBU7AwcOdP+7Y8eO2rdvn06cOKHWrVu7P5EFAAAQCLyKnfOJjIz01VMBAAD4jFcXKAMAADQVxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwml9j59NPP9Xtt9+u+Ph42Ww2rV271mO/ZVnKyclRfHy8QkJC1Lt3b+3du9djTU1NjaZMmaKoqCiFhYVp6NChOnr06BU8CgAAEMj8GjunTp3Sddddp0WLFp13/9y5czV//nwtWrRIW7duVWxsrPr376/Kykr3mqysLK1Zs0YrV67Upk2bVFVVpSFDhqiuru5KHQYAAAhgzf354oMGDdKgQYPOu8+yLC1cuFCPP/64hg8fLklaunSpYmJitGLFCj344INyOp16/fXXtXz5cmVmZkqS3njjDSUkJGj9+vUaOHDgFTsWAAAQmAL2mp2SkhKVlZVpwIAB7m12u10ZGRkqKiqSJBUXF+v06dMea+Lj45WSkuJecz41NTVyuVweNwAAYKaAjZ2ysjJJUkxMjMf2mJgY976ysjIFBwerdevWF1xzPrm5uXI4HO5bQkKCj6cHAACBImBj5yybzeZx37KsBtvO9XNrsrOz5XQ63bcjR474ZFYAABB4AjZ2YmNjJanBGZry8nL32Z7Y2FjV1taqoqLigmvOx263KyIiwuMGAADMFLCxk5SUpNjYWOXn57u31dbWqrCwUOnp6ZKktLQ0tWjRwmNNaWmp9uzZ414DAAB+3fz6aayqqip9/fXX7vslJSXauXOnIiMj1b59e2VlZWnWrFlKTk5WcnKyZs2apdDQUN1zzz2SJIfDoQkTJmjatGlq06aNIiMjNX36dKWmpro/nQUAAH7d/Bo727ZtU58+fdz3p06dKkkaO3as8vLyNGPGDFVXV2vixImqqKhQjx49tG7dOoWHh7sfs2DBAjVv3lwjR45UdXW1+vXrp7y8PAUFBV3x4wEAAIHHZlmW5e8h/M3lcsnhcMjpdDb563fSHl7m7xEAAE1A8XNj/D1Co13q3++AvWYHAADAF4gd
AABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgBAABGI3YAAIDRiB0AAGA0YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARgvo2MnJyZHNZvO4xcbGuvdblqWcnBzFx8crJCREvXv31t69e/04MQAACDQBHTuSdO2116q0tNR92717t3vf3LlzNX/+fC1atEhbt25VbGys+vfvr8rKSj9ODAAAAknAx07z5s0VGxvrvkVHR0v66azOwoUL9fjjj2v48OFKSUnR0qVL9eOPP2rFihV+nhoAAASKgI+dAwcOKD4+XklJSbrrrrv0zTffSJJKSkpUVlamAQMGuNfa7XZlZGSoqKjoos9ZU1Mjl8vlcQMAAGYK6Njp0aOHli1bpr/+9a967bXXVFZWpvT0dB0/flxlZWWSpJiYGI/HxMTEuPddSG5urhwOh/uWkJBw2Y4BAAD4V0DHzqBBgzRixAilpqYqMzNTH3zwgSRp6dKl7jU2m83jMZZlNdh2ruzsbDmdTvftyJEjvh8eAAAEhICOnXOFhYUpNTVVBw4ccH8q69yzOOXl5Q3O9pzLbrcrIiLC4wYAAMzUpGKnpqZG+/fvV1xcnJKSkhQbG6v8/Hz3/traWhUWFio9Pd2PUwIAgEDS3N8DXMz06dN1++23q3379iovL9czzzwjl8ulsWPHymazKSsrS7NmzVJycrKSk5M1a9YshYaG6p577vH36AAAIEAEdOwcPXpUd999t3744QdFR0erZ8+e2rJlixITEyVJM2bMUHV1tSZOnKiKigr16NFD69atU3h4uJ8nBwAAgcJmWZbl7yH8zeVyyeFwyOl0Nvnrd9IeXubvEQAATUDxc2P8PUKjXerf7yZ1zQ4AAMAvRewAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwGrEDAACMRuwAAACjETsAAMBoxA4AADAasQMAAIxG7AAAAKMROwAAwGjEDgAAMBqxAwAAjEbsAAAAoxE7AADAaMQOAAAwmjGx8/LLLyspKUm/+c1vlJaWps8++8zfIwEAgABgROysWrVKWVlZevzxx7Vjxw7deuutGjRokA4fPuzv0QAAgJ8ZETvz58/XhAkTdP/99+uaa67RwoULlZCQoMWLF/t7NAAA4GfN/T1AY9XW1qq4uFiPPvqox/YBAwaoqKjovI+pqalRTU2N+77T6ZQkuVyuyzfoFVJXU+3vEQAATYAJf/POHoNlWRdd1+Rj54cfflBdXZ1iYmI8tsfExKisrOy8j8nNzdVTTz3VYHtCQsJlmREAgEDjePEhf4/gM5WVlXI4HBfc3+Rj5yybzeZx37KsBtvOys7O1tSpU9336+vrdeLECbVp0+aCjwHQNLlcLiUkJOjIkSOKiIjw9zgAfMiyLFVWVio+Pv6i65p87ERFRSkoKKjBWZzy8vIGZ3vOstvtstvtHttatWp1uUYEEAAiIiKIHcBAFzujc1aTv0A5ODhYaWlpys/P99ien5+v9PR0P00FAAAC
RZM/syNJU6dO1b333qvu3bvrpptu0quvvqrDhw/roYfMeT8SAAB4x4jYGTVqlI4fP66//OUvKi0tVUpKij788EMlJib6ezQAfma32/Xkk082eOsawK+Hzfq5z2sBAAA0YU3+mh0AAICLIXYAAIDRiB0AAGA0YgdAk1FQUCCbzaaTJ0/6exQATQixAwAAjEbsAAAAoxE7AK6YDh06aOHChR7bunXrppycHEk//cbdv//7v2vYsGEKDQ1VcnKy3nvvvQs+X3V1tQYPHqyePXvqxIkTOnTokGw2m9555x316dNHoaGhuu6667R582aPx7399tu69tprZbfb1aFDB82bN8+978UXX1Rqaqr7/tq1a2Wz2fTSSy+5tw0cOFDZ2dmSpJycHHXr1k3Lly9Xhw4d5HA4dNddd6mystLb/0wAfIzYARBQnnrqKY0cOVK7du3Sb3/7W40ePVonTpxosM7pdGrAgAGqra3Vhg0bFBkZ6d73+OOPa/r06dq5c6c6d+6su+++W2fOnJEkFRcXa+TIkbrrrru0e/du5eTkaObMmcrLy5Mk9e7dW3v37tUPP/wgSSosLFRUVJQKCwslSWfOnFFRUZEyMjLcr3fw4EGtXbtW77//vt5//30VFhZq9uzZl+s/EYBfiNgBEFDGjRunu+++W506ddKsWbN06tQpff755x5rvvvuO2VkZKht27b64IMPFBYW5rF/+vTpGjx4sDp37qynnnpK3377rb7++mtJ0vz589WvXz/NnDlTnTt31rhx4zR58mQ999xzkqSUlBS1adPGHTcFBQWaNm2a+/7WrVv1f//3f7rlllvcr1dfX6+8vDylpKTo1ltv1b333qsNGzZctv9GAH4ZYgdAQOnatav732FhYQoPD1d5ebnHmszMTHXs2FGrV69WcHDwRZ8jLi5OktzPsX//ft18880e62+++WYdOHBAdXV1stls6tWrlwoKCnTy5Ent3btXDz30kOrq6rR//34VFBTohhtuUMuWLd2P79Chg8LDwz1e89yZAfgPsQPgimnWrJnO/YWa06dPe9xv0aKFx32bzab6+nqPbYMHD9Znn32mffv2nfd1/v45bDabJLmfw7Is97azzp2pd+/eKigo0GeffabrrrtOrVq1Uq9evVRYWKiCggL17t37F88MwH+IHQBXTHR0tEpLS933XS6XSkpKfvHzzJ49W2PHjlW/fv0uGDwX0qVLF23atMljW1FRkTp37qygoCBJ//+6nbfeessdNhkZGVq/fn2D63UABD5iB8AV07dvXy1fvlyfffaZ9uzZo7Fjx7oD45f613/9V40ePVp9+/bVF198ccmPmzZtmjZs2KCnn35aX331lZYuXapFixZp+vTp7jVnr9t588033bHTu3dvrV27VtXV1R7X6wAIfM39PQCAX4/s7Gx98803GjJkiBwOh55++mmvzuyctWDBAtXV1alv374qKCg47/U757rhhhu0evVqPfHEE3r66acVFxenv/zlLxo3bpx7jc1mU0ZGhtauXatbb71V0k/XATkcDnXs2FERERFezwzgyrNZ575ZDQAAYBDexgIAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAYjdgB4FOHDh2SzWbTzp07L/tr5eXlqVWrVh7bXn31VSUkJKhZs2ZauHChcnJy1K1bt8s+S4cOHbRw4cLL/joAfjm+QRmATx06dEhJSUnasWPHZY+M6upqVVZWqm3btpJ++mHRqKgozZ8/XyNGjJDD4VB9fb1qamrUpk0bn7xmXl6esrKydPLkSY/t33//vcLCwhQaGuqT1wHgO/w2FoAmKyQkRCEhIe77hw8f1unTpzV48GDFxcW5t7ds2fKyzxIdHX3ZXwOAd3gbC4BX6uvrNWfOHHXq1El2u13t27fXs88+22BdXV2dJkyYoKSkJIWEhOjqq6/W888/77GmoKBA//RP/6SwsDC1atVKN998s7799ltJ
0v/8z/+oT58+Cg8PV0REhNLS0rRt2zZJnm9j5eXlKTU1VZLUsWNH2Ww2HTp06LxvY/3Hf/yHrr32WtntdsXFxWny5MnuffPnz1dqaqrCwsKUkJCgiRMnqqqqyj3n+PHj5XQ6ZbPZZLPZlJOTI6nh21iHDx/WHXfcoZYtWyoiIkIjR47Ud999595/dq7ly5erQ4cOcjgcuuuuu1RZWfnL/2cAuChiB4BXsrOzNWfOHM2cOVP79u3TihUrFBMT02BdfX292rVrp9WrV2vfvn164okn9Nhjj2n16tWSpDNnzujOO+9URkaGdu3apc2bN+uBBx6QzWaTJI0ePVrt2rXT1q1bVVxcrEcffVQtWrRo8DqjRo3S+vXrJUmff/65SktLlZCQ0GDd4sWLNWnSJD3wwAPavXu33nvvPXXq1Mm9v1mzZnrhhRe0Z88eLV26VBs3btSMGTMkSenp6Vq4cKEiIiJUWlqq0tJSTZ8+vcFrWJalO++8UydOnFBhYaHy8/N18OBBjRo1ymPdwYMHtXbtWr3//vt6//33VVhYqNmzZ1/q/wIAl8oCgF/I5XJZdrvdeu211xrsKykpsSRZO3bsuODjJ06caI0YMcKyLMs6fvy4JckqKCg479rw8HArLy/vvPuWLFliORwO9/0dO3ZYkqySkhL3tieffNK67rrr3Pfj4+Otxx9//MIHd47Vq1dbbdq0ueBrnpWYmGgtWLDAsizLWrdunRUUFGQdPnzYvX/v3r2WJOvzzz93zxUaGmq5XC73mocfftjq0aPHJc8G4NJwZgfAL7Z//37V1NSoX79+l7T+lVdeUffu3RUdHa2WLVvqtdde0+HDhyVJkZGRGjdunAYOHKjbb79dzz//vEpLS92PnTp1qu6//35lZmZq9uzZOnjwoNdzl5eX69ixYxed+5NPPlH//v111VVXKTw8XGPGjNHx48d16tSpS36d/fv3KyEhwePMUpcuXdSqVSvt37/fva1Dhw4KDw9334+Li1N5efkvPCoAP4fYAfCL/f1FwT9n9erV+tOf/qT77rtP69at086dOzV+/HjV1ta61yxZskSbN29Wenq6Vq1apc6dO2vLli2Sfrq2Ze/evRo8eLA2btyoLl26aM2aNZdl7m+//Va//e1vlZKSorffflvFxcV66aWXJEmnT5++5NexLMv9NtzFtp/7dpzNZlN9ff0lvw6AS0PsAPjFkpOTFRISog0bNvzs2s8++0zp6emaOHGirr/+enXq1Om8Z2euv/56ZWdnq6ioSCkpKVqxYoV7X+fOnfWnP/1J69at0/Dhw7VkyRKv5g4PD1eHDh0uOPe2bdt05swZzZs3Tz179lTnzp117NgxjzXBwcGqq6u76Ot06dJFhw8f1pEjR9zb9u3bJ6fTqWuuucar2QF4j9gB8Iv95je/0SOPPKIZM2Zo2bJlOnjwoLZs2aLXX3+9wdpOnTpp27Zt+utf/6qvvvpKM2fO1NatW937S0pKlJ2drc2bN+vbb7/VunXr9NVXX+maa65RdXW1Jk+erIKCAn377bf629/+pq1btzYqGHJycjRv3jy98MILOnDggLZv364XX3xRkvQP//APOnPmjF588UV98803Wr58uV555RWPx3fo0EFVVVXasGGDfvjhB/34448NXiMzM1Ndu3bV6NGjtX37dn3++ecaM2aMMjIy1L17d69nB+AdYgeAV2bOnKlp06bpiSee0DXXXKNRo0ad93qThx56SMOHD9eoUaPUo0cPHT9+XBMnTnTvDw0N1RdffKERI0aoc+fOeuCBBzR58mQ9+OCDCgoK0vHjxzVmzBh17txZI0eO1KBBg/TUU095PffYsWO1cOFCvfzyy7r22ms1ZMgQHThwQJLUrVs3zZ8/X3PmzFFKSorefPNN5ebmejw+PT1dDz30kEaNGqXo6GjNnTu3wWvYbDatXbtWrVu3Vq9evZSZmamOHTtq1apVXs8NwHt8gzIAADAaZ3YAAIDRiB0AAGA0
YgcAABiN2AEAAEYjdgAAgNGIHQAAYDRiBwAAGI3YAQAARiN2AACA0YgdAABgNGIHAAAY7f8BhdY0nJlewZAAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(\n", + " y = 'abundance',\n", + " x = 'classification',\n", + " data = clf_widecnn,\n", + " estimator = sum,\n", + " ci = None,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}