From ce12c1e1c971ed0669ecd0a332fe1249ad630d05 Mon Sep 17 00:00:00 2001 From: Luis Pedro Coelho Date: Sat, 27 Apr 2024 14:07:48 +1000 Subject: [PATCH] RLS Version 0.3.0 Full Changelog HANDLING GENE CLUSTERS & REVERSE COMPLEMENTS IN RESFINDER - Resfinder has gene clusters which can't be passed through RGI using 'contig' mode. - Gene clusters were identified and were manually assigned ARO numbers. - A separate file with manual curation for gene clusters and RCs was created, and their AROs were updated after concatenating RGI results and genes not in RGI results. - 40 gene clusters present. - 9 genes in reverse complement form also present. - RC genes were manually curated. USING AMINO ACID FILE FOR ARGANNOT & RESFINDER RATHER THAN NUCLEOTIDE FILE - ARG-ANNOT and Resfinder are comprised of coding sequences. The data wasn't being handled properly before as contig mode was used when passing coding sequences to RGI. Now, the amino acid versions of ARG-ANNOT & Resfinder are used with protein mode when running the database in RGI. - ARG-ANNOT AA file is available online. Resfinder AA file is generated using biopython. - One to many ARO mapping such as NG_047831:101-955 to Erm(K) and almG in ARG-ANNOT eliminated as protein mode used - A total of 10 ARO mappings changed in ARG-ANNOT ARGNORM.LIB: MAKING ARGNORM MORE USABLE AS A LIBRARY - Introduce `argnorm.lib` module - Users can import the `map_to_aro` function from `argnorm.lib`. The function takes a gene name as input, maps the gene to the ARO and returns a pronto term object with the ARO mapping. - The `get_aro_mapping_table` function, previously within the BaseNormalizer class, has also been moved to `lib.py` to give users the ability to access the mapping tables being used for normalization. - With the introduction of `lib.py`, users will be able to access core mapping utilities through `argnorm.lib`, drug categorization through `argnorm.drug_categorization`, and the traditional normalizers through `argnorm.normalizers`. 
--- CHANGELOG.md | 8 ++-- docs/{images => }/whatsnew.md | 12 ++++++ setup.py | 80 ++++++++++++++++------------------- 3 files changed, 53 insertions(+), 47 deletions(-) rename docs/{images => }/whatsnew.md (77%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 068914d..31dd9ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog -## Unreleased + +## 0.3.0 - 27 April 2024 ### Handling gene clusters & reverse complements in resfinder - Resfinder has gene clusters which can't be passed through RGI using 'contig' mode. @@ -17,11 +18,12 @@ - A total of 10 ARO mappings changed in ARG-ANNOT ### argnorm.lib: Making argNorm more usable as a library -- A file called `lib.py` will be introduced so that users can use argNorm as a library more easily. -- Users can import the `map_to_aro` function using `from argnorm.lib import map_to_aro`. The function takes a gene name as input, maps the gene to the ARO and returns a pronto term object with the ARO mapping. +- Introduce `argnorm.lib` module +- Users can import the `map_to_aro` function from `argnorm.lib`. The function takes a gene name as input, maps the gene to the ARO and returns a pronto term object with the ARO mapping. - The `get_aro_mapping_table` function, previously within the BaseNormalizer class, has also been moved to `lib.py` to give users the ability to access the mapping tables being used for normalization. - With the introduction of `lib.py`, users will be able to access core mapping utilities through `argnorm.lib`, drug categorization through `argnorm.drug_categorization`, and the traditional normalizers through `argnorm.normalizers`. 
+ ## 0.2.0 - 26 March 2024 #### ARO Mapping & Normalization diff --git a/docs/images/whatsnew.md b/docs/whatsnew.md similarity index 77% rename from docs/images/whatsnew.md rename to docs/whatsnew.md index 6cd7be4..d6365ab 100644 --- a/docs/images/whatsnew.md +++ b/docs/whatsnew.md @@ -1,5 +1,17 @@ ## What's New +## 0.3.0 - 27 April 2024 + +### User-facing changes +- Improved Resfinder mappings (40 gene clusters and 9 reverse complements were manually curated) +- Updated ARG-ANNOT mappings (a total of 10 mappings changed) +- argNorm is now more usable as a library +- Remove warnings + +### Internal changes +- Code has been refactored to be simpler + + ## 0.2.0 - 26 March 2024 #### ARO Mapping & Normalization diff --git a/setup.py b/setup.py index 77d69cc..06411bd 100644 --- a/setup.py +++ b/setup.py @@ -1,50 +1,42 @@ from setuptools import setup -from setuptools import find_packages -NAME = "argnorm" -AUTHOR = "See README" -EMAIL = "luispedro@big-data-biology.org" -URL = "https://github.com/BigDataBiology/argNorm" -LICENSE = "MIT" DESCRIPTION = """ -Normalize antibiotic resistance genes (ARGs) abundance tables (e.g., from metagenomics) by using the ARO ontology (developed by CARD). - +Normalize antibiotic resistance genes (ARGs) results by using the ARO ontology (developed by CARD). 
""" -if __name__ == "__main__": - setup( - name=NAME, - version="0.2.0", - author=AUTHOR, - author_email=EMAIL, - url=URL, - license=LICENSE, - description=DESCRIPTION, - packages=['argnorm', 'argnorm.data'], - include_package_data=True, - package_dir={'argnorm': 'argnorm' }, - package_data={'argnorm': ['data/*.tsv', 'data/manual_curation/*.tsv']}, - install_requires=open("./requirements.txt", "r").read().splitlines(), - long_description=open("./README.md", "r").read(), - long_description_content_type='text/markdown', - entry_points={ - "console_scripts": [ - "argnorm=argnorm.cli:main" - ] - }, - zip_safe=False, - classifiers=[ - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Development Status :: 4 - Beta", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Natural Language :: English" - ], - ) +setup( + name='argnorm', + version="0.3.0", + author='ArgNorm Developers', + author_email='luispedro@big-data-biology.org', + url="https://github.com/BigDataBiology/argNorm", + license='MIT', + description=DESCRIPTION, + packages=['argnorm', 'argnorm.data'], + include_package_data=True, + package_dir={'argnorm': 'argnorm' }, + package_data={'argnorm': ['data/*.tsv', 'data/manual_curation/*.tsv']}, + install_requires=open("./requirements.txt", "r").read().splitlines(), + long_description=open("./README.md", "r").read(), + long_description_content_type='text/markdown', + entry_points={ + "console_scripts": [ + "argnorm=argnorm.cli:main" + ] + }, + zip_safe=False, + classifiers=[ + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + 
"Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Natural Language :: English" + ], +)