diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b9a90d8..62da99f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: - id: mixed-line-ending args: [ --fix=lf ] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.0 + rev: v0.8.6 hooks: - id: ruff - id: ruff-format diff --git a/pyproject.toml b/pyproject.toml index ced512a..03934bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ etl = [ "setuptools", # pinned for 3.12 because yoyo-migrations still uses pkg_resources ] tests = ["pytest>=6.0", "pytest-cov", "mock", "httpx", "deepdiff"] -dev = ["pre-commit>=3.7.1", "ruff==0.5.0"] +dev = ["pre-commit>=3.7.1", "ruff==0.8.6"] docs = [ "sphinx==6.1.3", "sphinx-autodoc-typehints==1.22.0", @@ -117,10 +117,14 @@ select = [ "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf + "SLOT", # https://docs.astral.sh/ruff/rules/#flake8-slots-slot "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "PLC", # https://docs.astral.sh/ruff/rules/#convention-c + "PLE", # https://docs.astral.sh/ruff/rules/#error-e_1 + "TRY", # https://docs.astral.sh/ruff/rules/#tryceratops-try "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf @@ -139,13 +143,14 @@ fixable = [ "PT", "RSE", "SIM", + "PLC", + "PLE", + "TRY", "PERF", "FURB", "RUF" ] # ANN003 - missing-type-kwargs -# ANN101 - missing-type-self -# ANN102 - missing-type-cls # D203 - one-blank-line-before-class # D205 - blank-line-after-summary # D206 - indent-with-spaces* @@ -159,19 +164,20 @@ fixable = [ # E501 - line-too-long* # W191 - tab-indentation* # S321 - suspicious-ftp-lib-usage +# PLC0206 - dict-index-missing-items # *ignored for compatibility with formatter ignore = [ - "ANN003", "ANN101", "ANN102", + "ANN003", "D203", "D205", "D206", "D213", "D300", "D400", "D415", "E111", "E114", "E117", "E501", "W191", "S321", + "PLC0206" ] [tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type -# ANN102 - missing-type-cls # F401 - unused-import # N805 - invalid-first-argument-name-for-method # D100 - undocumented-public-module @@ -186,7 +192,6 @@ ignore = [ "tests/*" = [ "ANN001", "ANN2", - "ANN102", "D100", "D102", "S101", diff --git a/src/gene/cli.py b/src/gene/cli.py index c328672..6912391 100644 --- a/src/gene/cli.py +++ b/src/gene/cli.py @@ -206,7 +206,7 @@ def _load_source( try: processed_ids += source.perform_etl(use_existing) except GeneNormalizerEtlError as e: - _logger.error(e) + _logger.exception("ETL error while loading %s", n) click.echo(f"Encountered error while loading {n}: {e}.") click.get_current_context().exit() end_load = timer() @@ -322,13 +322,13 @@ def update_normalizer_db( if len(sources_split) == 0: err_msg = "Must enter 1 or more source names to update" - raise Exception(err_msg) + raise ValueError(err_msg) non_sources = set(sources_split) - set(SOURCES) if len(non_sources) != 0: err_msg = f"Not valid source(s): {non_sources}" - raise Exception(err_msg) + raise ValueError(err_msg) parsed_source_names = {SourceName(SOURCES[s]) for s in sources_split} _update_normalizer(parsed_source_names, db, update_merged, use_existing) diff --git a/src/gene/database/dynamodb.py b/src/gene/database/dynamodb.py index 37e0596..e1ebba9 100644 --- a/src/gene/database/dynamodb.py +++ b/src/gene/database/dynamodb.py @@ -105,8 +105,8 @@ def drop_db(self) -> None: try: if not self._check_delete_okay(): return - except DatabaseWriteException as e: - raise e + except DatabaseWriteException: # noqa: TRY203 + raise if self.gene_table in self.list_tables(): self.dynamodb.Table(self.gene_table).delete() @@ -250,9 +250,9 @@ def get_record_by_id( response = self.genes.query(KeyConditionExpression=exp) record = response["Items"][0] del record["label_and_type"] - return record + return record # noqa: TRY300 except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on get_records_by_id for search term %s: %s", concept_id, e.response["Error"]["Message"], @@ -275,7 +275,7 @@ def get_refs_by_type(self, search_term: str, ref_type: RefType) -> list[str]: matches = self.genes.query(KeyConditionExpression=filter_exp) return [m["concept_id"] for m in matches.get("Items", None)] except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on get_refs_by_type for search term %s: %s", search_term, e.response["Error"]["Message"], @@ -378,7 +378,7 @@ def add_record(self, record: dict, src_name: SourceName) -> None: try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on add_record for %s: %s", concept_id, e.response["Error"]["Message"], @@ -411,7 +411,7 @@ def add_merged_record(self, record: dict) -> None: try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error on add_record for " "%s: %s", concept_id, e.response["Error"]["Message"], @@ -438,7 +438,7 @@ def _add_ref_record( try: self.batch.put_item(Item=record) except ClientError as e: - _logger.error( + _logger.exception( "boto3 client error adding reference %s for %s with match type %s: %s", term, concept_id, @@ -473,7 +473,7 @@ def update_merge_ref(self, concept_id: str, merge_ref: Any) -> None: # noqa: AN ) raise DatabaseWriteException(err_msg) from e - _logger.error( + _logger.exception( "boto3 client error in `database.update_record()`: %s", e.response["Error"]["Message"], ) diff --git a/src/gene/database/postgresql.py b/src/gene/database/postgresql.py index 6e84e9a..ce5ba60 100644 --- a/src/gene/database/postgresql.py +++ b/src/gene/database/postgresql.py @@ -121,8 +121,8 @@ def drop_db(self) -> None: try: if not self._check_delete_okay(): return - except DatabaseWriteException as e: - raise e + except DatabaseWriteException: # noqa: TRY203 + raise with self.conn.cursor() as cur: cur.execute(self._drop_db_query) @@ -601,7 +601,7 @@ def add_record(self, record: dict, src_name: SourceName) -> None: # noqa: ARG00 cur.execute(self._ins_symbol_query, [record["symbol"], concept_id]) self.conn.commit() except UniqueViolation: - _logger.error("Record with ID %s already exists", concept_id) + _logger.exception("Record with ID %s already exists", concept_id) self.conn.rollback() _add_merged_record_query = b""" @@ -804,12 +804,12 @@ def load_from_remote(self, url: str | None) -> None: for chunk in r.iter_content(chunk_size=8192): if chunk: h.write(chunk) - tar = tarfile.open(temp_tarfile, "r:gz") - tar_dump_file = next( - f for f in tar.getmembers() if f.name.startswith("gene_norm_") - ) - tar.extractall(path=tempdir_path, members=[tar_dump_file]) # noqa: S202 - dump_file = tempdir_path / tar_dump_file.name + with tarfile.open(temp_tarfile, "r:gz") as tar: + tar_dump_file = next( + f for f in tar.getmembers() if f.name.startswith("gene_norm_") + ) + tar.extractall(path=tempdir_path, members=[tar_dump_file]) # noqa: S202 + dump_file = tempdir_path / tar_dump_file.name self.drop_db() system_call = f"psql {self.conninfo} -f {dump_file.absolute()}" diff --git a/src/gene/etl/__init__.py b/src/gene/etl/__init__.py index 8c26a18..ec28e33 100644 --- a/src/gene/etl/__init__.py +++ b/src/gene/etl/__init__.py @@ -10,10 +10,10 @@ from .ncbi import NCBI __all__ = [ - "Ensembl", "HGNC", "NCBI", - "GeneNormalizerEtlError", + "Ensembl", "GeneFileVersionError", + "GeneNormalizerEtlError", "GeneSourceFetchError", ] diff --git a/src/gene/etl/merge.py b/src/gene/etl/merge.py index e56c92d..8cf4a4c 100644 --- a/src/gene/etl/merge.py +++ b/src/gene/etl/merge.py @@ -57,13 +57,13 @@ def create_merged_concepts(self, record_ids: set[str]) -> None: self._database.update_merge_ref(concept_id, merge_ref) except DatabaseWriteException as dw: if str(dw).startswith("No such record exists"): - _logger.error( + _logger.exception( "Updating nonexistent record: %s for merge ref to %s", concept_id, merge_ref, ) else: - _logger.error(str(dw)) + _logger.exception("Encountered unknown DB write exception") uploaded_ids |= group self._database.complete_write_transaction() _logger.info("Merged concept generation successful.") @@ -135,9 +135,9 @@ def record_order(record: dict) -> tuple: source_rank = SourcePriority[src].value else: err_msg = ( - f"Prohibited source: {src} in concept_id " f"{record['concept_id']}" + f"Prohibited source: {src} in concept_id {record['concept_id']}" ) - raise Exception(err_msg) + raise ValueError(err_msg) return source_rank, record["concept_id"] records.sort(key=record_order) diff --git a/src/gene/query.py b/src/gene/query.py index 238b8e1..0d996a0 100644 --- a/src/gene/query.py +++ b/src/gene/query.py @@ -168,9 +168,9 @@ def _fetch_record( """ try: match = self.db.get_record_by_id(concept_id, case_sensitive=False) - except DatabaseReadException as e: - _logger.error( - "Encountered DatabaseReadException looking up %s: %s", concept_id, e + except DatabaseReadException: + _logger.exception( + "Encountered DatabaseReadException looking up %s", concept_id ) else: if match: @@ -244,12 +244,11 @@ def _get_search_response(self, query: str, sources: set[str]) -> dict: self._fetch_record(resp, ref, MatchType[item_type.upper()]) matched_concept_ids.append(ref) - except DatabaseReadException as e: - _logger.error( + except DatabaseReadException: + _logger.exception( "Encountered DatabaseReadException looking up %s %s: ", item_type, term, - e, ) continue