
Commit

Apply manual ruff fixes
* requests timeout
* date.today() as argument!
* open() without close()

Minor:

* assert -> raise
* exception "from <cause>"
* type -> isinstance
* combine if-statements
* Django model method ordering
* noqa changes for security checks
* string line lengths
vdboor committed Jul 9, 2024
1 parent 9b48c7c commit 42ce06d
Showing 25 changed files with 109 additions and 92 deletions.
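Two of the headline fixes above are worth spelling out for readers who don't know the ruff codes: S113 flags `requests` calls made without an explicit timeout, and the `open()` item is the usual context-manager fix. A minimal sketch of both patterns (the URL and path are placeholders, not values from this repository):

```python
from __future__ import annotations

import json

import requests


def fetch_schema(url: str) -> dict:
    # An explicit timeout keeps a hung server from blocking the process forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return response.json()


def read_schema(path: str) -> dict:
    # The context manager guarantees the handle is closed, even on errors.
    with open(path) as f:
        return json.load(f)
```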
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -86,8 +86,8 @@ known-first-party = ["schematools"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.mccabe]
-max-complexity = 10
+max-complexity = 20 # TODO: lower this

[tool.ruff.lint.per-file-ignores]
"**/migrations/*.py" = ["E501"] # line too long
-"tests/**/*.py" = ["S101", "S105", "S106", "S314"] # allow asserts, hardcoded passwords
+"tests/**/*.py" = ["S101", "S105", "S106", "S314", "S608"] # allow asserts, hardcoded passwords, SQL injection
2 changes: 1 addition & 1 deletion src/schematools/__init__.py
@@ -2,7 +2,7 @@

from __future__ import annotations

-from typing import Final, List
+from typing import Final

# Internal conventions
RELATION_INDICATOR: Final[str] = "_"
3 changes: 2 additions & 1 deletion src/schematools/_utils.py
@@ -9,7 +9,8 @@
def cached_method(*lru_args, **lru_kwargs):
"""A simple lru-cache per object.
This removed the need for methodtools.lru_cache(), which uses wirerope for purity.
-The usage of wirerope started showing up as 5% of the request time, hence it's significant to remove.
+The usage of wirerope started showing up as 5% of the request time,
+hence it's significant to remove.
"""

def decorator(func):
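The `cached_method` decorator is only partially visible in this hunk. As a rough, hypothetical sketch (not the project's actual implementation), a per-object LRU cache along the lines the docstring describes could look like this:

```python
from __future__ import annotations

import functools


def cached_method(*lru_args, **lru_kwargs):
    """Cache a method's results per instance, so entries die with the object."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Build the lru_cache lazily on first call and store it on the
            # instance, so instances never share entries and stay collectable.
            caches = self.__dict__.setdefault("_method_caches", {})
            cache = caches.get(func.__name__)
            if cache is None:
                bound = functools.partial(func, self)
                cache = functools.lru_cache(*lru_args, **lru_kwargs)(bound)
                caches[func.__name__] = cache
            return cache(*args, **kwargs)

        return wrapper

    return decorator
```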
12 changes: 6 additions & 6 deletions src/schematools/cli.py
@@ -340,7 +340,7 @@ def _schema_fetch_url_file(schema_url_file: str) -> dict[str, Any]:
with open(schema_url_file) as f:
schema_data = json.load(f)
else:
-response = requests.get(schema_url_file)
+response = requests.get(schema_url_file, timeout=60)
response.raise_for_status()
schema_data = response.json()

@@ -375,7 +375,7 @@ def _fetch_json(location: str) -> dict[str, Any]:
with open(location) as f:
json_obj = json.load(f)
else:
-response = requests.get(location)
+response = requests.get(location, timeout=60)
response.raise_for_status()
json_obj = response.json()
return json_obj
@@ -562,7 +562,7 @@ def batch_validate(
try:
datasets_idx = path_parts.index("datasets")
except ValueError:
-raise ValueError("dataset files do not live in a common 'datasets' dir")
+raise ValueError("dataset files do not live in a common 'datasets' dir") from None

# Find out if we need to go up the directory tree to get at `datasets` dir
# This could be needed if we are only checking one dataset,
@@ -671,7 +671,7 @@ def to_ckan(schema_url: str, upload_url: str):
continue
try:
data.append(ckan.from_dataset(ds, path))
-except Exception as e:
+except Exception as e: # noqa: BLE001
logger.error("in dataset %s: %s", ds.identifier, str(e)) # noqa: G200
status = 1

@@ -684,13 +684,13 @@
for ds in data:
ident = ds["identifier"]
url = f"{upload_url}/api/3/action/package_update?id={ident}"
-response = requests.post(url, headers=headers, json=ds)
+response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if response.status_code == 404:
# 404 *can* mean no such dataset. Try again with package_create.
url = upload_url + "/api/3/action/package_create"
-response = requests.post(url, headers=headers, json=ds)
+response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if not (200 <= response.status_code < 300):
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/faker/providers/geo.py
@@ -40,7 +40,7 @@ def __init__(self, has_z=False):
try:
shape_cls = getattr(geos, class_name)
except AttributeError:
-raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.")
+raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.") from None
self.has_z = has_z
self.shape = shape_cls(*self.get_coordinates(), srid=SRID_RD_NEW)

@@ -49,7 +49,7 @@ def create_tables(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
-dataset.schema
+dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db or dataset.name in to_be_skipped:
@@ -29,7 +29,7 @@ def _is_valid_sql(view_sql: str, view_name: str, write_role_name: str) -> bool:
cursor.execute("SET statement_timeout = 5000;")
cursor.execute(sql.SQL(view_sql))
transaction.savepoint_rollback(sid)
-except Exception as e: # noqa: F841
+except Exception as e: # noqa: F841, BLE001
return False
return True

@@ -109,7 +109,7 @@ def create_views(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
-dataset.schema
+dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db:
@@ -190,9 +190,9 @@ def create_views(
)

# Remove the view if it exists
-# Due to the large costs of recreating materialized views, we only create
-# and not drop them. When changes are made to the materialized view the view
-# must be droped manually.
+# Due to the large costs of recreating materialized views,
+# we only create and not drop them. When changes are made
+# to the materialized view the view must be dropped manually.
if view_type != "materialized":
cursor.execute(
sql.SQL("DROP VIEW IF EXISTS {view_name} CASCADE").format(
@@ -219,7 +219,8 @@ def create_views(
errors += 1
else:
command.stderr.write(
-f" Required permissions for view {table.db_name} are not in the view dataset auth"
+f" Required permissions for view {table.db_name}"
+" are not in the view dataset auth"
)

if errors:
@@ -141,23 +141,17 @@ def update_dataset_version(self, dataset: Dataset, schema: DatasetSchema) -> None:
"""
Perform dataset version update, including changes to dataset tables.
"""
-if not dataset.is_default_version:
-# Dataset is currently not default. Can not be safely renamed.
-if schema.is_default_version:
-# Dataset is promoted to default. We need to rename current default,
-# if it was not done yet.
-try:
-current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
-except Dataset.DoesNotExist:
-pass
-else:
-# Update current default dataset name to expected name.
-if current_default.version:
-current_default.name = to_snake_case(
-f"{schema.id}_{current_default.version}"
-)
-
-current_default.save()
+if not dataset.is_default_version and schema.is_default_version:
+try:
+current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
+except Dataset.DoesNotExist:
+pass
+else:
+# Update current default dataset name to expected name.
+if current_default.version:
+current_default.name = to_snake_case(f"{schema.id}_{current_default.version}")
+
+current_default.save()

dataset.name = Dataset.name_from_schema(schema)
dataset.is_default_version = schema.is_default_version
@@ -59,7 +59,10 @@ def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"--from-files",
action="store_true",
-help="Get the tables from the filesystem. NB. the SCHEMA_URL also needs to be file-based!",
+help=(
+"Get the tables from the filesystem. "
+"Note the SCHEMA_URL also needs to be file-based!"
+),
)
parser.add_argument("schema", help="Schema name")
parser.add_argument("table", help="Table name")
@@ -68,12 +71,18 @@ def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"version1",
metavar="OLDVERSION",
-help="Old table version, e.g. v1.0.0, or a git ref like `master`, `tag`, `branch` or `hash` with --from-files",
+help=(
+"Old table version, e.g. v1.0.0, or a git ref like"
+" `master`, `tag`, `branch` or `hash` with --from-files"
+),
)
parser.add_argument(
"version2",
metavar="NEWVERSION",
-help="New table version, e.g. v1.1.0, , or a git ref like `master`, `tag`, `branch` or `hash` with --from-files",
+help=(
+"New table version, e.g. v1.1.0, , or a git ref like"
+" `master`, `tag`, `branch` or `hash` with --from-files"
+),
)

def handle(self, *args, **options) -> None:
@@ -93,13 +102,15 @@ def handle(self, *args, **options) -> None:
# obtain the tables for these specific references
# for comparison and sql generation.
if options["from_files"]:
-assert not options["schema_url"].startswith(
-"http"
-), "The --from-files can only work with a SCHEMA_URL on the local filesystem."
+if options["schema_url"].startswith("http"):
+raise CommandError(
+"The --from-files can only work with a SCHEMA_URL on the local filesystem."
+)
+
with tempfile.TemporaryDirectory() as tmpdir:
schemas_root = Path(options["schema_url"]).parent
-subprocess.run( # nosec
-["git", "clone", schemas_root, tmpdir],
+subprocess.run(
+["git", "clone", schemas_root, tmpdir], # noqa: S603
)
table1 = self._load_table_from_checkout(
dataset.id, options["table"], tmpdir, options["version1"]
@@ -142,7 +153,9 @@ def _load_table_from_checkout(
self, dataset_id: str, table_id: str, tmpdir: str, version_ref: str
) -> DatasetTableSchema:
"""Load a DatasetTableSchema for the specified git reference."""
-subprocess.run(["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL)
+subprocess.run(
+["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL # noqa: S603
+)
tmp_schema_path = Path(tmpdir) / "datasets"
# We create a specific schema loader, because it has to read in the data
# associated with a specific git checkout.
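The `# noqa: S603` markers added above silence ruff's subprocess audit; what actually keeps these calls safe is passing the command as an argument list rather than a shell string. A small illustration (the ref value is hypothetical):

```python
import subprocess

ref = "feature/some-branch"  # hypothetical, possibly user-supplied value

# List form: arguments go straight to the git executable, never through a
# shell, so characters like ';' or '$' in `ref` cannot inject extra commands.
subprocess.run(["git", "checkout", ref], check=True)  # noqa: S603

# The risky variant that S602 flags would be:
#   subprocess.run(f"git checkout {ref}", shell=True)
```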
34 changes: 17 additions & 17 deletions src/schematools/contrib/django/models.py
@@ -178,6 +178,20 @@ class Meta:
def __str__(self):
return self.name

+def save(self, *args, **kwargs):
+"""Perform a final data validation check, and additional updates."""
+if self.schema_data_changed() and (self.schema_data or not self._state.adding):
+self.__dict__.pop("schema", None) # clear cached property
+# The extra "and" above avoids the transaction savepoint for an empty dataset.
+# Ensure both changes are saved together
+with transaction.atomic():
+super().save(*args, **kwargs)
+self.save_schema_tables()
+else:
+super().save(*args, **kwargs)
+
+save.alters_data = True
+
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._dataset_collection = None
@@ -247,20 +261,6 @@ def save_for_schema(self, schema: DatasetSchema):
self.__dict__["schema"] = schema # Avoid serializing/deserializing the schema data
return changed

-def save(self, *args, **kwargs):
-"""Perform a final data validation check, and additional updates."""
-if self.schema_data_changed() and (self.schema_data or not self._state.adding):
-self.__dict__.pop("schema", None) # clear cached property
-# The extra "and" above avoids the transaction savepoint for an empty dataset.
-# Ensure both changes are saved together
-with transaction.atomic():
-super().save(*args, **kwargs)
-self.save_schema_tables()
-else:
-super().save(*args, **kwargs)
-
-save.alters_data = True
-
def save_schema_tables(self):
"""Expose the schema data to the DatasetTable.
This allows other projects (e.g. geosearch) to process our dynamic tables.
@@ -486,6 +486,9 @@ class Profile(models.Model):
scopes = models.CharField(max_length=255)
schema_data = models.TextField(_("Amsterdam Schema Contents"), validators=[validate_json])

+def __str__(self):
+return self.name
+
@cached_property
def schema(self) -> ProfileSchema:
"""Provide access to the schema data"""
@@ -512,9 +515,6 @@ def save_for_schema(self, profile_schema: ProfileSchema) -> Profile:
self.save()
return self

-def __str__(self):
-return self.name
-

class LooseRelationField(models.ForeignKey):
"""A relation that points to one part of the composite key.
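The relocations in models.py above (`save()` and `__str__` moved) follow the Django style-guide ordering that ruff's DJ012 check enforces: fields, managers, `Meta`, `__str__`, `save()`, then other methods. A schematic example of that order (not taken from this repository):

```python
from django.db import models


class Article(models.Model):
    # 1. database fields
    title = models.CharField(max_length=100)

    # 2. custom managers would come next

    class Meta:  # 3. Meta
        ordering = ["title"]

    def __str__(self):  # 4. __str__
        return self.title

    def save(self, *args, **kwargs):  # 5. overridden standard methods
        self.title = self.title.strip()
        super().save(*args, **kwargs)

    def display_title(self) -> str:  # 6. custom methods last
        return self.title.upper()
```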
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/validators.py
@@ -23,4 +23,4 @@ def validate_json(value: str) -> None:
_("Value must be valid JSON text."),
code="invalid",
params={"value": value},
-)
+) from None
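The `from None` endings added in this commit (ruff B904 asks for an explicit `from` inside `except` blocks) suppress implicit exception chaining, so callers get one clean error instead of a "During handling of the above exception, another exception occurred" double traceback. In miniature:

```python
import json


def parse_config(text: str) -> dict:
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Without `from None`, the JSONDecodeError would ride along as
        # __context__ and be printed as a second traceback.
        raise ValueError("Value must be valid JSON text.") from None
```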
6 changes: 3 additions & 3 deletions src/schematools/exports/__init__.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from collections.abc import Iterable
-from datetime import date
+from datetime import datetime
from pathlib import Path
from typing import IO

@@ -51,7 +51,7 @@ def __init__(
table_ids: list[str] | None = None,
scopes: list[str] | None = None,
size: int | None = None,
-temporal_date: date = date.today(),
+temporal_date: datetime | None = None,
):
"""Constructor.
@@ -70,7 +70,7 @@ def __init__(
self.table_ids = table_ids
self.scopes = set(scopes)
self.size = size
-self.temporal_date = temporal_date
+self.temporal_date = temporal_date or datetime.now().astimezone()

self.base_dir = Path(output)
self.tables = (
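The replaced default above is the commit-message bullet about `date.today()` as an argument (the pattern B008 warns about): a default expression is evaluated once, when the function is defined, so a long-running process would keep exporting with the date the module was imported on. The `None` sentinel used in the diff resolves the value at call time; the function name below is illustrative:

```python
from __future__ import annotations

from datetime import datetime


def export_rows(temporal_date: datetime | None = None) -> datetime:
    # Resolve the default when the function is called, not when it is defined.
    return temporal_date or datetime.now().astimezone()
```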
4 changes: 2 additions & 2 deletions src/schematools/exports/csv.py
@@ -2,7 +2,7 @@

import csv
from collections.abc import Iterable
-from datetime import date
+from datetime import datetime
from typing import IO

from geoalchemy2 import functions as gfunc # ST_AsEWKT
@@ -75,7 +75,7 @@ def export_csvs(
table_ids: list[str],
scopes: list[str],
size: int,
-temporal_date: date = date.today(),
+temporal_date: datetime | None = None,
):
"""Utility function to wrap the Exporter."""
enable_datetime_cast()
2 changes: 1 addition & 1 deletion src/schematools/exports/geopackage.py
@@ -56,5 +56,5 @@ def export_geopackages(
query = f"{query} LIMIT {size}"
sql_stmt = query.as_string(connection.connection.cursor())
os.system( # noqa: S605 # nosec: B605
-command.format(output_path=output_path, db_url=db_url, sql=sql_stmt)
+command.format(output_path=output_path, db_url=db_url, sql=sql_stmt) # noqa: S605
)
6 changes: 2 additions & 4 deletions src/schematools/factories.py
@@ -354,10 +354,8 @@ def index_factory(
*build_temporal_indexes(table, dataset_table, db_table_name),
]

-through_indexes = _build_m2m_indexes(
-metadata, dataset_table, is_versioned_dataset, db_schema_name
-)
-for table_db_name, through_indexes in through_indexes.items():
+m2m_indexes = _build_m2m_indexes(metadata, dataset_table, is_versioned_dataset, db_schema_name)
+for table_db_name, through_indexes in m2m_indexes.items():
indexes[table_db_name].extend(through_indexes)

return dict(indexes)
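The rename in factories.py above removes a loop variable that shadowed the dict it iterated over (what ruff's B020 check flags). The pattern in miniature, with made-up data:

```python
indexes = {"table_a": ["idx_1"], "table_b": ["idx_2"]}

# Shadowed: after the first iteration the name `indexes` is rebound to a
# list, so any later use of the dict silently reads the wrong object.
# for table_name, indexes in indexes.items():
#     ...

# A distinct loop variable keeps both objects addressable.
for table_name, table_indexes in indexes.items():
    print(table_name, table_indexes)
```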