Skip to content

Commit

Permalink
Apply manual ruff fixes
Browse files Browse the repository at this point in the history
* requests timeout
* date.today() as argument!
* open() without close()

Minor:

* exception "from <cause>"
* type -> isinstance
* combine if-statements
* Django model method ordering
* noqa changes for security checks
  • Loading branch information
vdboor committed Jul 9, 2024
1 parent 68af32f commit abfc935
Show file tree
Hide file tree
Showing 24 changed files with 83 additions and 79 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ known-first-party = ["schematools"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.mccabe]
max-complexity = 10
max-complexity = 20 # TODO: lower this

[tool.ruff.lint.per-file-ignores]
"**/migrations/*.py" = ["E501"] # line too long
"tests/**/*.py" = ["S101", "S105", "S106", "S314"] # allow asserts, hardcoded passwords
"tests/**/*.py" = ["S101", "S105", "S106", "S314", "S608"] # allow asserts, hardcoded passwords, SQL injection
2 changes: 1 addition & 1 deletion src/schematools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Final, List
from typing import Final

# Internal conventions
RELATION_INDICATOR: Final[str] = "_"
Expand Down
10 changes: 5 additions & 5 deletions src/schematools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def _schema_fetch_url_file(schema_url_file: str) -> dict[str, Any]:
with open(schema_url_file) as f:
schema_data = json.load(f)
else:
response = requests.get(schema_url_file)
response = requests.get(schema_url_file, timeout=60)
response.raise_for_status()
schema_data = response.json()

Expand Down Expand Up @@ -375,7 +375,7 @@ def _fetch_json(location: str) -> dict[str, Any]:
with open(location) as f:
json_obj = json.load(f)
else:
response = requests.get(location)
response = requests.get(location, timeout=60)
response.raise_for_status()
json_obj = response.json()
return json_obj
Expand Down Expand Up @@ -562,7 +562,7 @@ def batch_validate(
try:
datasets_idx = path_parts.index("datasets")
except ValueError:
raise ValueError("dataset files do not live in a common 'datasets' dir")
raise ValueError("dataset files do not live in a common 'datasets' dir") from None

# Find out if we need to go up the directory tree to get at `datasets` dir
# This could be needed if we are only checking one dataset,
Expand Down Expand Up @@ -684,13 +684,13 @@ def to_ckan(schema_url: str, upload_url: str):
for ds in data:
ident = ds["identifier"]
url = f"{upload_url}/api/3/action/package_update?id={ident}"
response = requests.post(url, headers=headers, json=ds)
response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if response.status_code == 404:
# 404 *can* mean no such dataset. Try again with package_create.
url = upload_url + "/api/3/action/package_create"
response = requests.post(url, headers=headers, json=ds)
response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if not (200 <= response.status_code < 300):
Expand Down
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/faker/providers/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(self, has_z=False):
try:
shape_cls = getattr(geos, class_name)
except AttributeError:
raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.")
raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.") from None
self.has_z = has_z
self.shape = shape_cls(*self.get_coordinates(), srid=SRID_RD_NEW)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def create_tables(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
dataset.schema
dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db or dataset.name in to_be_skipped:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def create_views(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
dataset.schema
dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,23 +141,17 @@ def update_dataset_version(self, dataset: Dataset, schema: DatasetSchema) -> Non
"""
Perform dataset version update, including changes to dataset tables.
"""
if not dataset.is_default_version:
# Dataset is currently not default. Can not be safely renamed.
if schema.is_default_version:
# Dataset is promoted to default. We need to rename current default,
# if it was not done yet.
try:
current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
except Dataset.DoesNotExist:
pass
else:
# Update current default dataset name to expected name.
if current_default.version:
current_default.name = to_snake_case(
f"{schema.id}_{current_default.version}"
)

current_default.save()
if not dataset.is_default_version and schema.is_default_version:
try:
current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
except Dataset.DoesNotExist:
pass
else:
# Update current default dataset name to expected name.
if current_default.version:
current_default.name = to_snake_case(f"{schema.id}_{current_default.version}")

current_default.save()

dataset.name = Dataset.name_from_schema(schema)
dataset.is_default_version = schema.is_default_version
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ def handle(self, *args, **options) -> None:
), "The --from-files can only work with a SCHEMA_URL on the local filesystem."
with tempfile.TemporaryDirectory() as tmpdir:
schemas_root = Path(options["schema_url"]).parent
subprocess.run( # nosec
["git", "clone", schemas_root, tmpdir],
subprocess.run(
["git", "clone", schemas_root, tmpdir], # noqa: S603
)
table1 = self._load_table_from_checkout(
dataset.id, options["table"], tmpdir, options["version1"]
Expand Down Expand Up @@ -142,7 +142,9 @@ def _load_table_from_checkout(
self, dataset_id: str, table_id: str, tmpdir: str, version_ref: str
) -> DatasetTableSchema:
"""Load a DatasetTableSchema for the specified git reference."""
subprocess.run(["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL)
subprocess.run(
["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL # noqa: S603
)
tmp_schema_path = Path(tmpdir) / "datasets"
# We create a specific schema loader, because it has to read in the data
# associated with a specific git checkout.
Expand Down
34 changes: 17 additions & 17 deletions src/schematools/contrib/django/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,20 @@ class Meta:
def __str__(self):
return self.name

def save(self, *args, **kwargs):
"""Perform a final data validation check, and additional updates."""
if self.schema_data_changed() and (self.schema_data or not self._state.adding):
self.__dict__.pop("schema", None) # clear cached property
# The extra "and" above avoids the transaction savepoint for an empty dataset.
# Ensure both changes are saved together
with transaction.atomic():
super().save(*args, **kwargs)
self.save_schema_tables()
else:
super().save(*args, **kwargs)

save.alters_data = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._dataset_collection = None
Expand Down Expand Up @@ -247,20 +261,6 @@ def save_for_schema(self, schema: DatasetSchema):
self.__dict__["schema"] = schema # Avoid serializing/deserializing the schema data
return changed

def save(self, *args, **kwargs):
"""Perform a final data validation check, and additional updates."""
if self.schema_data_changed() and (self.schema_data or not self._state.adding):
self.__dict__.pop("schema", None) # clear cached property
# The extra "and" above avoids the transaction savepoint for an empty dataset.
# Ensure both changes are saved together
with transaction.atomic():
super().save(*args, **kwargs)
self.save_schema_tables()
else:
super().save(*args, **kwargs)

save.alters_data = True

def save_schema_tables(self):
"""Expose the schema data to the DatasetTable.
This allows other projects (e.g. geosearch) to process our dynamic tables.
Expand Down Expand Up @@ -486,6 +486,9 @@ class Profile(models.Model):
scopes = models.CharField(max_length=255)
schema_data = models.TextField(_("Amsterdam Schema Contents"), validators=[validate_json])

def __str__(self):
return self.name

@cached_property
def schema(self) -> ProfileSchema:
"""Provide access to the schema data"""
Expand All @@ -512,9 +515,6 @@ def save_for_schema(self, profile_schema: ProfileSchema) -> Profile:
self.save()
return self

def __str__(self):
return self.name


class LooseRelationField(models.ForeignKey):
"""A relation that points to one part of the composite key.
Expand Down
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ def validate_json(value: str) -> None:
_("Value must be valid JSON text."),
code="invalid",
params={"value": value},
)
) from None
6 changes: 3 additions & 3 deletions src/schematools/exports/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from collections.abc import Iterable
from datetime import date
from datetime import datetime
from pathlib import Path
from typing import IO

Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(
table_ids: list[str] | None = None,
scopes: list[str] | None = None,
size: int | None = None,
temporal_date: date = date.today(),
temporal_date: datetime | None = None,
):
"""Constructor.
Expand All @@ -70,7 +70,7 @@ def __init__(
self.table_ids = table_ids
self.scopes = set(scopes)
self.size = size
self.temporal_date = temporal_date
self.temporal_date = temporal_date or datetime.now().astimezone()

self.base_dir = Path(output)
self.tables = (
Expand Down
4 changes: 2 additions & 2 deletions src/schematools/exports/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import csv
from collections.abc import Iterable
from datetime import date
from datetime import datetime
from typing import IO

from geoalchemy2 import functions as gfunc # ST_AsEWKT
Expand Down Expand Up @@ -75,7 +75,7 @@ def export_csvs(
table_ids: list[str],
scopes: list[str],
size: int,
temporal_date: date = date.today(),
temporal_date: datetime | None = None,
):
"""Utility function to wrap the Exporter."""
enable_datetime_cast()
Expand Down
2 changes: 1 addition & 1 deletion src/schematools/exports/geopackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ def export_geopackages(
query = f"{query} LIMIT {size}"
sql_stmt = query.as_string(connection.connection.cursor())
os.system( # noqa: S605 # nosec: B605
command.format(output_path=output_path, db_url=db_url, sql=sql_stmt)
command.format(output_path=output_path, db_url=db_url, sql=sql_stmt) # noqa: S605
)
6 changes: 2 additions & 4 deletions src/schematools/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,10 +354,8 @@ def index_factory(
*build_temporal_indexes(table, dataset_table, db_table_name),
]

through_indexes = _build_m2m_indexes(
metadata, dataset_table, is_versioned_dataset, db_schema_name
)
for table_db_name, through_indexes in through_indexes.items():
m2m_indexes = _build_m2m_indexes(metadata, dataset_table, is_versioned_dataset, db_schema_name)
for table_db_name, through_indexes in m2m_indexes.items():
indexes[table_db_name].extend(through_indexes)

return dict(indexes)
Expand Down
8 changes: 4 additions & 4 deletions src/schematools/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def get_dataset(self, dataset_id: str, prefetch_related: bool = False) -> Datase
return self._cache[dataset_id]
except KeyError:
if self._loader is None:
raise RuntimeError("This dataset collection can't retrieve new datasets")
raise RuntimeError("This dataset collection can't retrieve new datasets") from None
dataset = self._loader.get_dataset(dataset_id, prefetch_related=prefetch_related)
self.add_dataset(dataset)
return dataset
Expand Down Expand Up @@ -235,7 +235,7 @@ def _as_dataset(
self._loaded_callback(dataset_schema)

if prefetch_related:
dataset_schema.tables # noqa: ensure versioned tables are prefetched
dataset_schema.tables # noqa: B018, ensure versioned tables are prefetched

# Make sure the related datasets are read.
for dataset_id in dataset_schema.related_dataset_schema_ids:
Expand Down Expand Up @@ -278,7 +278,7 @@ def get_all_datasets(self) -> dict[str, DatasetSchema]:
datasets = {}
for dataset_id, dataset_path in sorted(self._dataset_paths.items()):
dataset = self.get_dataset(dataset_id, prefetch_related=False)
dataset.tables # noqa: ensure versioned tables are still prefetched
dataset.tables # noqa: B018, ensure versioned tables are still prefetched
datasets[dataset_path] = dataset
return datasets

Expand Down Expand Up @@ -365,7 +365,7 @@ def get_root(cls, dataset_file: Path | str) -> Path:
try:
return next(dir for dir in dataset_file.parents if dir.name == "datasets")
except StopIteration:
raise ValueError(f"No 'datasets' root found for file '{dataset_file}'.")
raise ValueError(f"No 'datasets' root found for file '{dataset_file}'.") from None

def get_dataset_from_file(
self, dataset_file: Path | str, prefetch_related: bool = False, allow_external_files=False
Expand Down
3 changes: 2 additions & 1 deletion src/schematools/maps/interfaces/json_.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def __call__(self, uri: str, **load_params):

def local_strategy(uri: str) -> str:
if uri.startswith("./"):
return open(uri).read()
with open(uri) as f:
return f.read()
raise ValueError


Expand Down
4 changes: 3 additions & 1 deletion src/schematools/maps/interfaces/mapfile/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ class JinjaSerializer:

@property
def env(self) -> Environment:
env = Environment(loader=FileSystemLoader(self.template_dir)) # nosec
env = Environment(
loader=FileSystemLoader(self.template_dir), autoescape=False # noqa: S701
)
env.trim_blocks = True
env.lstrip_blocks = True
return env
Expand Down
2 changes: 1 addition & 1 deletion src/schematools/provenance/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def set_number_of_tables(self, dataschema: DatasetSchema):
def set_dataset_for_final_listing(self, dataschema: DatasetSchema):
"""Setting dataset level to add later als wrapper"""

if type(dataschema) is dict:
if isinstance(dataschema, dict):

# At first make root branch dataset
# and take id field as value for dataschema attribute
Expand Down
6 changes: 3 additions & 3 deletions src/schematools/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def status(self) -> DatasetSchema.Status:
try:
return DatasetSchema.Status[value]
except KeyError:
raise ParserError(f"Status field contains an unknown value: {value}")
raise ParserError(f"Status field contains an unknown value: {value}") from None

def _get_dataset_schema(self, dataset_id: str) -> DatasetSchema:
"""Internal function to retrieve a (related) dataset from the shared cache."""
Expand All @@ -401,7 +401,7 @@ def _get_dataset_schema(self, dataset_id: str) -> DatasetSchema:
raise RuntimeError(
f"{self!r} has no dataset collection defined,"
f" can't resolve relation to '{dataset_id}'."
)
) from None

# It's assumed here that the loader is a CachedSchemaLoader,
# do data can be fetched multiple times.
Expand Down Expand Up @@ -444,7 +444,7 @@ def table_versions(self) -> dict[str, TableVersions]:
def publisher(self) -> Publisher | None:
"""Access the publisher within the file."""
raw_publisher = self["publisher"]
if type(raw_publisher) == str:
if isinstance(raw_publisher, str):
# Compatibility with meta-schemas prior to 2.0
if self.loader is None:
return None
Expand Down
Loading

0 comments on commit abfc935

Please sign in to comment.