
Commit

Apply manual ruff fixes
* requests timeout
* date.today() as argument!
* open() without close()

Minor:

* assert -> raise
* exception "from <cause>"
* type -> isinstance
* combine if-statements
* Django model method ordering
* noqa changes for security checks
* string line lengths
vdboor committed Jul 9, 2024
1 parent 9b48c7c commit 42ce06d
Showing 25 changed files with 109 additions and 92 deletions.
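Two of the headline fixes above are worth spelling out for readers who don't know the ruff codes: S113 flags `requests` calls made without an explicit timeout, and the `open()` item is the usual context-manager fix. A minimal sketch of both patterns (the URL and path are placeholders, not values from this repository):

```python
from __future__ import annotations

import json

import requests


def fetch_schema(url: str) -> dict:
    # An explicit timeout keeps a hung server from blocking the process forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return response.json()


def read_schema(path: str) -> dict:
    # The context manager guarantees the handle is closed, even on errors.
    with open(path) as f:
        return json.load(f)
```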
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -86,8 +86,8 @@ known-first-party = ["schematools"]
required-imports = ["from __future__ import annotations"]

[tool.ruff.lint.mccabe]
-max-complexity = 10
+max-complexity = 20 # TODO: lower this

[tool.ruff.lint.per-file-ignores]
"**/migrations/*.py" = ["E501"] # line too long
-"tests/**/*.py" = ["S101", "S105", "S106", "S314"] # allow asserts, hardcoded passwords
+"tests/**/*.py" = ["S101", "S105", "S106", "S314", "S608"] # allow asserts, hardcoded passwords, SQL injection
2 changes: 1 addition & 1 deletion src/schematools/__init__.py
@@ -2,7 +2,7 @@

from __future__ import annotations

-from typing import Final, List
+from typing import Final

# Internal conventions
RELATION_INDICATOR: Final[str] = "_"
3 changes: 2 additions & 1 deletion src/schematools/_utils.py
@@ -9,7 +9,8 @@
def cached_method(*lru_args, **lru_kwargs):
"""A simple lru-cache per object.
This removed the need for methodtools.lru_cache(), which uses wirerope for purity.
-The usage of wirerope started showing up as 5% of the request time, hence it's significant to remove.
+The usage of wirerope started showing up as 5% of the request time,
+hence it's significant to remove.
"""

def decorator(func):
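The `cached_method` decorator is only partially visible in this hunk. As a rough, hypothetical sketch (not the project's actual implementation), a per-object LRU cache along the lines the docstring describes could look like this:

```python
from __future__ import annotations

import functools


def cached_method(*lru_args, **lru_kwargs):
    """Cache a method's results per instance, so entries die with the object."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Build the lru_cache lazily on first call and store it on the
            # instance, so instances never share entries and stay collectable.
            caches = self.__dict__.setdefault("_method_caches", {})
            cache = caches.get(func.__name__)
            if cache is None:
                bound = functools.partial(func, self)
                cache = functools.lru_cache(*lru_args, **lru_kwargs)(bound)
                caches[func.__name__] = cache
            return cache(*args, **kwargs)

        return wrapper

    return decorator
```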
12 changes: 6 additions & 6 deletions src/schematools/cli.py
@@ -340,7 +340,7 @@ def _schema_fetch_url_file(schema_url_file: str) -> dict[str, Any]:
with open(schema_url_file) as f:
schema_data = json.load(f)
else:
-response = requests.get(schema_url_file)
+response = requests.get(schema_url_file, timeout=60)
response.raise_for_status()
schema_data = response.json()

@@ -375,7 +375,7 @@ def _fetch_json(location: str) -> dict[str, Any]:
with open(location) as f:
json_obj = json.load(f)
else:
-response = requests.get(location)
+response = requests.get(location, timeout=60)
response.raise_for_status()
json_obj = response.json()
return json_obj
@@ -562,7 +562,7 @@ def batch_validate(
try:
datasets_idx = path_parts.index("datasets")
except ValueError:
-raise ValueError("dataset files do not live in a common 'datasets' dir")
+raise ValueError("dataset files do not live in a common 'datasets' dir") from None

# Find out if we need to go up the directory tree to get at `datasets` dir
# This could be needed if we are only checking one dataset,
@@ -671,7 +671,7 @@ def to_ckan(schema_url: str, upload_url: str):
continue
try:
data.append(ckan.from_dataset(ds, path))
-except Exception as e:
+except Exception as e: # noqa: BLE001
logger.error("in dataset %s: %s", ds.identifier, str(e)) # noqa: G200
status = 1

@@ -684,13 +684,13 @@
for ds in data:
ident = ds["identifier"]
url = f"{upload_url}/api/3/action/package_update?id={ident}"
-response = requests.post(url, headers=headers, json=ds)
+response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if response.status_code == 404:
# 404 *can* mean no such dataset. Try again with package_create.
url = upload_url + "/api/3/action/package_create"
-response = requests.post(url, headers=headers, json=ds)
+response = requests.post(url, headers=headers, json=ds, timeout=60)
logger.debug("%s: %d, %s", url, response.status_code, response.json())

if not (200 <= response.status_code < 300):
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/faker/providers/geo.py
@@ -40,7 +40,7 @@ def __init__(self, has_z=False):
try:
shape_cls = getattr(geos, class_name)
except AttributeError:
-raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.")
+raise UnsupportedGEOTypeException(f"Class {class_name} is not allowed.") from None
self.has_z = has_z
self.shape = shape_cls(*self.get_coordinates(), srid=SRID_RD_NEW)

@@ -49,7 +49,7 @@ def create_tables(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
-dataset.schema
+dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db or dataset.name in to_be_skipped:
@@ -29,7 +29,7 @@ def _is_valid_sql(view_sql: str, view_name: str, write_role_name: str) -> bool:
cursor.execute("SET statement_timeout = 5000;")
cursor.execute(sql.SQL(view_sql))
transaction.savepoint_rollback(sid)
-except Exception as e: # noqa: F841
+except Exception as e: # noqa: F841, BLE001
return False
return True

@@ -109,7 +109,7 @@ def create_views(
# the datasets cache (the DatasetSchema.dataset_collection)
# by accessing the `Dataset.schema` attribute.
for dataset in datasets:
-dataset.schema
+dataset.schema # noqa: B018

for dataset in datasets:
if not dataset.enable_db:
@@ -190,9 +190,9 @@ def create_views(
)

# Remove the view if it exists
-# Due to the large costs of recreating materialized views, we only create
-# and not drop them. When changes are made to the materialized view the view
-# must be droped manually.
+# Due to the large costs of recreating materialized views,
+# we only create and not drop them. When changes are made
+# to the materialized view the view must be dropped manually.
if view_type != "materialized":
cursor.execute(
sql.SQL("DROP VIEW IF EXISTS {view_name} CASCADE").format(
@@ -219,7 +219,8 @@ def create_views(
errors += 1
else:
command.stderr.write(
-f" Required permissions for view {table.db_name} are not in the view dataset auth"
+f" Required permissions for view {table.db_name}"
+" are not in the view dataset auth"
)

if errors:
@@ -141,23 +141,17 @@ def update_dataset_version(self, dataset: Dataset, schema: DatasetSchema) -> None:
"""
Perform dataset version update, including changes to dataset tables.
"""
-if not dataset.is_default_version:
-# Dataset is currently not default. Can not be safely renamed.
-if schema.is_default_version:
-# Dataset is promoted to default. We need to rename current default,
-# if it was not done yet.
-try:
-current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
-except Dataset.DoesNotExist:
-pass
-else:
-# Update current default dataset name to expected name.
-if current_default.version:
-current_default.name = to_snake_case(
-f"{schema.id}_{current_default.version}"
-)
-
-current_default.save()
+if not dataset.is_default_version and schema.is_default_version:
+try:
+current_default = Dataset.objects.get(name=Dataset.name_from_schema(schema))
+except Dataset.DoesNotExist:
+pass
+else:
+# Update current default dataset name to expected name.
+if current_default.version:
+current_default.name = to_snake_case(f"{schema.id}_{current_default.version}")
+
+current_default.save()

dataset.name = Dataset.name_from_schema(schema)
dataset.is_default_version = schema.is_default_version
@@ -59,7 +59,10 @@ def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"--from-files",
action="store_true",
-help="Get the tables from the filesystem. NB. the SCHEMA_URL also needs to be file-based!",
+help=(
+"Get the tables from the filesystem. "
+"Note the SCHEMA_URL also needs to be file-based!"
+),
)
parser.add_argument("schema", help="Schema name")
parser.add_argument("table", help="Table name")
@@ -68,12 +71,18 @@ def add_arguments(self, parser: CommandParser) -> None:
parser.add_argument(
"version1",
metavar="OLDVERSION",
-help="Old table version, e.g. v1.0.0, or a git ref like `master`, `tag`, `branch` or `hash` with --from-files",
+help=(
+"Old table version, e.g. v1.0.0, or a git ref like"
+" `master`, `tag`, `branch` or `hash` with --from-files"
+),
)
parser.add_argument(
"version2",
metavar="NEWVERSION",
-help="New table version, e.g. v1.1.0, , or a git ref like `master`, `tag`, `branch` or `hash` with --from-files",
+help=(
+"New table version, e.g. v1.1.0, , or a git ref like"
+" `master`, `tag`, `branch` or `hash` with --from-files"
+),
)

def handle(self, *args, **options) -> None:
@@ -93,13 +102,15 @@ def handle(self, *args, **options) -> None:
# obtain the tables for these specific references
# for comparison and sql generation.
if options["from_files"]:
-assert not options["schema_url"].startswith(
-"http"
-), "The --from-files can only work with a SCHEMA_URL on the local filesystem."
+if options["schema_url"].startswith("http"):
+raise CommandError(
+"The --from-files can only work with a SCHEMA_URL on the local filesystem."
+)
+
with tempfile.TemporaryDirectory() as tmpdir:
schemas_root = Path(options["schema_url"]).parent
-subprocess.run( # nosec
-["git", "clone", schemas_root, tmpdir],
+subprocess.run(
+["git", "clone", schemas_root, tmpdir], # noqa: S603
)
table1 = self._load_table_from_checkout(
dataset.id, options["table"], tmpdir, options["version1"]
@@ -142,7 +153,9 @@ def _load_table_from_checkout(
self, dataset_id: str, table_id: str, tmpdir: str, version_ref: str
) -> DatasetTableSchema:
"""Load a DatasetTableSchema for the specified git reference."""
-subprocess.run(["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL)
+subprocess.run(
+["git", "checkout", version_ref], cwd=tmpdir, stdout=subprocess.DEVNULL # noqa: S603
+)
tmp_schema_path = Path(tmpdir) / "datasets"
# We create a specific schema loader, because it has to read in the data
# associated with a specific git checkout.
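The `# noqa: S603` markers added above silence ruff's subprocess audit; what actually keeps these calls safe is passing the command as an argument list rather than a shell string. A small illustration (the ref value is hypothetical):

```python
import subprocess

ref = "feature/some-branch"  # hypothetical, possibly user-supplied value

# List form: arguments go straight to the git executable, never through a
# shell, so characters like ';' or '$' in `ref` cannot inject extra commands.
subprocess.run(["git", "checkout", ref], check=True)  # noqa: S603

# The risky variant that S602 flags would be:
#   subprocess.run(f"git checkout {ref}", shell=True)
```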
34 changes: 17 additions & 17 deletions src/schematools/contrib/django/models.py
@@ -178,6 +178,20 @@ class Meta:
def __str__(self):
return self.name

+def save(self, *args, **kwargs):
+"""Perform a final data validation check, and additional updates."""
+if self.schema_data_changed() and (self.schema_data or not self._state.adding):
+self.__dict__.pop("schema", None) # clear cached property
+# The extra "and" above avoids the transaction savepoint for an empty dataset.
+# Ensure both changes are saved together
+with transaction.atomic():
+super().save(*args, **kwargs)
+self.save_schema_tables()
+else:
+super().save(*args, **kwargs)
+
+save.alters_data = True
+
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._dataset_collection = None
@@ -247,20 +261,6 @@ def save_for_schema(self, schema: DatasetSchema):
self.__dict__["schema"] = schema # Avoid serializing/deserializing the schema data
return changed

-def save(self, *args, **kwargs):
-"""Perform a final data validation check, and additional updates."""
-if self.schema_data_changed() and (self.schema_data or not self._state.adding):
-self.__dict__.pop("schema", None) # clear cached property
-# The extra "and" above avoids the transaction savepoint for an empty dataset.
-# Ensure both changes are saved together
-with transaction.atomic():
-super().save(*args, **kwargs)
-self.save_schema_tables()
-else:
-super().save(*args, **kwargs)
-
-save.alters_data = True
-
def save_schema_tables(self):
"""Expose the schema data to the DatasetTable.
This allows other projects (e.g. geosearch) to process our dynamic tables.
@@ -486,6 +486,9 @@ class Profile(models.Model):
scopes = models.CharField(max_length=255)
schema_data = models.TextField(_("Amsterdam Schema Contents"), validators=[validate_json])

+def __str__(self):
+return self.name
+
@cached_property
def schema(self) -> ProfileSchema:
"""Provide access to the schema data"""
@@ -512,9 +515,6 @@ def save_for_schema(self, profile_schema: ProfileSchema) -> Profile:
self.save()
return self

-def __str__(self):
-return self.name
-

class LooseRelationField(models.ForeignKey):
"""A relation that points to one part of the composite key.
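The relocations in models.py above (`save()` and `__str__` moved) follow the Django style-guide ordering that ruff's DJ012 check enforces: fields, managers, `Meta`, `__str__`, `save()`, then other methods. A schematic example of that order (not taken from this repository):

```python
from django.db import models


class Article(models.Model):
    # 1. database fields
    title = models.CharField(max_length=100)

    # 2. custom managers would come next

    class Meta:  # 3. Meta
        ordering = ["title"]

    def __str__(self):  # 4. __str__
        return self.title

    def save(self, *args, **kwargs):  # 5. overridden standard methods
        self.title = self.title.strip()
        super().save(*args, **kwargs)

    def display_title(self) -> str:  # 6. custom methods last
        return self.title.upper()
```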
2 changes: 1 addition & 1 deletion src/schematools/contrib/django/validators.py
@@ -23,4 +23,4 @@ def validate_json(value: str) -> None:
_("Value must be valid JSON text."),
code="invalid",
params={"value": value},
-)
+) from None
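The `from None` endings added in this commit (ruff B904 asks for an explicit `from` inside `except` blocks) suppress implicit exception chaining, so callers get one clean error instead of a "During handling of the above exception, another exception occurred" double traceback. In miniature:

```python
import json


def parse_config(text: str) -> dict:
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Without `from None`, the JSONDecodeError would ride along as
        # __context__ and be printed as a second traceback.
        raise ValueError("Value must be valid JSON text.") from None
```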
6 changes: 3 additions & 3 deletions src/schematools/exports/__init__.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from collections.abc import Iterable
-from datetime import date
+from datetime import datetime
from pathlib import Path
from typing import IO

@@ -51,7 +51,7 @@ def __init__(
table_ids: list[str] | None = None,
scopes: list[str] | None = None,
size: int | None = None,
-temporal_date: date = date.today(),
+temporal_date: datetime | None = None,
):
"""Constructor.
@@ -70,7 +70,7 @@ def __init__(
self.table_ids = table_ids
self.scopes = set(scopes)
self.size = size
-self.temporal_date = temporal_date
+self.temporal_date = temporal_date or datetime.now().astimezone()

self.base_dir = Path(output)
self.tables = (
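The replaced default above is the commit-message bullet about `date.today()` as an argument (the pattern B008 warns about): a default expression is evaluated once, when the function is defined, so a long-running process would keep exporting with the date the module was imported on. The `None` sentinel used in the diff resolves the value at call time; the function name below is illustrative:

```python
from __future__ import annotations

from datetime import datetime


def export_rows(temporal_date: datetime | None = None) -> datetime:
    # Resolve the default when the function is called, not when it is defined.
    return temporal_date or datetime.now().astimezone()
```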
4 changes: 2 additions & 2 deletions src/schematools/exports/csv.py
@@ -2,7 +2,7 @@

import csv
from collections.abc import Iterable
-from datetime import date
+from datetime import datetime
from typing import IO

from geoalchemy2 import functions as gfunc # ST_AsEWKT
@@ -75,7 +75,7 @@ def export_csvs(
table_ids: list[str],
scopes: list[str],
size: int,
-temporal_date: date = date.today(),
+temporal_date: datetime | None = None,
):
"""Utility function to wrap the Exporter."""
enable_datetime_cast()
2 changes: 1 addition & 1 deletion src/schematools/exports/geopackage.py
@@ -56,5 +56,5 @@ def export_geopackages(
query = f"{query} LIMIT {size}"
sql_stmt = query.as_string(connection.connection.cursor())
os.system( # noqa: S605 # nosec: B605
-command.format(output_path=output_path, db_url=db_url, sql=sql_stmt)
+command.format(output_path=output_path, db_url=db_url, sql=sql_stmt) # noqa: S605
)
6 changes: 2 additions & 4 deletions src/schematools/factories.py
@@ -354,10 +354,8 @@ def index_factory(
*build_temporal_indexes(table, dataset_table, db_table_name),
]

-through_indexes = _build_m2m_indexes(
-metadata, dataset_table, is_versioned_dataset, db_schema_name
-)
-for table_db_name, through_indexes in through_indexes.items():
+m2m_indexes = _build_m2m_indexes(metadata, dataset_table, is_versioned_dataset, db_schema_name)
+for table_db_name, through_indexes in m2m_indexes.items():
indexes[table_db_name].extend(through_indexes)

return dict(indexes)
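The rename in factories.py above removes a loop variable that shadowed the dict it iterated over (what ruff's B020 check flags). The pattern in miniature, with made-up data:

```python
indexes = {"table_a": ["idx_1"], "table_b": ["idx_2"]}

# Shadowed: after the first iteration the name `indexes` is rebound to a
# list, so any later use of the dict silently reads the wrong object.
# for table_name, indexes in indexes.items():
#     ...

# A distinct loop variable keeps both objects addressable.
for table_name, table_indexes in indexes.items():
    print(table_name, table_indexes)
```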