Skip to content

Commit

Permalink
Merge pull request #137 from Urban-Analytics-Technology-Platform/fix-metadata-ids
Browse files Browse the repository at this point in the history

Fixes metadata IDs to ensure that dates are included in hash
  • Loading branch information
penelopeysm authored Jul 5, 2024
2 parents f93b592 + 988ac3c commit 59cde7f
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 10 deletions.
16 changes: 8 additions & 8 deletions python/popgetter/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,15 @@ def serializable_vars(obj: object) -> dict:
# Check if variables are serializable
for key, val in vars(obj).items():
try:
jcs.canonicalize(val)
variables[key] = val
# Python doesn't serialise dates to JSON, have to convert to ISO 8601 first
new_val = val.isoformat() if isinstance(val, date) else val
# Try to serialise
jcs.canonicalize(new_val)
# Store in dict if serialisable
variables[key] = new_val
except Exception:
pass

# Python doesn't serialise dates to JSON, have to convert to ISO 8601 first
for key, val in variables.items():
if isinstance(val, date):
variables[key] = val.isoformat()
# If cannot serialise, continue
continue

return variables

Expand Down
71 changes: 69 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@

import pytest

from popgetter.metadata import COL, DataPublisher, SourceDataRelease
from popgetter.metadata import (
COL,
CountryMetadata,
DataPublisher,
GeometryMetadata,
SourceDataRelease,
)


def test_column_name_uniqueness():
Expand Down Expand Up @@ -63,7 +69,7 @@ def test_source_data_release_hash():
)
assert (
source_data_release.id
== "4d61bfe401ba17becd02d6b3912152c135daa9ecaebc9bd45a589dc831a85217"
== "9ec7e234d73664339e4c1f04bfa485dbb17e204dd72dc3ffbb9cab6870475597"
)

source_data_release2 = SourceDataRelease(
Expand Down Expand Up @@ -101,3 +107,64 @@ def test_data_publisher_hash():
countries_of_interest=["GBR"],
)
assert data_publisher.id != data_publisher2.id


def test_geometry_hash():
    """Check that a GeometryMetadata ID is reproducible and field-sensitive.

    The baseline object's ID is compared against a pinned expected value,
    then against an identically-constructed object (must match) and against
    four variants that each differ in exactly one field (must not match):
    the validity start date, the validity end date, the level and the HXL tag.
    """
    usa = CountryMetadata(
        name_short_en="United States",
        name_official="United States of America",
        iso2="US",
        iso3="USA",
        iso3166_2=None,
    )

    def make_geometry(**overrides):
        # Start from the baseline arguments and replace only the fields
        # named by the caller, so each variant differs in a single field.
        kwargs = {
            "country_metadata": usa,
            "validity_period_start": date(2021, 1, 1),
            "validity_period_end": date(2021, 1, 1),
            "level": "tract",
            "hxl_tag": "tract",
        }
        kwargs.update(overrides)
        return GeometryMetadata(**kwargs)

    baseline = make_geometry()
    # The expected ID is pinned so that any change to how IDs are derived
    # shows up as a test failure.
    assert (
        baseline.id
        == "082cfebd7348ca2d06353ff1d73e6096a60960f9795a26de54faeda777cd7f5d"
    )

    identical = make_geometry()
    earlier_start = make_geometry(validity_period_start=date(2020, 1, 1))
    later_end = make_geometry(validity_period_end=date(2021, 2, 1))
    other_level = make_geometry(level="block_group")
    other_hxl_tag = make_geometry(hxl_tag="block_group")

    # Same inputs must give the same ID.
    assert baseline.id == identical.id
    # Changing any single field, including either date, must change the ID.
    for variant in (earlier_start, later_end, other_level, other_hxl_tag):
        assert baseline.id != variant.id

0 comments on commit 59cde7f

Please sign in to comment.