Skip to content

Commit

Permalink
Merge pull request #143 from Urban-Analytics-Technology-Platform/sepe…
Browse files Browse the repository at this point in the history
…rate-EAW-and-UK

Separate out England and Wales census from UK wide assets
  • Loading branch information
andrewphilipsmith authored Jul 10, 2024
2 parents ff0d019 + 334402f commit 8683f79
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 17 deletions.
4 changes: 2 additions & 2 deletions python/popgetter/assets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from . import bel, gb_nir, gb_sct, uk, us
from . import bel, gb_eaw, gb_nir, gb_sct, uk, us

countries = [
(mod, mod.__name__.split(".")[-1]) for mod in [bel, gb_nir, uk, us, gb_sct]
(mod, mod.__name__.split(".")[-1]) for mod in [bel, gb_nir, uk, us, gb_eaw, gb_sct]
]

__all__ = ["countries"]
5 changes: 5 additions & 0 deletions python/popgetter/assets/gb_eaw/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

from . import (
england_wales_census, # noqa: F401
)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import re
import zipfile
from collections.abc import Callable, Iterable
Expand Down Expand Up @@ -38,16 +39,14 @@
markdown_from_plot,
)

# TODO:
# Overview
# - Create an asset which is a catalog of the available data / tables / metrics
# - This catalog must include a field which is the smallest geometry level where the data is available
# - The geometry level is only discoverable after downloading the zip file
# - The available geometry levels are only discoverable after downloading the zip file
# - The zip files can contain multiple CSV files, one for each geometry level
# - Some of the downloaded files mistakenly have two consecutive `.` in the filename, e.g. `census2021-ts002-lsoa..csv`. We need to be able to gracefully handle this
# - The catalog must be parsed into a Dagster Partition, so that
# - individual tables can be uploaded to the cloud table sensor
# - the metadata object can be created for each table/metric
from .united_kingdom import country


@dataclass
Expand Down Expand Up @@ -156,8 +155,8 @@ def census_table_metadata(

# TODO - this is probably only required for tests,
# hence it would be best moved to a test fixture
# REQUIRED_TABLES = ["TS009"] if os.getenv("ENV") == "dev" else None
REQUIRED_TABLES = None
REQUIRED_TABLES = ["TS009"] if os.getenv("ENV") == "dev" else None


# TODO - these regexes are probably only useful for table TS009.
# At present that is the only table we are using for any of the derived metrics
Expand Down Expand Up @@ -271,10 +270,13 @@ def columns_selector(
class EnglandAndWales(Country):
geo_levels: ClassVar[list[str]] = list(EW_CENSUS_GEO_LEVELS.keys())
required_tables: list[str] | None = REQUIRED_TABLES
country_metadata: ClassVar[CountryMetadata] = country

def _country_metadata(self, _context) -> CountryMetadata:
return country
country_metadata: ClassVar[CountryMetadata] = CountryMetadata(
name_short_en="England and Wales",
name_official="England and Wales",
iso3="GBR",
iso2="GB",
iso3166_2="GB-EAW",
)

def _data_publisher(
self, _context, _country_metdata: CountryMetadata
Expand All @@ -283,7 +285,7 @@ def _data_publisher(
name="Office for National Statistics",
url="https://www.nomisweb.co.uk/sources/census_2021_bulk",
description="We are the UK's largest independent producer of official statistics and its recognised national statistical institute. We are responsible for collecting and publishing statistics related to the economy, population and society at national, regional and local levels. We also conduct the census in England and Wales every 10 years.",
countries_of_interest=[country.id],
countries_of_interest=[self.country_metadata.id],
)

def _catalog(self, context) -> pd.DataFrame:
Expand Down Expand Up @@ -632,7 +634,7 @@ def _geometry(self, context) -> list[GeometryOutput]:
validity_period_end=CENSUS_COLLECTION_DATE,
level=level_details.level,
hxl_tag=level_details.hxl_tag,
country_metadata=country,
country_metadata=self.country_metadata,
)
geometries_raw: gpd.GeoDataFrame = gpd.read_file(
level_details.data_download_url
Expand Down
2 changes: 0 additions & 2 deletions python/popgetter/assets/uk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from pathlib import Path

from . import (
england_wales_census, # noqa: F401
# uk_os_opendata,
united_kingdom, # noqa: F401
)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_uk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dagster import build_asset_context
from icecream import ic

from popgetter.assets.uk import england_wales_census as ew_census
from popgetter.assets.gb_eaw import england_wales_census as ew_census
from popgetter.metadata import MetricMetadata


Expand Down

0 comments on commit 8683f79

Please sign in to comment.