From e92b280c5f9a1888cefac3b9fe31ff9e60bc436d Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 May 2020 16:00:21 +0100 Subject: [PATCH 01/25] switch settings.json to feather --- scripts/settings.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/settings.json b/scripts/settings.json index 04e65970..361edfce 100644 --- a/scripts/settings.json +++ b/scripts/settings.json @@ -2,7 +2,7 @@ "settings": { "Raw Census Extract location": "Scout Census Data/Census 2020 Extract (1).csv", - "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.csv", + "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.feather", "Full ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK.csv", "Reduced ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK reduced.csv", "ONS Names and codes folder": "ONSPD_MAY_2019_UK/Documents/", @@ -19,7 +19,7 @@ "codes": {"path": "../data/Scout Census Data/district_id_mapping.csv", "key": "D_ID", "key_type": "Int32", "name": "D_name"}, "boundary": { - "shapefiles": ["../src/scripts/districts_buffered.geojson"], "key": "id", "name": "name" + "shapefile": "../src/scripts/districts_buffered.geojson", "key": "id", "name": "name" }, "age_profile": null, "age_profile_code_col": null From 72897c87867cbc4a9bee68f82fc953aeea384b16 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 May 2020 17:38:17 +0100 Subject: [PATCH 02/25] Reformat settings.json --- scripts/settings.json | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/scripts/settings.json b/scripts/settings.json index 361edfce..aa1d5046 100644 --- a/scripts/settings.json +++ b/scripts/settings.json @@ -1,9 +1,9 @@ { "settings": { - "Raw Census Extract location": "Scout Census Data/Census 2020 Extract (1).csv", - "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.feather", - "Full ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK.csv", + "Raw Census Extract location": "Scout Census Data/Census 2020 Extract (1).csv", + "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.feather", + "Full ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK.csv", "Reduced ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK reduced.csv", "ONS Names and codes folder": "ONSPD_MAY_2019_UK/Documents/", "National Statistical folder": "National Statistical data/", @@ -17,10 +17,7 @@ { "name": "D_ID", "codes": {"path": "../data/Scout Census Data/district_id_mapping.csv", "key": "D_ID", "key_type": "Int32", "name": "D_name"}, - "boundary": - { - "shapefile": "../src/scripts/districts_buffered.geojson", "key": "id", "name": "name" - }, + "boundary": {"shapefile": "../src/scripts/districts_buffered.geojson", "key": "id", "name": "name"}, "age_profile": null, "age_profile_code_col": null }, From a7c7448c1ec17048fbf699f6e55d0f54686bec68 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Wed, 13 May 2020 22:11:41 +0100 Subject: [PATCH 03/25] Adjust DATA_ROOT usage --- scripts/setup_reduce_onspd.py | 5 ++--- src/data/ons_pd_may_18.py | 5 ++--- src/data/ons_pd_may_19.py | 5 ++--- src/data/scout_data.py | 4 ++-- src/geographies/geography.py | 19 +++++++++++++------ src/maps/map_plotter.py | 8 ++++---- src/reports/reports.py | 7 +++---- 7 files changed, 28 insertions(+), 25 deletions(-) diff 
--git a/scripts/setup_reduce_onspd.py b/scripts/setup_reduce_onspd.py index b66ac755..37b4350b 100644 --- a/scripts/setup_reduce_onspd.py +++ b/scripts/setup_reduce_onspd.py @@ -1,15 +1,14 @@ import json import src.utility as utility -from src.utility import SCRIPTS_ROOT, DATA_ROOT from src.data.ons_pd_may_19 import ONSPostcodeDirectoryMay19 if __name__ == "__main__": - with open(SCRIPTS_ROOT.joinpath("settings.json"), "r") as read_file: + with open(utility.SCRIPTS_ROOT.joinpath("settings.json"), "r") as read_file: settings = json.load(read_file)["settings"] print("Starting") - ons_pd_location = DATA_ROOT / settings["Full ONS PD location"] + ons_pd_location = utility.DATA_ROOT / settings["Full ONS PD location"] # Load Full ONS Postcode Directory ons_pd = ONSPostcodeDirectoryMay19(ons_pd_location, load_data=True) diff --git a/src/data/ons_pd_may_18.py b/src/data/ons_pd_may_18.py index 2fc7fe56..12c2033a 100644 --- a/src/data/ons_pd_may_18.py +++ b/src/data/ons_pd_may_18.py @@ -1,4 +1,3 @@ -from src.utility import DATA_ROOT from src.data.ons_pd import ONSPostcodeDirectory @@ -55,8 +54,8 @@ def __init__(self, ons_pd_csv_path, load_data=True): ) # Folder within the ONS Postcode Directory archive holding names and codes files - names_codes_root = DATA_ROOT / self.settings["ONS Names and codes folder"] - boundaries_root = DATA_ROOT / self.settings["Boundaries folder"] + names_codes_root = self.settings["ONS Names and codes folder"] + boundaries_root = self.settings["Boundaries folder"] # Paths to all shapefiles within the Boundaries folder # fmt: off diff --git a/src/data/ons_pd_may_19.py b/src/data/ons_pd_may_19.py index 69ffee76..14eaabd5 100644 --- a/src/data/ons_pd_may_19.py +++ b/src/data/ons_pd_may_19.py @@ -1,4 +1,3 @@ -from src.utility import DATA_ROOT from src.data.ons_pd import ONSPostcodeDirectory from pathlib import Path @@ -55,8 +54,8 @@ def __init__(self, ons_pd_csv_path, load_data=True): ) # Folder within the ONS Postcode Directory archive holding names and codes files - names_codes_root = DATA_ROOT / Path(self.settings["ONS Names and codes folder"]).resolve() - boundaries_dir = DATA_ROOT / Path(self.settings["Boundaries folder"]).resolve() + names_codes_root = Path(self.settings["ONS Names and codes folder"]).resolve() + boundaries_dir = Path(self.settings["Boundaries folder"]).resolve() # Paths to all shapefiles within the Boundaries folder # fmt: off diff --git a/src/data/scout_data.py b/src/data/scout_data.py index eb460762..3e698509 100644 --- a/src/data/scout_data.py +++ b/src/data/scout_data.py @@ -40,7 +40,7 @@ def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None): self.logger.info("Loading Scout Census data") # Loads Scout Census Data from a path to a .csv file that contains Scout Census data # We assume no custom path has been passed, but allow for one to be used - census_path = utility.DATA_ROOT / self.settings["Scout Census location"] if not census_path else census_path + census_path = self.settings["Scout Census location"] if not census_path else census_path self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path) self.data: pd.DataFrame = self.scout_census.data self.logger.finished(f"Loading Scout Census data", start_time=self.start_time) @@ -131,7 +131,7 @@ def save_merged_data(self, ons_pd_publication_date: str): self.data.to_csv(output_path.with_suffix(".csv"), index=False, encoding="utf-8-sig") self.data.to_feather(output_path.with_suffix(".feather")) - def filter_records(self: ScoutDataInterface, field: str, 
value_list: list, mask: bool = False, exclusion_analysis: bool = False): + def filter_records(self, field: str, value_list: list, mask: bool = False, exclusion_analysis: bool = False): """Filters the Census records by any field in ONS PD. :param str field: The field on which to filter diff --git a/src/geographies/geography.py b/src/geographies/geography.py index 0033c942..37cc749c 100644 --- a/src/geographies/geography.py +++ b/src/geographies/geography.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING from src.base import Base +from src.utility import DATA_ROOT # For type hints if TYPE_CHECKING: @@ -39,6 +40,14 @@ def type(self) -> str: def codes_map_key(self) -> str: return self.geography_metadata_dict["codes"]["key"] + @property + def codes_map_key_type(self) -> str: + return self.geography_metadata_dict["codes"]["key_type"] + + @property + def codes_map_path(self) -> Path: + return DATA_ROOT / self.geography_metadata_dict["codes"].get("path") + @property def codes_map_name(self) -> str: return self.geography_metadata_dict["codes"]["name"] @@ -53,11 +62,11 @@ def shapefile_name(self) -> str: @property def shapefile_path(self) -> Path: - return self.geography_metadata_dict["boundary"]["shapefile"] + return DATA_ROOT / self.geography_metadata_dict["boundary"]["shapefile"] @property def age_profile_path(self) -> Path: - return self.geography_metadata_dict["age_profile"]["path"] + return DATA_ROOT / self.settings["National Statistical folder"] / self.geography_metadata_dict["age_profile"]["path"] @property def age_profile_key(self) -> str: @@ -80,11 +89,9 @@ def _set_boundary(self, geography_name: str, ons_pd: ONSPostcodeDirectory): boundaries_dict = {**ons_pd.BOUNDARIES, **self.settings["Scout Mappings"]} if geography_name in boundaries_dict.keys(): self.geography_metadata_dict = boundaries_dict[geography_name] - boundary_codes_dict = self.geography_metadata_dict["codes"] - self.geography_region_ids_mapping = pd.read_csv( - boundary_codes_dict.get("path"), dtype={boundary_codes_dict["key"]: boundary_codes_dict["key_type"], boundary_codes_dict["name"]: "object",}, - ) # Names & Codes file path + # Names & Codes file path + self.geography_region_ids_mapping = pd.read_csv(self.codes_map_path, dtype={self.codes_map_key: self.codes_map_key_type, self.codes_map_name: "string"}) else: raise Exception(f"{geography_name} is an invalid boundary.\nValid boundaries include: {boundaries_dict.keys()}") diff --git a/src/maps/map_plotter.py b/src/maps/map_plotter.py index 5be52956..83fee736 100644 --- a/src/maps/map_plotter.py +++ b/src/maps/map_plotter.py @@ -41,9 +41,9 @@ def __init__(self, out_file: Path): self.SCORE_COL: dict = {} self.layers: dict = {} - self.score_col_label: str = None - self.code_name: str = None - self.CODE_COL: str = None + self.score_col_label: str = "" + self.code_name: str = "" + self.CODE_COL: str = "" self.map_data: pd.DataFrame = pd.DataFrame() self.geo_data = None @@ -109,7 +109,7 @@ def _filter_shape_file(self, shape_file_path: Path): self.logger.info(f"Filtering {original_number_of_shapes} shapes by {self.code_name} being in the {self.CODE_COL} of the map_data") self.logger.debug(f"Filtering {original_number_of_shapes} shapes by {self.code_name} being in \n{self.map_data[self.CODE_COL]}") - list_codes = self.map_data[self.CODE_COL].astype(str).to_list() + list_codes = self.map_data[self.CODE_COL].drop_duplicates().astype(str).to_list() all_shapes = all_shapes.loc[all_shapes[self.code_name].isin(list_codes)] self.logger.info(f"Resulting in {len(all_shapes.index)} 
shapes") diff --git a/src/reports/reports.py b/src/reports/reports.py index 86474b88..5d6e9942 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -291,8 +291,8 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: """ geog_name: str = self.geography.type try: - age_profile_path: str = self.geography.age_profile_path - age_profile_key: str = self.geography.age_profile_key + age_profile_path = self.geography.age_profile_path + age_profile_key = self.geography.age_profile_key except KeyError: raise AttributeError(f"Population by age data not present for this {geog_name}") @@ -303,8 +303,7 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: data_types = {str(key): "Int16" for key in range(5, 26)} try: - full_age_profile_path = utility.DATA_ROOT / self.settings["National Statistical folder"] / age_profile_path - age_profile_pd = pd.read_csv(full_age_profile_path, dtype=data_types) + age_profile_pd = pd.read_csv(age_profile_path, dtype=data_types) except TypeError: self.logger.error("Age profiles must be integers in each age category") raise From 187f78f305e02e9bfc2c28013218195ed6853506 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:47:37 +0100 Subject: [PATCH 04/25] Use super() calls in ONS PD classes --- src/data/ons_pd_may_18.py | 4 +--- src/data/ons_pd_may_19.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/data/ons_pd_may_18.py b/src/data/ons_pd_may_18.py index 12c2033a..61e76fee 100644 --- a/src/data/ons_pd_may_18.py +++ b/src/data/ons_pd_may_18.py @@ -49,9 +49,7 @@ class ONSPostcodeDirectoryMay18(ONSPostcodeDirectory): } def __init__(self, ons_pd_csv_path, load_data=True): - ONSPostcodeDirectory.__init__( - self, ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay18.index_column, ONSPostcodeDirectoryMay18.fields, ONSPostcodeDirectoryMay18.data_types, - ) + super().__init__(ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay18.index_column, ONSPostcodeDirectoryMay18.fields, ONSPostcodeDirectoryMay18.data_types) # Folder within the ONS Postcode Directory archive holding names and codes files names_codes_root = self.settings["ONS Names and codes folder"] diff --git a/src/data/ons_pd_may_19.py b/src/data/ons_pd_may_19.py index 14eaabd5..070a6ac4 100644 --- a/src/data/ons_pd_may_19.py +++ b/src/data/ons_pd_may_19.py @@ -49,9 +49,7 @@ class ONSPostcodeDirectoryMay19(ONSPostcodeDirectory): } def __init__(self, ons_pd_csv_path, load_data=True): - ONSPostcodeDirectory.__init__( - self, ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay19.index_column, ONSPostcodeDirectoryMay19.fields, ONSPostcodeDirectoryMay19.data_types, - ) + super().__init__(ons_pd_csv_path, load_data, self.index_column, ONSPostcodeDirectoryMay19.fields, ONSPostcodeDirectoryMay19.data_types) # Folder within the ONS Postcode Directory archive holding names and codes files names_codes_root = Path(self.settings["ONS Names and codes folder"]).resolve() From f9e1f9b9ff30d457933cd1877d45fc17325aaa08 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:48:42 +0100 Subject: [PATCH 05/25] Add load census data flag --- src/data/scout_census.py | 6 +++++- src/data/scout_data.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/data/scout_census.py b/src/data/scout_census.py index e254703a..507db6d8 100644 --- a/src/data/scout_census.py +++ b/src/data/scout_census.py @@ -97,7 +97,11 @@ class 
ScoutCensus: UNIT_LEVEL_GROUP = "Group" UNIT_LEVEL_DISTRICT = "District" - def __init__(self, census_file_path: Path): + def __init__(self, census_file_path: Path, load_data=True): + if not load_data: + self.data = pd.DataFrame() + return + cols_int_32 = ["Object_ID", "G_ID", "D_ID", "C_ID", "R_ID", "X_ID", "imd"] cols_categorical = ["compass", "type", "name", "G_name", "D_name", "C_name", "R_name", "X_name", "postcode", "Young_Leader_Unit"] # fmt: off diff --git a/src/data/scout_data.py b/src/data/scout_data.py index 3e698509..0e9a975c 100644 --- a/src/data/scout_data.py +++ b/src/data/scout_data.py @@ -32,7 +32,7 @@ def columns(self): DEFAULT_VALUE = ScoutCensus.DEFAULT_VALUE - def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None): + def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None, load_census_data=True): super().__init__(settings=True, log_path=str(utility.LOGS_ROOT.joinpath("geo_mapping.log"))) self.logger.info(f"Starting at {datetime.now().time()}") self.logger.finished(f"Logging setup", start_time=self.start_time) @@ -41,7 +41,7 @@ def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None): # Loads Scout Census Data from a path to a .csv file that contains Scout Census data # We assume no custom path has been passed, but allow for one to be used census_path = self.settings["Scout Census location"] if not census_path else census_path - self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path) + self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path, load_data=load_census_data) self.data: pd.DataFrame = self.scout_census.data self.logger.finished(f"Loading Scout Census data", start_time=self.start_time) From c52d3d38478e7cbb1f07956eddd80c0b23edff37 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:51:04 +0100 Subject: [PATCH 06/25] Use TYPE_CHECKING instead of if False --- src/maps/map_plotter.py | 5 +++-- src/reports/history_summary.py | 5 +++-- src/reports/reports.py | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/maps/map_plotter.py b/src/maps/map_plotter.py index 83fee736..e2f3a7c4 100644 --- a/src/maps/map_plotter.py +++ b/src/maps/map_plotter.py @@ -9,8 +9,9 @@ from src.reports.reports import Reports from src.base import Base -# noinspection PyUnreachableCode -if False: +from typing import TYPE_CHECKING + +if TYPE_CHECKING: from pathlib import Path from branca import colormap diff --git a/src/reports/history_summary.py b/src/reports/history_summary.py index 8f2ff921..0c95d53e 100644 --- a/src/reports/history_summary.py +++ b/src/reports/history_summary.py @@ -5,8 +5,9 @@ from src.data.scout_census import ScoutCensus import src.utility as utility -# noinspection PyUnreachableCode -if False: +from typing import TYPE_CHECKING + +if TYPE_CHECKING: from src.data.scout_data import ScoutData diff --git a/src/reports/reports.py b/src/reports/reports.py index 5d6e9942..47d747af 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -8,8 +8,9 @@ from src.data.scout_census import ScoutCensus import src.utility as utility -# noinspection PyUnreachableCode -if False: +from typing import TYPE_CHECKING + +if TYPE_CHECKING: from pathlib import Path from src.data.ons_pd import ONSPostcodeDirectory From 9f2637a8d4b3d6c99cb323ecc20f4c524ad2a343 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:51:57 +0100 Subject: [PATCH 
07/25] Move ONSPDStub to subclass ONSPD --- tests/test_utility.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_utility.py b/tests/test_utility.py index c618d5fb..a511e629 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -1,9 +1,11 @@ import src.utility as utility +import src.data.ons_pd as ons_pd import pandas as pd -class ONSPDStub: +class ONSPostcodeDirectoryStub(ons_pd.ONSPostcodeDirectory): def __init__(self): + super().__init__(load_data=False, ons_pd_csv_path="") self.IMD_MAX = {"England": 32844, "Wales": 1909, "Scotland": 6976, "Northern Ireland": 890} self.COUNTRY_CODES = {"E92000001": "England", "W92000004": "Wales", "S92000003": "Scotland", "N92000002": "Northern Ireland"} @@ -12,7 +14,7 @@ def test_calc_imd_decile(): data = {"row_1": [1, "E92000001", 32844], "row_2": [2, "W92000004", 1]} frame = pd.DataFrame.from_dict(data, orient="index", columns=["id", "ctry", "imd"]) - imd_decile_data: pd.Series = utility.calc_imd_decile(frame["imd"], frame["ctry"], ONSPDStub()) + imd_decile_data: pd.Series = utility.calc_imd_decile(frame["imd"], frame["ctry"], ONSPostcodeDirectoryStub()) predicted_result = pd.Series(data=[10, 1], index=["row_1", "row_2"], name="imd_decile") assert isinstance(imd_decile_data, pd.Series) From 32bf48110589fb27fdeafe1678a2fbd42fc69737 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:53:15 +0100 Subject: [PATCH 08/25] Avoid shadowing builtin id --- src/reports/reports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reports/reports.py b/src/reports/reports.py index 47d747af..f060bd64 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -205,7 +205,7 @@ def _awards_groupby(group_df: pd.DataFrame, awards_data: pd.DataFrame) -> dict: # Divides total # of awards by the number of Scout Districts that the ONS Region is in code = group_df.name district_ids = awards_mapping.get(code, {}) if not geog_name == "D_ID" else {code: 1} - awards_regions_data = awards_data.loc[[id for id in district_ids.keys()]].sum() + awards_regions_data = awards_data.loc[[d_id for d_id in district_ids.keys()]].sum() output["QSA"] = awards_regions_data["QSA"] if awards_regions_data["qsa_eligible"] > 0: From f73fc8676bcaa8f21e92cd82f8a76916c3880dc0 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:54:09 +0100 Subject: [PATCH 09/25] Add add_shape_data method --- src/data/scout_data.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/data/scout_data.py b/src/data/scout_data.py index 0e9a975c..7a920e14 100644 --- a/src/data/scout_data.py +++ b/src/data/scout_data.py @@ -2,6 +2,7 @@ from datetime import datetime from pathlib import Path import pandas as pd +import geopandas as gpd import time from typing import TYPE_CHECKING @@ -15,6 +16,8 @@ if TYPE_CHECKING: from src.data.ons_pd import ONSPostcodeDirectory +WGS_84 = 4326 + class ScoutData(Base): """Provides access to manipulate and process data @@ -43,6 +46,7 @@ def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None, lo census_path = self.settings["Scout Census location"] if not census_path else census_path self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path, load_data=load_census_data) self.data: pd.DataFrame = self.scout_census.data + self.points_data: gpd.GeoDataFrame = gpd.GeoDataFrame() self.logger.finished(f"Loading Scout Census data", 
start_time=self.start_time) if merged_csv: @@ -142,3 +146,20 @@ def filter_records(self, field: str, value_list: list, mask: bool = False, exclu """ self.data = utility.filter_records(self.data, field, value_list, self.logger, mask, exclusion_analysis) + + def add_shape_data(self, shapes_key: str, path: str = None, gdf: gpd.GeoDataFrame = None): + if self.points_data.empty: + self.points_data = gpd.GeoDataFrame(geometry=gpd.points_from_xy(self.data.long, self.data.lat)) + self.points_data.crs = WGS_84 + + if path: + shapes = gpd.GeoDataFrame.from_file(path) + elif gdf is not None: + shapes = gdf + else: + raise ValueError("A path to a shapefile or a GeoDataFrame must be provided") + + geo_merged = gpd.sjoin(self.points_data, shapes.to_crs(f"epsg:{WGS_84}"), how="left", op="intersects") + merged = self.data.merge(geo_merged[[shapes_key]], how="left", left_index=True, right_index=True) + assert self.data.equals(merged[self.data.columns]) + self.data = merged From 1d9e9f7608a622d9db2dc42c2b09608c96bc5b49 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:58:29 +0100 Subject: [PATCH 10/25] Test using hypothesis --- environment.yml | 1 + setup.py | 2 +- tests/test_scout_data.py | 99 +++++++++++++++++++++++++++------------- 3 files changed, 69 insertions(+), 33 deletions(-) diff --git a/environment.yml b/environment.yml index 6e9862bb..622cbcce 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - pyarrow # Development requirements: - pytest + - hypothesis - pytest-cov - pre-commit - black diff --git a/setup.py b/setup.py index ad396589..54992c32 100644 --- a/setup.py +++ b/setup.py @@ -6,6 +6,6 @@ version="0.2.0", packages=find_namespace_packages(), install_requires=["pandas", "numpy", "folium", "branca", "geopandas", "shapely", "dash", "pyarrow"], - extras_require={"dev": ["pytest", "pytest-cov", "pre-commit", "black"]}, + extras_require={"dev": ["pytest", "hypothesis", "pytest-cov", "pre-commit", "black"]}, python_requires=">=3.7", ) diff --git a/tests/test_scout_data.py b/tests/test_scout_data.py index 0b2f0575..65abf22e 100644 --- a/tests/test_scout_data.py +++ b/tests/test_scout_data.py @@ -1,35 +1,70 @@ -from src.data.scout_data import ScoutData -from src.base import Base -import src.utility as utility import pandas as pd +import geopandas as gpd + +import pytest +import hypothesis +import hypothesis.strategies as st +from hypothesis.extra.pandas import data_frames, column, range_indexes + +from src.data.scout_census import ScoutCensus +from src.data.scout_data import ScoutData + + +COLUMN_NAME = "ctry" + + +@pytest.fixture(scope="module") +def scout_data_factory(): + """Returns a ScoutData factory""" + + def _scout_data_factory(data_df: pd.DataFrame): + sd = ScoutData(load_census_data=False, load_ons_pd_data=False, merged_csv=False) + sd.data = data_df + return sd + + return _scout_data_factory + + +@pytest.fixture(scope="module") +def blank_geo_data_frame(): + gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy(*zip([0] * 2))) + gdf["id"] = 0 + gdf.crs = 4326 + return gdf + + +CountryDataFrame = data_frames(columns=[column(name=COLUMN_NAME, elements=st.from_regex(r"^[A-Za-z]{2}[0-9]{8}\Z"))], index=range_indexes(min_size=2),) + +LocationDataFrame = data_frames( + columns=[column(name="lat", elements=st.floats(min_value=-85, max_value=85)), column(name="long", elements=st.floats(min_value=-180, max_value=180)),], + index=range_indexes(min_size=2), +) + + +def test_scout_data_columns(scout_data_factory): +
scout_data_stub = scout_data_factory(pd.DataFrame()) + + column_labels = ScoutCensus.column_labels + columns = [*column_labels["id"].values(), *column_labels["name"].values()] + + assert scout_data_stub.columns == columns + + +@hypothesis.given(CountryDataFrame) +def test_filter_records_inclusion(scout_data_factory, data): + first_country_code = data.loc[0, COLUMN_NAME] + scout_data_stub = scout_data_factory(data) + scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=True, exclusion_analysis=False) + + expected_outcome = data.loc[~(data[COLUMN_NAME] == first_country_code)] + assert scout_data_stub.data.equals(expected_outcome) + +@hypothesis.given(CountryDataFrame) +def test_filter_records_exclusion(scout_data_factory, data): + first_country_code = data.loc[0, COLUMN_NAME] + scout_data_stub = scout_data_factory(data) + scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=False, exclusion_analysis=False) -class ScoutDataStub(ScoutData): - def __init__(self): - Base.__init__(self, settings=True, log_path=str(utility.LOGS_ROOT.joinpath("geo_mapping.log"))) - data = {"row_1": [1, "E92000001", 32844], "row_2": [2, "W92000004", 1]} - self.data = pd.DataFrame.from_dict(data, orient="index", columns=["id", "ctry", "imd"]) - - -def test_filter_records_inclusion(): - scout_data_stub = ScoutDataStub() - scout_data_stub.filter_records(field="ctry", value_list=["E92000001"], mask=True, exclusion_analysis=False) - predicted_data = {"row_2": [2, "W92000004", 1]} - predicted_result = pd.DataFrame.from_dict(predicted_data, orient="index", columns=["id", "ctry", "imd"]) - answer = scout_data_stub.data.equals(predicted_result) - if not answer: - print(scout_data_stub.data) - print(predicted_result) - assert scout_data_stub.data.equals(predicted_result) - - -def test_filter_records_exclusion(): - scout_data_stub = ScoutDataStub() - scout_data_stub.filter_records(field="ctry", value_list=["E92000001"], mask=False, exclusion_analysis=False) - predicted_data = {"row_1": [1, "E92000001", 32844]} - predicted_result = pd.DataFrame.from_dict(predicted_data, orient="index", columns=["id", "ctry", "imd"]) - answer = scout_data_stub.data.equals(predicted_result) - if not answer: - print(scout_data_stub.data) - print(predicted_result) - assert scout_data_stub.data.equals(predicted_result) + expected_outcome = data.loc[data[COLUMN_NAME] == first_country_code] + assert scout_data_stub.data.equals(expected_outcome) From fcf5bf00256035af9fc81d5e4a5e5093651db30b Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:59:07 +0100 Subject: [PATCH 11/25] Add ScoutData.filter_records test --- tests/test_scout_data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_scout_data.py b/tests/test_scout_data.py index 65abf22e..e2e0c2d9 100644 --- a/tests/test_scout_data.py +++ b/tests/test_scout_data.py @@ -68,3 +68,13 @@ def test_filter_records_exclusion(scout_data_factory, data): expected_outcome = data.loc[data[COLUMN_NAME] == first_country_code] assert scout_data_stub.data.equals(expected_outcome) + + +@hypothesis.given(CountryDataFrame) +def test_filter_records_exclusion_analysis_with_incorrect_columns(scout_data_factory, data): + first_country_code = data.loc[0, COLUMN_NAME] + scout_data_stub = scout_data_factory(data) + + with pytest.raises(ValueError): + scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=False, exclusion_analysis=True) + 
scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=True, exclusion_analysis=True) From 2cc3451a050152bd421eeb343142599b97dc72dd Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 18:59:29 +0100 Subject: [PATCH 12/25] Add tests for ScoutData.add_shape_data --- tests/test_scout_data.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_scout_data.py b/tests/test_scout_data.py index e2e0c2d9..34f007be 100644 --- a/tests/test_scout_data.py +++ b/tests/test_scout_data.py @@ -78,3 +78,22 @@ def test_filter_records_exclusion_analysis_with_incorrect_columns(scout_data_fac with pytest.raises(ValueError): scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=False, exclusion_analysis=True) scout_data_stub.filter_records(field=COLUMN_NAME, value_list=[first_country_code], mask=True, exclusion_analysis=True) + + +@hypothesis.given(LocationDataFrame) +def test_add_shape_data_points_data(scout_data_factory, blank_geo_data_frame, data): + sd = scout_data_factory(data) + sd.add_shape_data("id", gdf=blank_geo_data_frame) + + points_data = gpd.GeoDataFrame(geometry=gpd.points_from_xy(data.long, data.lat)) + assert points_data.equals(sd.points_data[points_data.columns]) + + +@hypothesis.given(LocationDataFrame) +def test_add_shape_data_merge(scout_data_factory, blank_geo_data_frame, data): + sd = scout_data_factory(data) + sd.add_shape_data("id", gdf=blank_geo_data_frame) + + points_data = gpd.GeoDataFrame(geometry=gpd.points_from_xy(data.long, data.lat)) + merged = data.merge(gpd.sjoin(points_data, blank_geo_data_frame, how="left", op="intersects")[["id"]], how="left", left_index=True, right_index=True) + assert sd.data.equals(merged) From b88edf4c35154e1e85d3fec4ccf0452fed48d577 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:00:23 +0100 Subject: [PATCH 13/25] Error explicitly for exclusion analysis --- src/utility.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utility.py b/src/utility.py index 8dd2e4b3..b0c6eec0 100644 --- a/src/utility.py +++ b/src/utility.py @@ -56,6 +56,11 @@ def filter_records(data: pd.DataFrame, field: str, value_list: list, logger: log logger.info(f"Resulting in {remaining_records} records remaining.") if exclusion_analysis: + tuples = [(sections_dict[section]["type"], sections_dict[section]["total"]) for section in sections_dict.keys()] + cols = [ScoutCensus.column_labels["UNIT_TYPE"]] + [item for sub in tuples for item in sub] + if not all([col in data.columns for col in cols]): + raise ValueError("Required columns are not in dataset!\n" f"Required columns are: {cols}.\n" f"Your columns are: {data.columns.to_list()}") + # Calculate the number of records that have been filtered out excluded_records = original_records - remaining_records logger.info(f"{excluded_records} records were removed ({excluded_records / original_records * 100}% of total)") From a037b11e2df1384f119c3d485d5aaba011d17528 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:01:22 +0100 Subject: [PATCH 14/25] Fix path for district mapping --- scripts/settings.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/settings.json b/scripts/settings.json index aa1d5046..7e45d6ee 100644 --- a/scripts/settings.json +++ b/scripts/settings.json @@ -17,7 +17,7 @@ { "name": "D_ID", "codes": {"path": 
"../data/Scout Census Data/district_id_mapping.csv", "key": "D_ID", "key_type": "Int32", "name": "D_name"}, - "boundary": {"shapefile": "../src/scripts/districts_buffered.geojson", "key": "id", "name": "name"}, + "boundary": {"shapefile": "../scripts/districts_buffered.geojson", "key": "id", "name": "name"}, "age_profile": null, "age_profile_code_col": null }, From 216f8eecbc8e99ab51c3b6f3cf49398666243332 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:46:54 +0100 Subject: [PATCH 15/25] Quick fix in filter_records --- src/utility.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/utility.py b/src/utility.py index b0c6eec0..693b614f 100644 --- a/src/utility.py +++ b/src/utility.py @@ -56,8 +56,7 @@ def filter_records(data: pd.DataFrame, field: str, value_list: list, logger: log logger.info(f"Resulting in {remaining_records} records remaining.") if exclusion_analysis: - tuples = [(sections_dict[section]["type"], sections_dict[section]["total"]) for section in sections_dict.keys()] - cols = [ScoutCensus.column_labels["UNIT_TYPE"]] + [item for sub in tuples for item in sub] + cols = [ScoutCensus.column_labels["UNIT_TYPE"]] + [sections_dict[section]["total"] for section in sections_dict.keys()] if not all([col in data.columns for col in cols]): raise ValueError("Required columns are not in dataset!\n" f"Required columns are: {cols}.\n" f"Your columns are: {data.columns.to_list()}") From 85d0eee533da329527c02b052531d73ebfcd9b41 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:52:19 +0100 Subject: [PATCH 16/25] Add add_shapefile_data function --- src/reports/reports.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/reports/reports.py b/src/reports/reports.py index f060bd64..b6e1f86c 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -52,6 +52,10 @@ def __init__(self, geography_name: str, scout_data_object: ScoutData, ons_pd_obj "Explorers": {"ages": ["14", "15", "16", "17"]}, } + def add_shapefile_data(self, shapefile_key): + self.scout_data.add_shape_data(shapefile_key, path=self.geography.shapefile_path) + self.scout_data.data = self.scout_data.data.rename(columns={shapefile_key: self.geography_type}) + @time_function def filter_boundaries(self, field: str, value_list: list, boundary: str = "", distance: int = 3000, near: bool = False): @@ -95,8 +99,9 @@ def _ons_to_district_mapping(self, ons_code: str) -> dict: count_by_district_by_region = count_by_district_by_region.set_index([region_type, district_id_column]) + count_col: pd.Series = count_by_district_by_region["count"] nested_dict = collections.defaultdict(dict) - for keys, value in count_by_district_by_region["count"].iteritems(): + for keys, value in count_col.iteritems(): nested_dict[keys[0]][keys[1]] = value self.logger.debug("Finished mapping from ons boundary to district") @@ -134,7 +139,7 @@ def create_boundary_report(self, options: list = None, historical: bool = False, True if "waiting list total" in options else False # fmt: on - geog_name = self.geography.type # e.g oslaua osward pcon lsoa11 + geog_name = self.geography_type # e.g oslaua osward pcon lsoa11 if not geog_name: raise Exception("Geography type has not been set. 
Try calling _set_boundary") @@ -196,7 +201,7 @@ def _year_groupby(group_df: pd.DataFrame) -> dict: return output def _awards_groupby(group_df: pd.DataFrame, awards_data: pd.DataFrame) -> dict: - summed = group_df[[award_name, award_eligible,]].sum() + summed = group_df[[award_name, award_eligible]].sum() output = summed.to_dict() if summed[award_eligible] > 0: output[f"%-{award_name}"] = (summed[award_name] * 100) / summed[award_eligible] @@ -290,7 +295,7 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: :returns pd.DataFrame: Uptake data of Scouts in the boundary """ - geog_name: str = self.geography.type + geog_name: str = self.geography_type try: age_profile_path = self.geography.age_profile_path age_profile_key = self.geography.age_profile_key From 72cbbd9af935940b18804563bfbb2846d7b54df4 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:57:42 +0100 Subject: [PATCH 17/25] Use paths for ScoutData.add_shape_data --- src/data/scout_data.py | 2 +- src/reports/reports.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data/scout_data.py b/src/data/scout_data.py index 7a920e14..61ec2f18 100644 --- a/src/data/scout_data.py +++ b/src/data/scout_data.py @@ -147,7 +147,7 @@ def filter_records(self, field: str, value_list: list, mask: bool = False, exclu """ self.data = utility.filter_records(self.data, field, value_list, self.logger, mask, exclusion_analysis) - def add_shape_data(self, shapes_key: str, path: str = None, gdf: gpd.GeoDataFrame = None): + def add_shape_data(self, shapes_key: str, path: Path = None, gdf: gpd.GeoDataFrame = None): if self.points_data.empty: self.points_data = gpd.GeoDataFrame(geometry=gpd.points_from_xy(self.data.long, self.data.lat)) self.points_data.crs = WGS_84 diff --git a/src/reports/reports.py b/src/reports/reports.py index b6e1f86c..5142b4d3 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -53,7 +53,7 @@ def __init__(self, geography_name: str, scout_data_object: ScoutData, ons_pd_obj } def add_shapefile_data(self, shapefile_key): - self.scout_data.add_shape_data(shapefile_key, path=self.geography.shapefile_path) + self.scout_data.add_shape_data(shapefile_key, path=self.shapefile_path) self.scout_data.data = self.scout_data.data.rename(columns={shapefile_key: self.geography_type}) @time_function From 276cc591077532d8e791d572fb6944f44e206741 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 19:58:09 +0100 Subject: [PATCH 18/25] Minor update --- src/reports/reports.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/reports/reports.py b/src/reports/reports.py index 5142b4d3..5e89e8a0 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -321,8 +321,9 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: age_profile_pd["Pop_All"] = age_profile_pd[[f"{age}" for age in range(6, 17 + 1)]].sum(axis=1) # merge population data - cols = [f"Pop_{section}" for section in Reports.SECTION_AGES.keys()] + ["Pop_All"] + [age_profile_key] - uptake_report = boundary_report.merge(age_profile_pd[cols], how="left", left_on=geog_name, right_on=age_profile_key, sort=False) + cols = [age_profile_key] + [f"Pop_{section}" for section in Reports.SECTION_AGES.keys()] + ["Pop_All"] + reduced_age_profile_pd = age_profile_pd[cols] + uptake_report = boundary_report.merge(reduced_age_profile_pd, how="left", left_on=geog_name, 
right_on=age_profile_key, sort=False) del uptake_report[age_profile_key] years = self.scout_data.data["Year"].drop_duplicates().dropna().sort_values() From 4db6dcb912c967592334512694c06645f2b98dd7 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 20:15:05 +0100 Subject: [PATCH 19/25] Add age profile pivoting option --- src/reports/reports.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/reports/reports.py b/src/reports/reports.py index 5e89e8a0..397798dd 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -295,7 +295,7 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: :returns pd.DataFrame: Uptake data of Scouts in the boundary """ - geog_name: str = self.geography_type + geog_name = self.geography_type try: age_profile_path = self.geography.age_profile_path age_profile_key = self.geography.age_profile_key @@ -316,15 +316,27 @@ def create_uptake_report(self, report_name: str = None) -> pd.DataFrame: # population data for section, ages in Reports.SECTION_AGES.items(): - age_profile_pd[f"Pop_{section}"] = age_profile_pd[ages["ages"]].sum(axis=1) - age_profile_pd[f"Pop_{section}"] += age_profile_pd[ages["halves"]].sum(axis=1) // 2 if ages.get("halves") else 0 - age_profile_pd["Pop_All"] = age_profile_pd[[f"{age}" for age in range(6, 17 + 1)]].sum(axis=1) + section_population = age_profile_pd[ages["ages"]].sum(axis=1) + section_population += age_profile_pd[ages["halves"]].sum(axis=1) // 2 if ages.get("halves") else 0 + age_profile_pd[f"Pop_{section}"] = section_population.astype("UInt32") + age_profile_pd["Pop_All"] = age_profile_pd[[f"{age}" for age in range(6, 17 + 1)]].sum(axis=1).astype("UInt32") # merge population data cols = [age_profile_key] + [f"Pop_{section}" for section in Reports.SECTION_AGES.keys()] + ["Pop_All"] reduced_age_profile_pd = age_profile_pd[cols] - uptake_report = boundary_report.merge(reduced_age_profile_pd, how="left", left_on=geog_name, right_on=age_profile_key, sort=False) - del uptake_report[age_profile_key] + + # Pivot age profile to current geography type if needed + if self.geography.age_profile_pivot and self.geography.age_profile_pivot != geog_name: + pivot_key = self.geography.age_profile_pivot + + ons_data_subset = self.ons_pd.data[[geog_name, pivot_key]] + merged_age_profile = reduced_age_profile_pd.merge(ons_data_subset, how="left", left_on=age_profile_key, right_on=pivot_key).drop(pivot_key, axis=1) + merged_age_profile = merged_age_profile.dropna(subset=[geog_name]) + pivoted_age_profile = merged_age_profile.groupby(geog_name).sum().astype("UInt32") + uptake_report = boundary_report.merge(pivoted_age_profile, how="left", left_on=geog_name, right_index=True, sort=False) + else: + uptake_report = boundary_report.merge(reduced_age_profile_pd, how="left", left_on=geog_name, right_on=age_profile_key, sort=False) + del uptake_report[age_profile_key] years = self.scout_data.data["Year"].drop_duplicates().dropna().sort_values() From 05d68b3a70e158c882d9e0b499efa267aa95101d Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 23:22:12 +0100 Subject: [PATCH 20/25] Add sanity checks to populations --- src/reports/reports.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/reports/reports.py b/src/reports/reports.py index 397798dd..cce1a58d 100644 --- a/src/reports/reports.py +++ b/src/reports/reports.py @@ -331,8 +331,12 @@ def 
create_uptake_report(self, report_name: str = None) -> pd.DataFrame: ons_data_subset = self.ons_pd.data[[geog_name, pivot_key]] merged_age_profile = reduced_age_profile_pd.merge(ons_data_subset, how="left", left_on=age_profile_key, right_on=pivot_key).drop(pivot_key, axis=1) - merged_age_profile = merged_age_profile.dropna(subset=[geog_name]) - pivoted_age_profile = merged_age_profile.groupby(geog_name).sum().astype("UInt32") + merged_age_profile_no_na = merged_age_profile.dropna(subset=[geog_name]) + pivoted_age_profile = merged_age_profile_no_na.groupby(geog_name).sum().astype("UInt32") + + # Check we did not accidentally expand the population! + assert merged_age_profile["Pop_All"].sum() == reduced_age_profile_pd["Pop_All"].sum() # this will fail + assert pivoted_age_profile["Pop_All"].sum() == merged_age_profile_no_na["Pop_All"].sum() uptake_report = boundary_report.merge(pivoted_age_profile, how="left", left_on=geog_name, right_index=True, sort=False) else: uptake_report = boundary_report.merge(reduced_age_profile_pd, how="left", left_on=geog_name, right_on=age_profile_key, sort=False) From aaecb1e9a3c8f975b15559f0ffbd893cd4d2f42a Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 14 May 2020 23:23:13 +0100 Subject: [PATCH 21/25] Update age profile properties --- src/geographies/geography.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/geographies/geography.py b/src/geographies/geography.py index 37cc749c..e1c114fb 100644 --- a/src/geographies/geography.py +++ b/src/geographies/geography.py @@ -66,11 +66,15 @@ def shapefile_path(self) -> Path: @property def age_profile_path(self) -> Path: - return DATA_ROOT / self.settings["National Statistical folder"] / self.geography_metadata_dict["age_profile"]["path"] + return DATA_ROOT / self.settings["National Statistical folder"] / self.geography_metadata_dict["age_profile"].get("path") @property def age_profile_key(self) -> str: - return self.geography_metadata_dict["age_profile"]["key"] + return self.geography_metadata_dict["age_profile"].get("key") + + @property + def age_profile_pivot(self) -> str: + return self.geography_metadata_dict["age_profile"].get("pivot_key") def _set_boundary(self, geography_name: str, ons_pd: ONSPostcodeDirectory): """Sets the geography_metadata_dict and geography_region_ids_mapping members From 3340293bfced423421ee0307eb9330fd1805a011 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 28 Dec 2020 12:14:50 +0000 Subject: [PATCH 22/25] Type hints in test_base.py --- tests/test_base.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/test_base.py b/tests/test_base.py index 569719dd..a1d37385 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,24 +1,25 @@ import json import logging +from numbers import Real import pytest from src.base import Base, time_function from src.utility import LOGS_ROOT, SCRIPTS_ROOT -def example_function(number1, number2): +def add(number1: Real, number2: Real) -> Real: return number1 + number2 class ExampleClassLogger(Base): - def __init__(self, path: bool = True): + def __init__(self, path: bool = True): if path: - super().__init__(log_path=str(LOGS_ROOT.joinpath("tests.log"))) + super().__init__(log_path=str(LOGS_ROOT / "tests.log")) else: super().__init__() @time_function - def example_function(self, number1, number2): + def add(self, number1: Real, number2: Real) -> Real: self.logger.info("Example Function") return
number1 + number2 @@ -32,7 +33,7 @@ def __init__(self): def ec_logger(): """Returns an ExampleClassLogger instance""" - def _instantiator(path=True): + def _instantiator(path: bool = True) -> ExampleClassLogger: return ExampleClassLogger(path) return _instantiator @@ -45,7 +46,7 @@ def ec_settings(): def test_time_function_wraps_function(): - assert time_function(example_function)(2, 2) == example_function(2, 2) + assert time_function(add)(2, 2) == add(2, 2) # noinspection PyTypeChecker @@ -67,17 +68,17 @@ def test_base_settings_are_accurate(ec_settings): def test_time_function_no_logger_entity(): try: - time_function(example_function)(2, 2) + time_function(add)(2, 2) except AttributeError: pytest.fail(f"Unexpected AttributeError in base.test_function") def test_time_function_logger_output(caplog, ec_logger): caplog.set_level(logging.INFO) - ec_logger().example_function(2, 2) + ec_logger().add(2, 2) - assert "Calling function example_function" in caplog.text - assert "example_function took 0.0" in caplog.text + assert "Calling function add" in caplog.text + assert "add took 0.0" in caplog.text def test_base_logger_creation(ec_logger): From 0ff2a8b972d284412b7679aac158b659d2daad6f Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+AA-Turner@users.noreply.github.com> Date: Fri, 1 Jan 2021 13:21:55 +0000 Subject: [PATCH 23/25] Simplify pyproject.toml --- pyproject.toml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 04456503..c6545d48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,2 @@ [tool.black] line-length = 180 -exclude = ''' -/( - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | venv - | _build - | buck-out - | build - | dist - | setup.py -)/ -''' From 533cfc9e26422db58a4dda20df80d8ff046ee1ff Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sat, 2 Jan 2021 14:19:12 +0000 Subject: [PATCH 24/25] Delete pytest.ini --- pytest.ini | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index 25e73fa4..00000000 --- a/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -python_files=test_*.py From 941af90e1f2db32a015e888a111a8416c3960457 Mon Sep 17 00:00:00 2001 From: AA-Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 7 Jan 2021 17:33:37 +0000 Subject: [PATCH 25/25] Bump python to 3.8 --- .pre-commit-config.yaml | 2 +- .readthedocs.yml | 2 +- .travis.yml | 8 ++++---- README.md | 4 ++-- environment.yml | 2 +- setup.py | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c86af76e..21c7bc01 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,4 +3,4 @@ repos: rev: stable hooks: - id: black - language_version: python3.7 + language_version: python3.8 diff --git a/.readthedocs.yml b/.readthedocs.yml index cc1afa13..f3bd2dc1 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -3,7 +3,7 @@ sphinx: configuration: docs/source/conf.py fail_on_warning: false python: - version: 3.7 + version: 3.8 install: - method: setuptools path: package diff --git a/.travis.yml b/.travis.yml index c1239293..7263ea19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,12 +9,12 @@ cache: # run tests and linting separately matrix: include: - - name: "3.7 lint" - python: 3.7 + - name: "3.8 lint" + python: 3.8 env: - TEST_CMD="pre-commit run --all-files" - - name: "3.7 tests" - python: 3.7 + - name: "3.8 tests" + 
python: 3.8 env: - TEST_CMD="pytest --cov=./" diff --git a/README.md b/README.md index d1750ab2..a2bf09ee 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Mapping Scouts data to UK administrative regions. [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## Prerequisites: -This is written and tested in Python 3.7. +This is written and tested in Python 3.8. This project is largely dependent on `geopandas` and `pandas`, along with `folium`, `dash`, and `shapely`. @@ -45,7 +45,7 @@ To install geopandas and its dependencies, follow below It is highly recommended to use conda to install geopandas. However, to install geopandas using pip on windows, follow the following steps: -* Download the wheels for [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal), [Fiona](http://www.lfd.uci.edu/~gohlke/pythonlibs/#fiona), and [Rtree](http://www.lfd.uci.edu/~gohlke/pythonlibs/#rtree). Choose the correct python version (currently 3.7) and platform +* Download the wheels for [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal), [Fiona](http://www.lfd.uci.edu/~gohlke/pythonlibs/#fiona), and [Rtree](http://www.lfd.uci.edu/~gohlke/pythonlibs/#rtree). Choose the correct python version (currently 3.8) and platform * Install any prerequisites listed on Gohlke's site (e.g. C++ redistributables) * `pip install` the wheels in the following order (preferably in a Virtual Environment) 1. [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal) diff --git a/environment.yml b/environment.yml index 622cbcce..ef7d88b2 100644 --- a/environment.yml +++ b/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - python>=3.7 + - python>=3.8 - pip # Core requirements - branca diff --git a/setup.py b/setup.py index 54992c32..03e7ff43 100644 --- a/setup.py +++ b/setup.py @@ -7,5 +7,5 @@ packages=find_namespace_packages(), install_requires=["pandas", "numpy", "folium", "branca", "geopandas", "shapely", "dash", "pyarrow"], extras_require={"dev": ["pytest", "hypothesis", "pytest-cov", "pre-commit", "black"]}, - python_requires=">=3.7", + python_requires=">=3.8", )
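
Taken together, patches 09, 16 and 17 define the new shape-data flow: ScoutData.add_shape_data builds a WGS 84 point layer from the census lat/long columns, spatially joins it against a shapefile path or a supplied GeoDataFrame, and merges the chosen key column back onto the census records. The sketch below shows one way that flow might be driven. It is illustrative only: it assumes the repository root as the working directory, a merged census extract that carries lat and long columns, and the districts_buffered.geojson path and "id" key as configured in settings.json above.

    import geopandas as gpd

    from src.data.scout_data import ScoutData

    # Load the merged Scout Census extract (paths resolved via scripts/settings.json)
    scout_data = ScoutData(merged_csv=True, load_ons_pd_data=False)

    # Optionally narrow the records first, e.g. to England only
    scout_data.filter_records(field="ctry", value_list=["E92000001"])

    # Spatially join each census record to the district shape that contains it;
    # "id" is the key declared for districts_buffered.geojson in settings.json
    districts = gpd.GeoDataFrame.from_file("scripts/districts_buffered.geojson")
    scout_data.add_shape_data("id", gdf=districts)

    print(scout_data.data["id"].head())

Passing gdf= skips the file read inside add_shape_data, which is what the tests in patch 12 rely on; passing path= instead loads the shapefile with gpd.GeoDataFrame.from_file, and patch 17 types that argument as a Path.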