Commit 280e1ac

Merge pull request #54 from the-scouts/testing-and-reports

AA-Turner authored Mar 8, 2021
2 parents 4c6484d + 941af90
Showing 22 changed files with 236 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
```diff
@@ -3,4 +3,4 @@ repos:
     rev: stable
     hooks:
       - id: black
-        language_version: python3.7
+        language_version: python3.8
```
2 changes: 1 addition & 1 deletion .readthedocs.yml
```diff
@@ -3,7 +3,7 @@ sphinx:
   configuration: docs/source/conf.py
   fail_on_warning: false
 python:
-  version: 3.7
+  version: 3.8
 install:
   - method: setuptools
     path: package
```
8 changes: 4 additions & 4 deletions .travis.yml
```diff
@@ -9,12 +9,12 @@ cache:
 # run tests and linting separately
 matrix:
   include:
-    - name: "3.7 lint"
-      python: 3.7
+    - name: "3.8 lint"
+      python: 3.8
       env:
         - TEST_CMD="pre-commit run --all-files"
-    - name: "3.7 tests"
-      python: 3.7
+    - name: "3.8 tests"
+      python: 3.8
       env:
         - TEST_CMD="pytest --cov=./"
```
4 changes: 2 additions & 2 deletions README.md
```diff
@@ -6,7 +6,7 @@ Mapping Scouts data to UK administrative regions.
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

 ## Prerequisites:
-This is written and tested in Python 3.7.
+This is written and tested in Python 3.8.

 This project is largely dependent on `geopandas` and `pandas`, along with `folium`, `dash`, and `shapely`.

@@ -45,7 +45,7 @@ To install geopandas and its dependencies, follow below
 It is highly recommended to use conda to install geopandas.

 However, to install geopandas using pip on windows, follow the following steps:
-* Download the wheels for [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal), [Fiona](http://www.lfd.uci.edu/~gohlke/pythonlibs/#fiona), and [Rtree](http://www.lfd.uci.edu/~gohlke/pythonlibs/#rtree). Choose the correct python version (currently 3.7) and platform
+* Download the wheels for [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal), [Fiona](http://www.lfd.uci.edu/~gohlke/pythonlibs/#fiona), and [Rtree](http://www.lfd.uci.edu/~gohlke/pythonlibs/#rtree). Choose the correct python version (currently 3.8) and platform
 * Install any prerequisites listed on Gohlke's site (e.g. C++ redistributables)
 * `pip install` the wheels in the following order (preferably in a Virtual Environment)
   1. [GDAL](http://www.lfd.uci.edu/~gohlke/pythonlibs/#gdal)
```
3 changes: 2 additions & 1 deletion environment.yml
```diff
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python>=3.7
+  - python>=3.8
   - pip
   # Core requirements
   - branca
@@ -15,6 +15,7 @@ dependencies:
   - pyarrow
   # Development requirements:
   - pytest
+  - hypothesis
   - pytest-cov
   - pre-commit
   - black
```
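`hypothesis` joins the dev dependencies here, enabling property-based tests alongside pytest. A minimal sketch of the kind of test this unlocks; the invariant and column name are illustrative, not taken from this repository's tests:

```python
# Hedged sketch of a property-based test; hypothesis generates many inputs
# and checks the invariant holds for all of them.
import pandas as pd
from hypothesis import given
from hypothesis import strategies as st


@given(st.lists(st.integers(min_value=0, max_value=100)))
def test_filtering_never_adds_rows(values):
    # For any generated input, filtering can only shrink the frame.
    df = pd.DataFrame({"imd": values})
    assert len(df[df["imd"] >= 50]) <= len(df)
```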
16 changes: 0 additions & 16 deletions pyproject.toml
```diff
@@ -1,18 +1,2 @@
 [tool.black]
 line-length = 180
-exclude = '''
-/(
-    \.eggs
-  | \.git
-  | \.hg
-  | \.mypy_cache
-  | \.tox
-  | \.venv
-  | venv
-  | _build
-  | buck-out
-  | build
-  | dist
-  | setup.py
-)/
-'''
```
2 changes: 0 additions & 2 deletions pytest.ini

This file was deleted.

11 changes: 4 additions & 7 deletions scripts/settings.json
```diff
@@ -1,9 +1,9 @@
 {
   "settings":
   {
-    "Raw Census Extract location": "Scout Census Data/Census 2020 Extract (1).csv",
-    "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.csv",
-    "Full ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK.csv",
+    "Raw Census Extract location": "Scout Census Data/Census 2020 Extract (1).csv",
+    "Scout Census location": "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.feather",
+    "Full ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK.csv",
     "Reduced ONS PD location": "ONSPD_MAY_2019_UK/Data/ONSPD_MAY_2019_UK reduced.csv",
     "ONS Names and codes folder": "ONSPD_MAY_2019_UK/Documents/",
     "National Statistical folder": "National Statistical data/",
@@ -17,10 +17,7 @@
     {
       "name": "D_ID",
       "codes": {"path": "../data/Scout Census Data/district_id_mapping.csv", "key": "D_ID", "key_type": "Int32", "name": "D_name"},
-      "boundary":
-      {
-        "shapefiles": ["../src/scripts/districts_buffered.geojson"], "key": "id", "name": "name"
-      },
+      "boundary": {"shapefile": "../scripts/districts_buffered.geojson", "key": "id", "name": "name"},
       "age_profile": null,
       "age_profile_code_col": null
     },
```
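The merged census extract now points at a `.feather` file rather than a `.csv`; `pyarrow`, already in the core requirements, supplies the Feather codec. A hedged sketch of the one-off conversion this implies (file names come from settings.json above; the snippet itself is not part of this PR):

```python
# Hedged sketch: convert the merged census CSV to Feather once, then read
# the Feather file on subsequent runs.
import pandas as pd

csv_path = "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.csv"
feather_path = "Scout Census Data/Census 2020 Extract (1) with May 2019 fields.feather"

df = pd.read_csv(csv_path)
df.reset_index(drop=True).to_feather(feather_path)  # Feather needs a default RangeIndex

df_back = pd.read_feather(feather_path)  # typically much faster than re-parsing the CSV
```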
5 changes: 2 additions & 3 deletions scripts/setup_reduce_onspd.py
```diff
@@ -1,15 +1,14 @@
 import json

 import src.utility as utility
-from src.utility import SCRIPTS_ROOT, DATA_ROOT
 from src.data.ons_pd_may_19 import ONSPostcodeDirectoryMay19

 if __name__ == "__main__":
-    with open(SCRIPTS_ROOT.joinpath("settings.json"), "r") as read_file:
+    with open(utility.SCRIPTS_ROOT.joinpath("settings.json"), "r") as read_file:
         settings = json.load(read_file)["settings"]
     print("Starting")

-    ons_pd_location = DATA_ROOT / settings["Full ONS PD location"]
+    ons_pd_location = utility.DATA_ROOT / settings["Full ONS PD location"]

     # Load Full ONS Postcode Directory
     ons_pd = ONSPostcodeDirectoryMay19(ons_pd_location, load_data=True)
```
4 changes: 2 additions & 2 deletions setup.py
```diff
@@ -6,6 +6,6 @@
     version="0.2.0",
     packages=find_namespace_packages(),
     install_requires=["pandas", "numpy", "folium", "branca", "geopandas", "shapely", "dash", "pyarrow"],
-    extras_require={"dev": ["pytest", "pytest-cov", "pre-commit", "black"]},
-    python_requires=">=3.7",
+    extras_require={"dev": ["pytest", "hypothesis", "pytest-cov", "pre-commit", "black"]},
+    python_requires=">=3.8",
 )
```
9 changes: 3 additions & 6 deletions src/data/ons_pd_may_18.py
```diff
@@ -1,4 +1,3 @@
-from src.utility import DATA_ROOT
 from src.data.ons_pd import ONSPostcodeDirectory


@@ -50,13 +49,11 @@ class ONSPostcodeDirectoryMay18(ONSPostcodeDirectory):
     }

     def __init__(self, ons_pd_csv_path, load_data=True):
-        ONSPostcodeDirectory.__init__(
-            self, ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay18.index_column, ONSPostcodeDirectoryMay18.fields, ONSPostcodeDirectoryMay18.data_types,
-        )
+        super().__init__(ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay18.index_column, ONSPostcodeDirectoryMay18.fields, ONSPostcodeDirectoryMay18.data_types)

         # Folder within the ONS Postcode Directory archive holding names and codes files
-        names_codes_root = DATA_ROOT / self.settings["ONS Names and codes folder"]
-        boundaries_root = DATA_ROOT / self.settings["Boundaries folder"]
+        names_codes_root = self.settings["ONS Names and codes folder"]
+        boundaries_root = self.settings["Boundaries folder"]

         # Paths to all shapefiles within the Boundaries folder
         # fmt: off
```
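Both ONS Postcode Directory subclasses (May 2018 above, May 2019 below) switch from an explicit `ONSPostcodeDirectory.__init__(self, ...)` call to `super().__init__(...)`. A minimal sketch of why the two are equivalent here, and why `super()` is preferred:

```python
# Minimal sketch of the refactor pattern used in both ONS PD subclasses:
# an explicit base-class __init__ call replaced by super(), which follows
# the method resolution order and drops the explicit self.
class Base:
    def __init__(self, value):
        self.value = value


class ChildOld(Base):
    def __init__(self, value):
        Base.__init__(self, value)  # works, but hard-codes the base class


class ChildNew(Base):
    def __init__(self, value):
        super().__init__(value)  # cooperative and refactor-safe


assert ChildOld(3).value == ChildNew(3).value == 3
```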
9 changes: 3 additions & 6 deletions src/data/ons_pd_may_19.py
```diff
@@ -1,4 +1,3 @@
-from src.utility import DATA_ROOT
 from src.data.ons_pd import ONSPostcodeDirectory
 from pathlib import Path


@@ -50,13 +49,11 @@ class ONSPostcodeDirectoryMay19(ONSPostcodeDirectory):
     }

     def __init__(self, ons_pd_csv_path, load_data=True):
-        ONSPostcodeDirectory.__init__(
-            self, ons_pd_csv_path, load_data, ONSPostcodeDirectoryMay19.index_column, ONSPostcodeDirectoryMay19.fields, ONSPostcodeDirectoryMay19.data_types,
-        )
+        super().__init__(ons_pd_csv_path, load_data, self.index_column, ONSPostcodeDirectoryMay19.fields, ONSPostcodeDirectoryMay19.data_types)

         # Folder within the ONS Postcode Directory archive holding names and codes files
-        names_codes_root = DATA_ROOT / Path(self.settings["ONS Names and codes folder"]).resolve()
-        boundaries_dir = DATA_ROOT / Path(self.settings["Boundaries folder"]).resolve()
+        names_codes_root = Path(self.settings["ONS Names and codes folder"]).resolve()
+        boundaries_dir = Path(self.settings["Boundaries folder"]).resolve()

         # Paths to all shapefiles within the Boundaries folder
         # fmt: off
```
6 changes: 5 additions & 1 deletion src/data/scout_census.py
```diff
@@ -97,7 +97,11 @@ class ScoutCensus:
     UNIT_LEVEL_GROUP = "Group"
     UNIT_LEVEL_DISTRICT = "District"

-    def __init__(self, census_file_path: Path):
+    def __init__(self, census_file_path: Path, load_data=True):
+        if not load_data:
+            self.data = pd.DataFrame()
+            return
+
         cols_int_32 = ["Object_ID", "G_ID", "D_ID", "C_ID", "R_ID", "X_ID", "imd"]
         cols_categorical = ["compass", "type", "name", "G_name", "D_name", "C_name", "R_name", "X_name", "postcode", "Young_Leader_Unit"]
         # fmt: off
```
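`ScoutCensus` gains a `load_data` flag so callers can construct it without reading the (large) census file. A hedged usage sketch, assuming the package is importable; the path is illustrative:

```python
# Hedged sketch: constructing ScoutCensus without touching the census file.
from pathlib import Path

from src.data.scout_census import ScoutCensus

census = ScoutCensus(Path("data/census.feather"), load_data=False)
assert census.data.empty  # an empty DataFrame placeholder, nothing read from disk
```

This pairs with the new `load_census_data` flag on `ScoutData` in the next file.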
29 changes: 25 additions & 4 deletions src/data/scout_data.py
```diff
@@ -2,6 +2,7 @@
 from datetime import datetime
 from pathlib import Path
 import pandas as pd
+import geopandas as gpd
 import time
 from typing import TYPE_CHECKING

@@ -15,6 +16,8 @@
 if TYPE_CHECKING:
     from src.data.ons_pd import ONSPostcodeDirectory

+WGS_84 = 4326
+

 class ScoutData(Base):
     """Provides access to manipulate and process data
@@ -32,17 +35,18 @@ def columns(self):

     DEFAULT_VALUE = ScoutCensus.DEFAULT_VALUE

-    def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None):
+    def __init__(self, merged_csv=True, load_ons_pd_data=False, census_path=None, load_census_data=True):
         super().__init__(settings=True, log_path=str(utility.LOGS_ROOT.joinpath("geo_mapping.log")))
         self.logger.info(f"Starting at {datetime.now().time()}")
         self.logger.finished(f"Logging setup", start_time=self.start_time)

         self.logger.info("Loading Scout Census data")
         # Loads Scout Census Data from a path to a .csv file that contains Scout Census data
         # We assume no custom path has been passed, but allow for one to be used
-        census_path = utility.DATA_ROOT / self.settings["Scout Census location"] if not census_path else census_path
-        self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path)
+        census_path = self.settings["Scout Census location"] if not census_path else census_path
+        self.scout_census: ScoutCensus = ScoutCensus(utility.DATA_ROOT / census_path, load_data=load_census_data)
         self.data: pd.DataFrame = self.scout_census.data
+        self.points_data: gpd.GeoDataFrame = gpd.GeoDataFrame()
         self.logger.finished(f"Loading Scout Census data", start_time=self.start_time)

         if merged_csv:
@@ -131,7 +135,7 @@ def save_merged_data(self, ons_pd_publication_date: str):
         self.data.to_csv(output_path.with_suffix(".csv"), index=False, encoding="utf-8-sig")
         self.data.to_feather(output_path.with_suffix(".feather"))

-    def filter_records(self: ScoutDataInterface, field: str, value_list: list, mask: bool = False, exclusion_analysis: bool = False):
+    def filter_records(self, field: str, value_list: list, mask: bool = False, exclusion_analysis: bool = False):
         """Filters the Census records by any field in ONS PD.

         :param str field: The field on which to filter
@@ -142,3 +146,20 @@
         :returns None: Nothing
         """
         self.data = utility.filter_records(self.data, field, value_list, self.logger, mask, exclusion_analysis)
+
+    def add_shape_data(self, shapes_key: str, path: Path = None, gdf: gpd.GeoDataFrame = None):
+        if self.points_data.empty:
+            self.points_data = gpd.GeoDataFrame(geometry=gpd.points_from_xy(self.data.long, self.data.lat))
+            self.points_data.crs = WGS_84
+
+        if path:
+            shapes = gpd.GeoDataFrame.from_file(path)
+        elif gdf is not None:
+            shapes = gdf
+        else:
+            raise ValueError("A path to a shapefile or a GeoDataFrame must be provided")
+
+        geo_merged = gpd.sjoin(self.points_data, shapes.to_crs(f"epsg:{WGS_84}"), how="left", op="intersects")
+        merged = self.data.merge(geo_merged[[shapes_key]], how="left", left_index=True, right_index=True)
+        assert self.data.equals(merged[self.data.columns])
+        self.data = merged
```
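The new `add_shape_data` spatially joins each census point (WGS 84 / EPSG:4326 long-lat) to the polygon containing it, then merges the chosen key column back onto the census frame. A hypothetical usage sketch; the key column and GeoJSON path are illustrative, not from this PR:

```python
# Hypothetical usage of the new add_shape_data (names are illustrative).
from pathlib import Path

from src.data.scout_data import ScoutData

scout_data = ScoutData()  # loads the merged census data
# Tag every census point with the code of the Local Authority polygon
# that contains it, via a left spatial join.
scout_data.add_shape_data("lad19cd", path=Path("data/lad_boundaries.geojson"))
print(scout_data.data["lad19cd"].head())
```

Because `points_data` is built once and cached on the instance, repeated `add_shape_data` calls skip the point-construction cost.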
25 changes: 18 additions & 7 deletions src/geographies/geography.py
```diff
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING

 from src.base import Base
+from src.utility import DATA_ROOT

 # For type hints
 if TYPE_CHECKING:
@@ -39,6 +40,14 @@ def type(self) -> str:
     def codes_map_key(self) -> str:
         return self.geography_metadata_dict["codes"]["key"]

+    @property
+    def codes_map_key_type(self) -> str:
+        return self.geography_metadata_dict["codes"]["key_type"]
+
+    @property
+    def codes_map_path(self) -> Path:
+        return DATA_ROOT / self.geography_metadata_dict["codes"].get("path")
+
     @property
     def codes_map_name(self) -> str:
         return self.geography_metadata_dict["codes"]["name"]
@@ -53,15 +62,19 @@ def shapefile_name(self) -> str:

     @property
     def shapefile_path(self) -> Path:
-        return self.geography_metadata_dict["boundary"]["shapefile"]
+        return DATA_ROOT / self.geography_metadata_dict["boundary"]["shapefile"]

     @property
     def age_profile_path(self) -> Path:
-        return self.geography_metadata_dict["age_profile"]["path"]
+        return DATA_ROOT / self.settings["National Statistical folder"] / self.geography_metadata_dict["age_profile"].get("path")

     @property
     def age_profile_key(self) -> str:
-        return self.geography_metadata_dict["age_profile"]["key"]
+        return self.geography_metadata_dict["age_profile"].get("key")
+
+    @property
+    def age_profile_pivot(self) -> str:
+        return self.geography_metadata_dict["age_profile"].get("pivot_key")

     def _set_boundary(self, geography_name: str, ons_pd: ONSPostcodeDirectory):
         """Sets the geography_metadata_dict and geography_region_ids_mapping members
@@ -80,11 +93,9 @@ def _set_boundary(self, geography_name: str, ons_pd: ONSPostcodeDirectory):
         boundaries_dict = {**ons_pd.BOUNDARIES, **self.settings["Scout Mappings"]}
         if geography_name in boundaries_dict.keys():
             self.geography_metadata_dict = boundaries_dict[geography_name]
-            boundary_codes_dict = self.geography_metadata_dict["codes"]

-            self.geography_region_ids_mapping = pd.read_csv(
-                boundary_codes_dict.get("path"), dtype={boundary_codes_dict["key"]: boundary_codes_dict["key_type"], boundary_codes_dict["name"]: "object",},
-            )  # Names & Codes file path
+            # Names & Codes file path
+            self.geography_region_ids_mapping = pd.read_csv(self.codes_map_path, dtype={self.codes_map_key: self.codes_map_key_type, self.codes_map_name: "string"})
         else:
             raise Exception(f"{geography_name} is an invalid boundary.\nValid boundaries include: {boundaries_dict.keys()}")
```
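The lookups into `geography_metadata_dict` move behind properties so that `DATA_ROOT`-prefixing lives in one place and `_set_boundary` stays short. A self-contained sketch of the pattern; the class, root, and path here are illustrative stand-ins:

```python
# Sketch of the pattern introduced here: dict lookups wrapped in properties
# so path prefixing happens in exactly one place.
from pathlib import Path

DATA_ROOT = Path("data")  # assumed stand-in for src.utility.DATA_ROOT


class GeographyLike:
    def __init__(self, metadata: dict):
        self.geography_metadata_dict = metadata

    @property
    def codes_map_path(self) -> Path:
        # .get() yields None when "path" is absent, and DATA_ROOT / None
        # raises TypeError, so callers still need a well-formed settings file.
        return DATA_ROOT / self.geography_metadata_dict["codes"].get("path")


g = GeographyLike({"codes": {"path": "Scout Census Data/district_id_mapping.csv"}})
print(g.codes_map_path)  # data/Scout Census Data/district_id_mapping.csv
```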
13 changes: 7 additions & 6 deletions src/maps/map_plotter.py
```diff
@@ -9,8 +9,9 @@
 from src.reports.reports import Reports
 from src.base import Base

-# noinspection PyUnreachableCode
-if False:
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
     from pathlib import Path
     from branca import colormap

@@ -41,9 +42,9 @@ def __init__(self, out_file: Path):
         self.SCORE_COL: dict = {}
         self.layers: dict = {}

-        self.score_col_label: str = None
-        self.code_name: str = None
-        self.CODE_COL: str = None
+        self.score_col_label: str = ""
+        self.code_name: str = ""
+        self.CODE_COL: str = ""
         self.map_data: pd.DataFrame = pd.DataFrame()

         self.geo_data = None
@@ -109,7 +110,7 @@ def _filter_shape_file(self, shape_file_path: Path):
         self.logger.info(f"Filtering {original_number_of_shapes} shapes by {self.code_name} being in the {self.CODE_COL} of the map_data")
         self.logger.debug(f"Filtering {original_number_of_shapes} shapes by {self.code_name} being in \n{self.map_data[self.CODE_COL]}")

-        list_codes = self.map_data[self.CODE_COL].astype(str).to_list()
+        list_codes = self.map_data[self.CODE_COL].drop_duplicates().astype(str).to_list()
         all_shapes = all_shapes.loc[all_shapes[self.code_name].isin(list_codes)]
         self.logger.info(f"Resulting in {len(all_shapes.index)} shapes")
```
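Both this file and `history_summary.py` below replace the unreachable `if False:` import guard (an old PyCharm-friendly hack) with the standard `typing.TYPE_CHECKING` constant. A minimal sketch of the idiom; note the `__future__` import, which keeps annotations unevaluated at runtime on Python 3.8:

```python
# Minimal sketch of the idiom adopted in this PR: type-only imports behind
# typing.TYPE_CHECKING instead of "if False:".
from __future__ import annotations  # annotations stay unevaluated at runtime

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from pathlib import Path  # seen by type checkers, never executed


def describe(out_file: Path) -> str:
    # Path is only an annotation here, so the guarded import is safe.
    return f"writing map to {out_file}"


print(describe("map.html"))  # a type checker flags the str; the runtime does not care
```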
5 changes: 3 additions & 2 deletions src/reports/history_summary.py
```diff
@@ -5,8 +5,9 @@
 from src.data.scout_census import ScoutCensus
 import src.utility as utility

-# noinspection PyUnreachableCode
-if False:
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
     from src.data.scout_data import ScoutData
```