Skip to content

Commit 60a41b1

Browse files
committed
feat: add census data maps
* Command to import output areas and MSOA/LSOA
* GraphQL query for choropleth data can be filtered by map bounds
* Front-end automatically selects the tileset for the zoom level (census boundary type only)
1 parent aefc961 commit 60a41b1

32 files changed

+881
-335
lines changed

.DS_Store

-6 KB
Binary file not shown.

bin/ck_setup.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ echo '';
4343
echo 'Downloading LIH data...';
4444
echo '';
4545
echo '------------------------------------';
46-
python manage.py import_wards
4746
python manage.py import_areas
4847
python manage.py import_regions
4948
python manage.py import_mps

bin/import_areas_seed.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
if [ "$ENVIRONMENT" != "production" ]; then
44
unzip -o data/areas.psql.zip -d data
5-
PGPASSWORD=password psql -U postgres -h db test_local-intelligence < data/areas.psql
5+
PGPASSWORD=password psql -U postgres -h db test_postgres < data/areas.psql
66
else
77
echo "This command cannot run in production environments."
88
fi

docker-compose.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
services:
22
db:
3-
image: kartoza/postgis:latest
3+
image: kartoza/postgis:16-3
44
restart: always
55
volumes:
66
- .:/app
7-
- pgdata:/var/lib/postgresql/data
7+
- pgdata:/var/lib/postgresql
88
environment:
99
POSTGRES_USER: 'postgres'
1010
POSTGRES_PASSWORD: 'password'
11-
POSTGRES_DB: 'local-intelligence'
11+
POSTGRES_DB: 'postgres'
1212
POSTGRES_PORT: 5432
1313
healthcheck:
1414
test: ["CMD-SHELL", "pg_isready -U postgres -h 127.0.0.1"]
@@ -32,7 +32,7 @@ services:
3232
environment:
3333
SECRET_KEY: 'secret'
3434
DEBUG: 1
35-
DATABASE_URL: 'postgis://postgres:password@db/local-intelligence'
35+
DATABASE_URL: 'postgis://postgres:password@db/postgres'
3636
EMAIL_HOST: email.svc
3737
CACHE_FILE: 'data/cache'
3838
MAPIT_URL: 'https://mapit.mysociety.org/'

hub/data_imports/geocoding_config.py

Lines changed: 60 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -336,75 +336,81 @@ async def import_area_data(
336336
geocoding_data["area_fields"][area.area_type.code] = area.gss
337337
update_data["geocode_data"].update({"data": geocoding_data})
338338
if area is not None:
339-
sample_point = area.polygon.centroid
339+
postcode_data = await get_postcode_data_for_area(area, loaders, steps)
340+
update_data["postcode_data"] = postcode_data
341+
update_data["area"] = area
342+
update_data["point"] = area.point
343+
else:
344+
# Reset geocoding data
345+
update_data["postcode_data"] = None
340346

341-
# get postcodeIO result for area.coordinates
342-
try:
343-
postcode_data: PostcodesIOResult = await loaders[
344-
"postcodesIOFromPoint"
345-
].load(sample_point)
346-
except Exception as e:
347-
logger.error(f"Failed to get postcode data for {sample_point}: {e}")
348-
postcode_data = None
347+
# Update the geocode data regardless, for debugging purposes
348+
update_data["geocode_data"].update({"steps": steps})
349349

350+
await GenericData.objects.aupdate_or_create(
351+
data_type=data_type, data=source.get_record_id(record), defaults=update_data
352+
)
353+
354+
355+
async def get_postcode_data_for_area(area, loaders, steps):
356+
sample_point = area.polygon.centroid
357+
# get postcodeIO result for area.coordinates
358+
try:
359+
postcode_data: PostcodesIOResult = await loaders["postcodesIOFromPoint"].load(
360+
sample_point
361+
)
362+
except Exception as e:
363+
logger.error(f"Failed to get postcode data for {sample_point}: {e}")
364+
postcode_data = None
365+
366+
steps.append(
367+
{
368+
"task": "postcode_from_area_coordinates",
369+
"service": Geocoder.POSTCODES_IO.value,
370+
"result": "failed" if postcode_data is None else "success",
371+
}
372+
)
373+
374+
# Try a few other backup strategies (example postcode, another geocoder)
375+
# to get postcodes.io data
376+
if postcode_data is None:
377+
postcode = await get_example_postcode_from_area_gss(area.gss)
350378
steps.append(
351379
{
352-
"task": "postcode_from_area_coordinates",
353-
"service": Geocoder.POSTCODES_IO.value,
354-
"result": "failed" if postcode_data is None else "success",
380+
"task": "postcode_from_area",
381+
"service": Geocoder.FINDTHATPOSTCODE.value,
382+
"result": "failed" if postcode is None else "success",
355383
}
356384
)
357-
358-
# Try a few other backup strategies (example postcode, another geocoder)
359-
# to get postcodes.io data
360-
if postcode_data is None:
361-
postcode = await get_example_postcode_from_area_gss(area.gss)
385+
if postcode is not None:
386+
postcode_data = await loaders["postcodesIO"].load(postcode)
362387
steps.append(
363388
{
364-
"task": "postcode_from_area",
365-
"service": Geocoder.FINDTHATPOSTCODE.value,
366-
"result": "failed" if postcode is None else "success",
389+
"task": "data_from_postcode",
390+
"service": Geocoder.POSTCODES_IO.value,
391+
"result": ("failed" if postcode_data is None else "success"),
367392
}
368393
)
369-
if postcode is not None:
370-
postcode_data = await loaders["postcodesIO"].load(postcode)
371-
steps.append(
372-
{
373-
"task": "data_from_postcode",
374-
"service": Geocoder.POSTCODES_IO.value,
375-
"result": ("failed" if postcode_data is None else "success"),
376-
}
377-
)
378-
if postcode_data is None:
379-
postcode = await get_postcode_from_coords_ftp(sample_point)
394+
if postcode_data is None:
395+
postcode = await get_postcode_from_coords_ftp(sample_point)
396+
steps.append(
397+
{
398+
"task": "postcode_from_area_coordinates",
399+
"service": Geocoder.FINDTHATPOSTCODE.value,
400+
"result": "failed" if postcode is None else "success",
401+
}
402+
)
403+
if postcode is not None:
404+
postcode_data = await loaders["postcodesIO"].load(postcode)
380405
steps.append(
381406
{
382-
"task": "postcode_from_area_coordinates",
383-
"service": Geocoder.FINDTHATPOSTCODE.value,
384-
"result": "failed" if postcode is None else "success",
407+
"task": "data_from_postcode",
408+
"service": Geocoder.POSTCODES_IO.value,
409+
"result": ("failed" if postcode_data is None else "success"),
385410
}
386411
)
387-
if postcode is not None:
388-
postcode_data = await loaders["postcodesIO"].load(postcode)
389-
steps.append(
390-
{
391-
"task": "data_from_postcode",
392-
"service": Geocoder.POSTCODES_IO.value,
393-
"result": ("failed" if postcode_data is None else "success"),
394-
}
395-
)
396-
397-
update_data["postcode_data"] = postcode_data
398-
else:
399-
# Reset geocoding data
400-
update_data["postcode_data"] = None
401-
402-
# Update the geocode data regardless, for debugging purposes
403-
update_data["geocode_data"].update({"steps": steps})
404412

405-
await GenericData.objects.aupdate_or_create(
406-
data_type=data_type, data=source.get_record_id(record), defaults=update_data
407-
)
413+
return postcode_data
408414

409415

410416
async def import_address_data(

hub/graphql/types/model_types.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
from enum import Enum
55
from typing import List, Optional, Union
66

7+
from django.contrib.gis.db.models import Union as GisUnion
8+
from django.contrib.gis.geos import Polygon
79
from django.db.models import F, Q
10+
from django.db.models.fields import FloatField
11+
from django.db.models.functions import Cast
812
from django.http import HttpRequest
913

1014
import numexpr as ne
@@ -48,6 +52,7 @@
4852
pd.core.computation.ops.MATHOPS = (*pd.core.computation.ops.MATHOPS, "where")
4953

5054
logger = logging.getLogger(__name__)
55+
pd.core.computation.ops.MATHOPS = (*pd.core.computation.ops.MATHOPS, "where")
5156

5257

5358
# Ideally we'd just import this from the library (procrastinate.jobs.Status) but
@@ -734,6 +739,9 @@ def query_filter(self) -> dict[str, str]:
734739
AnalyticalAreaType.admin_ward: AreaTypeFilter(lih_area_type="WD23"),
735740
AnalyticalAreaType.european_electoral_region: AreaTypeFilter(lih_area_type="EER"),
736741
AnalyticalAreaType.european_electoral_region: AreaTypeFilter(lih_area_type="CTRY"),
742+
AnalyticalAreaType.msoa: AreaTypeFilter(lih_area_type="MSOA"),
743+
AnalyticalAreaType.lsoa: AreaTypeFilter(lih_area_type="LSOA"),
744+
AnalyticalAreaType.output_area: AreaTypeFilter(lih_area_type="OA21"),
737745
}
738746

739747

@@ -1664,6 +1672,14 @@ def check_numeric(x):
16641672
return False
16651673

16661674

1675+
@strawberry.input
1676+
class MapBounds:
1677+
north: float
1678+
east: float
1679+
south: float
1680+
west: float
1681+
1682+
16671683
@strawberry_django.field()
16681684
def choropleth_data_for_source(
16691685
info: Info,
@@ -1672,6 +1688,7 @@ def choropleth_data_for_source(
16721688
# Field could be a column name or a Pandas formulaic expression
16731689
# or, if not provided, a count of records
16741690
field: Optional[str] = None,
1691+
map_bounds: Optional[MapBounds] = None,
16751692
) -> List[GroupedDataCount]:
16761693
# Check user can access the external data source
16771694
user = get_current_user(info)
@@ -1691,10 +1708,33 @@ def choropleth_data_for_source(
16911708
.annotate(
16921709
label=F(f"postcode_data__{analytical_area_key.value}"),
16931710
gss=F(f"postcode_data__codes__{analytical_area_key.value}"),
1711+
latitude=Cast("postcode_data__latitude", output_field=FloatField()),
1712+
longitude=Cast("postcode_data__longitude", output_field=FloatField()),
16941713
)
1695-
.values("json", "label", "gss")
16961714
)
16971715

1716+
if map_bounds:
1717+
area_type_filter = postcodeIOKeyAreaTypeLookup[analytical_area_key]
1718+
bbox_coords = (
1719+
(map_bounds.west, map_bounds.north), # Top left
1720+
(map_bounds.east, map_bounds.north), # Top right
1721+
(map_bounds.east, map_bounds.south), # Bottom right
1722+
(map_bounds.west, map_bounds.south), # Bottom left
1723+
(map_bounds.west, map_bounds.north), # Back to start to close polygon
1724+
)
1725+
bbox = Polygon(bbox_coords, srid=4326)
1726+
areas = models.Area.objects.filter(**area_type_filter.query_filter).filter(
1727+
point__within=bbox
1728+
)
1729+
combined_area = areas.aggregate(union=GisUnion("polygon"))["union"]
1730+
# all geocoded GenericData should have `point` set
1731+
qs = qs.filter(point__within=combined_area)
1732+
1733+
qs = qs.values("json", "label", "gss")
1734+
1735+
if not qs:
1736+
return []
1737+
16981738
# ingest the .json data into a pandas dataframe so we can do analytics
16991739
df = pd.DataFrame([record for record in qs])
17001740

hub/management/commands/export_areas_as_sql.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,7 @@ class Command(BaseCommand):
4040
without causing primary key conflicts.
4141
"""
4242

43-
def add_arguments(self, parser):
44-
parser.add_argument(
45-
"-a",
46-
"--all-names",
47-
action="store_true",
48-
help="Fetch alternative names from MapIt",
49-
)
50-
51-
def handle(self, all_names: bool = False, *args, **options):
43+
def handle(self, *args, **options):
5244
print("Exporting areas and area types from current database to data/areas.psql")
5345
count = 0
5446
output_file: Path = settings.BASE_DIR / "data" / "areas.psql"
@@ -101,6 +93,8 @@ def get_output_record(
10193
if column.name in table_config.output_column_templates:
10294
template = table_config.output_column_templates[column.name]
10395
value = self.template_output_value(template, row, columns)
96+
elif row[i] is None:
97+
value = "NULL"
10498
else:
10599
# output the value as a string, cast to the correct type in postgres
106100
value = f"'{self.escape_sql_string(row[i])}'::{column.type_display}"

hub/management/commands/import_areas.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,15 @@ def handle(self, quiet: bool = False, all_names: bool = False, *args, **options)
5757
def import_area(self, area, area_type, all_names):
5858
area_details = self.mapit_client.area_details(area["id"]) if all_names else {}
5959

60-
if "gss" not in area["codes"]:
60+
gss = area["codes"].get("gss") or area["codes"].get("ons")
61+
if not gss:
6162
# logger.debug(f"no gss code for {area['id']}")
6263
return
6364

6465
geom = None
6566
try:
6667
geom_already_loaded = Area.objects.filter(
67-
gss=area["codes"]["gss"], polygon__isnull=False
68+
gss=gss, polygon__isnull=False
6869
).exists()
6970
if geom_already_loaded:
7071
# Only fetch geometry data if required, to speed things up
@@ -77,19 +78,19 @@ def import_area(self, area, area_type, all_names):
7778
"type": "Feature",
7879
"geometry": geom,
7980
"properties": {
80-
"PCON13CD": area["codes"]["gss"],
81+
"PCON13CD": gss,
8182
"name": area["name"],
8283
"type": area_type.code,
8384
"mapit_type": area["type"],
8485
},
8586
}
8687
geom_str = json.dumps(geom)
87-
except mapit.NotFoundException: # pragma: no cover
88+
except (mapit.NotFoundException, mapit.BadRequestException): # pragma: no cover
8889
print(f"could not find mapit area for {area['name']}")
8990
geom = None
9091

9192
a, created = Area.objects.update_or_create(
92-
gss=area["codes"]["gss"],
93+
gss=gss,
9394
area_type=area_type,
9495
defaults={
9596
"mapit_id": area["id"],

0 commit comments

Comments (0)