Skip to content

Commit e620ede

Browse files
committed
Merge branch 'feat/import-postcode-areas'
2 parents d2741fa + fd2e1c7 commit e620ede

File tree

13 files changed

+337
-10
lines changed

13 files changed

+337
-10
lines changed

hub/data_imports/geocoding_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,12 @@ async def get_postcode_data_for_area(area, loaders, steps):
483483
# Try a few other backup strategies (example postcode, another geocoder)
484484
# to get postcodes.io data
485485
if postcode_data is None:
486-
postcode = await get_example_postcode_from_area_gss(area.gss)
486+
try:
487+
postcode = await get_example_postcode_from_area_gss(area.gss)
488+
except Exception as e:
489+
logger.error(f"Failed to get example postcode for {area.gss}: {e}")
490+
postcode = None
491+
487492
steps.append(
488493
{
489494
"task": "postcode_from_area",

hub/graphql/types/model_types.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,11 @@ def generic_data_for_hub(self, hostname: str) -> List["GenericData"]:
590590
async def sample_postcode(
591591
self, info: Info[HubDataLoaderContext]
592592
) -> Optional[PostcodesIOResult]:
593-
return await get_postcode_data_for_gss(self.gss)
594-
# return await info.context.area_coordinate_loader.load(self.point)
593+
try:
594+
return await get_postcode_data_for_gss(self.gss)
595+
except Exception as e:
596+
logger.error(f"Failed to get sample postcode for gss {self.gss}: {e}")
597+
return None
595598

596599

597600
@strawberry.type
@@ -1690,12 +1693,17 @@ def choropleth_data_for_source(
16901693
)
16911694

16921695
# Get the required data for the source
1696+
gss_field = (
1697+
"postcode_data__postcode"
1698+
if analytical_area_key == AnalyticalAreaType.postcode
1699+
else f"postcode_data__codes__{analytical_area_key.value}"
1700+
)
16931701
qs = (
16941702
external_data_source.get_import_data()
16951703
.filter(postcode_data__codes__isnull=False)
16961704
.annotate(
16971705
label=F(f"postcode_data__{analytical_area_key.value}"),
1698-
gss=F(f"postcode_data__codes__{analytical_area_key.value}"),
1706+
gss=F(gss_field),
16991707
latitude=Cast("postcode_data__latitude", output_field=FloatField()),
17001708
longitude=Cast("postcode_data__longitude", output_field=FloatField()),
17011709
)

hub/graphql/types/stats.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def fk_filter(self, field_name="") -> dict[str, str]:
6464
AnalyticalAreaType.msoa: AreaTypeFilter(lih_area_type="MSOA"),
6565
AnalyticalAreaType.lsoa: AreaTypeFilter(lih_area_type="LSOA"),
6666
AnalyticalAreaType.output_area: AreaTypeFilter(lih_area_type="OA21"),
67+
AnalyticalAreaType.postcode: AreaTypeFilter(lih_area_type="PC"),
68+
AnalyticalAreaType.postcode_area: AreaTypeFilter(lih_area_type="PCA"),
69+
AnalyticalAreaType.postcode_district: AreaTypeFilter(lih_area_type="PCD"),
70+
AnalyticalAreaType.postcode_sector: AreaTypeFilter(lih_area_type="PCS"),
6771
}
6872

6973

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import logging
2+
from pathlib import Path
3+
4+
from django.conf import settings
5+
6+
# from django postgis
7+
from django.core.management.base import BaseCommand
8+
9+
from tqdm import tqdm
10+
11+
from hub.models import Area
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class Command(BaseCommand):
    """Export postcode grouping areas to newline-delimited GeoJSON files.

    For each of the area type codes PCS, PCD and PCA, the stored
    ``Area.geometry`` string of every matching area is written, one per line,
    to ``settings.BASE_DIR / "data" / "<code>.geojsonl"``.
    """

    help = "Export Postcodes to GeoJSONl (for Mapbox tileset source creation)"

    def handle(self, *args, **options):
        """Write one ``.geojsonl`` file per postcode grouping area type."""
        area_type_codes = ["PCS", "PCD", "PCA"]
        for area_type_code in area_type_codes:
            output_file: Path = (
                settings.BASE_DIR / "data" / f"{area_type_code}.geojsonl"
            )
            # Make sure the target directory exists before opening the file.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            area_geojsons = Area.objects.filter(
                area_type__code=area_type_code
            ).values_list("geometry", flat=True)

            # Count up front so tqdm can show progress without calling len()
            # on the queryset, which would load every geometry into memory.
            total = area_geojsons.count()
            skipped = 0

            with output_file.open("w", encoding="utf-8") as f:
                # iterator() streams rows instead of caching the whole result.
                for geojson in tqdm(area_geojsons.iterator(), total=total):
                    if not geojson:
                        # Areas saved without geometry cannot be exported;
                        # writing None would raise TypeError.
                        skipped += 1
                        continue
                    f.write(geojson)
                    f.write("\n")

            if skipped:
                logger.warning(
                    "Skipped %d %s areas with no stored geometry",
                    skipped,
                    area_type_code,
                )

hub/management/commands/import_msoas_and_lsoas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def import_area(self, area, area_type, property_prefix):
107107
if isinstance(polygon, Polygon):
108108
polygon = MultiPolygon([polygon])
109109

110-
geom["geometry"] = polygon.json
110+
geom["geometry"] = json.loads(polygon.json)
111111

112112
a.geometry = json.dumps(geom)
113113
a.polygon = polygon

hub/management/commands/import_output_areas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def import_area(self, area, area_type):
8888
# Transform the geometry
8989
polygon.transform(transform)
9090

91-
geom["geometry"] = polygon.json
91+
geom["geometry"] = json.loads(polygon.json)
9292

9393
a.geometry = json.dumps(geom)
9494
a.polygon = polygon

hub/management/commands/import_output_areas_scotland.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def import_area(self, area, area_type):
7878
if isinstance(polygon, Polygon):
7979
polygon = MultiPolygon([polygon])
8080

81-
geom["geometry"] = polygon.json
81+
geom["geometry"] = json.loads(polygon.json)
8282
a.geometry = json.dumps(geom)
8383
a.polygon = polygon
8484
a.point = a.polygon.centroid
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
import json
2+
import logging
3+
import re
4+
from pathlib import Path
5+
6+
from django.conf import settings
7+
from django.contrib.gis.db.models import Union as GisUnion
8+
9+
# from django postgis
10+
from django.contrib.gis.geos import GEOSGeometry, MultiPolygon, Polygon
11+
from django.core.management.base import BaseCommand
12+
from django.db.models import F, Q
13+
from django.db.models.expressions import Expression, RawSQL
14+
from django.db.models.functions import Length, Substr
15+
16+
from tqdm import tqdm
17+
18+
from hub.models import Area, AreaType
19+
20+
logger = logging.getLogger(__name__)
21+
22+
23+
class Command(BaseCommand):
    """Import postcode units from GeoJSONL files and derive larger groupings.

    Postcode units (area type ``PC``) are read from
    ``settings.BASE_DIR / "data" / "postcodes_<1..10>.geojsonl"``. Sectors
    (``PCS``), districts (``PCD``) and areas (``PCA``) are then built by
    unioning the polygons of the postcode units that share a prefix.
    """

    help = "Import Postcodes from GeoJSON"

    def add_arguments(self, parser):
        """Register the optional ``--only`` switch."""
        parser.add_argument(
            "-o",
            "--only",
            # Reject unknown levels up front; otherwise an invalid value
            # silently results in nothing being imported.
            choices=["PC", "PCS", "PCD", "PCA"],
            help="""
            Pass one of {PC, PCS, PCD, PCA} to only import that level of area.
            Note that postcode units (PC) must be imported first.
            """,
        )

    def handle(self, only=None, *args, **options):
        """Run every import level, or just the one selected by ``only``."""
        if not only or only == "PC":
            # E.G. N14 7LU
            self.import_postcode_units()
        if not only or only == "PCS":
            # E.G. N14 7
            self.import_postcode_sectors()
        if not only or only == "PCD":
            # E.G. N14
            self.import_postcode_districts()
        if not only or only == "PCA":
            # E.G. N
            self.import_postcode_areas()

    def import_postcode_units(self):
        """Import individual postcodes from the ten ``postcodes_N.geojsonl`` files.

        Stops (after printing a hint) at the first file that is missing;
        files that precede it have already been imported by then.
        """
        print("Importing postcode units")
        filepaths: list[Path] = [
            settings.BASE_DIR / "data" / f"postcodes_{i}.geojsonl" for i in range(1, 11)
        ]
        # Looked up once, on the first file that exists, rather than per file.
        area_type = None
        for filepath in filepaths:
            if not filepath.exists():
                print(
                    f'Missing {filepath.name}. Download from the Mapped MinIO console, "postcodes" bucket.'
                )
                return

            print(f"Importing postcode file {filepath.name} of 10")

            if area_type is None:
                area_type, _ = AreaType.objects.get_or_create(
                    name="Postcodes",
                    code="PC",
                    area_type="Postcode",
                    description="Postcodes",
                )

            # Stream the file line by line instead of loading it whole and
            # splitting it; each non-blank line is one GeoJSON feature.
            with filepath.open(encoding="utf-8") as f:
                for line in tqdm(f):
                    if line.strip():
                        self.import_area(json.loads(line), area_type)

    def import_area(self, area, area_type):
        """Create or update one postcode unit from a GeoJSON feature dict.

        ``area`` must provide ``properties.POSTCODE`` (used as both gss and
        name) and ``geometry``. Geometry is only written when this area does
        not already have a polygon, to speed up re-runs.
        """
        gss = area["properties"]["POSTCODE"]
        name = gss

        # Scoped to this area type so that an area of another type sharing
        # the same code cannot suppress loading this one's geometry.
        geom_already_loaded = Area.objects.filter(
            gss=gss, area_type=area_type, polygon__isnull=False
        ).exists()

        a, _ = Area.objects.update_or_create(
            gss=gss,
            area_type=area_type,
            defaults={"name": name},
        )

        if geom_already_loaded:
            return

        polygon = GEOSGeometry(json.dumps(area["geometry"]))
        if isinstance(polygon, Polygon):
            # Normalise single polygons so every area stores a MultiPolygon.
            polygon = MultiPolygon([polygon])

        geom = {
            "type": "Feature",
            # Embed the geometry as an object, not a JSON-encoded string.
            "geometry": json.loads(polygon.json),
            "properties": {
                **area["properties"],
                "code": gss,
                "name": name,
                "type": area_type.code,
            },
        }

        a.geometry = json.dumps(geom)
        a.polygon = polygon
        a.point = a.polygon.centroid
        a.save()

    def import_postcode_sectors(self):
        """Build postcode sectors: the unit code minus its last 2 characters."""
        print("Importing postcode sectors")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Sectors",
            code="PCS",
            area_type="Postcode Sector",
            description="Postcode sectors",
        )
        self.import_postcode_grouping(
            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 2)
        )

    def import_postcode_districts(self):
        """Build postcode districts: the unit code minus its last 4 characters."""
        print("Importing postcode districts")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Districts",
            code="PCD",
            area_type="Postcode District",
            description="Postcode districts",
        )
        self.import_postcode_grouping(
            area_type, prefix_expression=Substr(F("gss"), 1, Length(F("gss")) - 4)
        )

    def import_postcode_areas(self):
        """Build postcode areas: the leading run of letters of the unit code."""
        print("Importing postcode areas")
        area_type, _ = AreaType.objects.get_or_create(
            name="Postcode Areas",
            code="PCA",
            area_type="Postcode Area",
            description="Postcode areas",
        )
        self.import_postcode_grouping(
            area_type,
            prefix_expression=RawSQL("SUBSTRING(gss FROM '[a-zA-Z]+')", tuple()),
        )

    def import_postcode_grouping(
        self, area_type: AreaType, prefix_expression: Expression
    ):
        """
        Import postcodes grouped by prefix, calculated by the provided expression, into
        the provided area type.

        Groupings that already have a polygon for this area type are skipped.
        Prefixes that come back empty, and groupings whose member postcodes
        have no polygons to union, are also skipped.
        """
        # Only consider postcodes that contain a space
        # The others are "vertical streets" which are not relevant
        # The same base queryset feeds both the prefix list and the per-prefix
        # union, so space-less codes whose truncated prefix happens to match a
        # real one are never merged into that grouping's polygon.
        postcode_units = Area.objects.filter(
            Q(gss__contains=" ") & Q(area_type__code="PC")
        ).annotate(prefix=prefix_expression)

        prefixes = postcode_units.values_list("prefix", flat=True).distinct()

        for prefix in tqdm(prefixes):
            if not prefix:
                # The prefix expression can yield NULL or an empty string.
                continue

            geom_already_loaded = Area.objects.filter(
                gss=prefix, area_type=area_type, polygon__isnull=False
            ).exists()
            if geom_already_loaded:
                continue

            polygon = postcode_units.filter(prefix=prefix).aggregate(
                union=GisUnion("polygon")
            )["union"]

            if polygon is None:
                # No member postcode has a polygon yet, so nothing to union.
                logger.warning(
                    "No polygons to union for %s %s; skipping",
                    area_type.code,
                    prefix,
                )
                continue

            if isinstance(polygon, Polygon):
                polygon = MultiPolygon([polygon])

            geom = {
                "type": "Feature",
                "geometry": json.loads(polygon.json),
                "properties": {
                    "code": prefix,
                    "name": prefix,
                    "type": area_type.code,
                },
            }

            Area.objects.update_or_create(
                gss=prefix,
                area_type=area_type,
                defaults={
                    "name": prefix,
                    "geometry": json.dumps(geom),
                    "polygon": polygon,
                    "point": polygon.centroid,
                },
            )

nextjs/src/__generated__/graphql.ts

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nextjs/src/app/(logged-in)/data-sources/inspect/[externalDataSourceId]/InspectExternalDataSource.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ import { useRouter } from 'next/navigation'
5454
import pluralize from 'pluralize'
5555
import { useEffect, useState } from 'react'
5656
import { toast } from 'sonner'
57+
import toSpaceCase from 'to-space-case'
5758
import { CREATE_MAP_REPORT } from '../../../reports/ReportList/CreateReportCard'
5859
import ExternalDataSourceBadCredentials from './ExternalDataSourceBadCredentials'
5960
import { ManageSourceSharing } from './ManageSourceSharing'
@@ -304,7 +305,7 @@ export default function InspectExternalDataSource({
304305
{dataType === DataSourceType.Member
305306
? 'Membership list'
306307
: dataType
307-
? pluralize(dataType.toLowerCase())
308+
? pluralize(toSpaceCase(dataType.toLowerCase()))
308309
: 'Data source'}
309310
<span>&nbsp;&#x2022;&nbsp;</span>
310311
{crmInfo?.name || crmType}

0 commit comments

Comments
 (0)