diff --git a/Cargo.lock b/Cargo.lock index 45552cd..4115567 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,7 @@ dependencies = [ "failure_derive", "flate2", "geo", + "geo-booleanop", "geo-types", "geojson", "geos", @@ -318,6 +319,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fc612c5837986b7104a87a0df74a5460931f1c5274be12f8d0f40aa2f30d632" +dependencies = [ + "num-traits", +] + [[package]] name = "futures" version = "0.1.30" @@ -371,10 +381,22 @@ dependencies = [ "geo-types", "geographiclib-rs", "num-traits", - "robust", + "robust 0.2.2", "rstar", ] +[[package]] +name = "geo-booleanop" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b68ff17c819296610e9eee41bdf5eb37f71b09870a95f0086e07d8fcc1f45b" +dependencies = [ + "float_next_after", + "geo-types", + "num-traits", + "robust 0.1.2", +] + [[package]] name = "geo-types" version = "0.6.1" @@ -872,6 +894,12 @@ dependencies = [ "syn", ] +[[package]] +name = "robust" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222155e5550abd9100afdcdefbd08aa1b856d226b60e327044ec21b1ece2f78e" + [[package]] name = "robust" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 2985a5b..fe79cee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ flate2 = "1.0" rayon = "1.5" include_dir = "0.6" rstar = "0.8" +geo-booleanop = "0.3.0" [dev-dependencies] approx = "0.3" diff --git a/src/lib.rs b/src/lib.rs index 773ade9..9e17983 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ mod additional_zones; mod country_finder; mod hierarchy_builder; pub mod merger; +mod postcode; +mod postcode_service; mod zone_ext; pub mod zone_typer; @@ -23,6 +25,7 @@ use std::path::Path; use cosmogony::{Zone, ZoneIndex}; +use crate::postcode_service::{assign_postcodes_to_zones}; use crate::zone_ext::ZoneExt; #[rustfmt::skip] @@ -39,6 +42,20 @@ pub fn is_admin(obj: &OsmObj) -> bool { } } +#[rustfmt::skip] +pub fn is_postal_code(obj: &OsmObj) -> bool { + match *obj { + OsmObj::Relation(ref rel) => { + rel.tags + .get("boundary") + .map_or(false, |v| v == "postal_code") + && + rel.tags.get("postal_code").is_some() + } + _ => false, + } +} + pub fn is_place(obj: &OsmObj) -> bool { match *obj { OsmObj::Node(ref node) => node @@ -212,11 +229,13 @@ pub fn build_cosmogony( let file = File::open(&path).context("no pbf file")?; let parsed_pbf = OsmPbfReader::new(file) - .get_objs_and_deps(|o| is_admin(o) || is_place(o)) + .get_objs_and_deps(|o| is_admin(o) || is_place(o) || is_postal_code(o)) .context("invalid osm file")?; info!("reading pbf done."); + info!("Starting to extract zones."); let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf)?; + info!("Finishing to extract zones."); create_ontology( &mut zones, @@ -227,6 +246,8 @@ pub fn build_cosmogony( filter_langs, )?; + assign_postcodes_to_zones(&mut zones, &parsed_pbf); + stats.compute(&zones); let cosmogony = Cosmogony { diff --git a/src/postcode.rs b/src/postcode.rs new file mode 100644 index 0000000..a04ea52 --- /dev/null +++ b/src/postcode.rs @@ -0,0 +1,100 @@ +use geo::algorithm::area::Area; +use geo::{Point, Rect}; +use geo_types::MultiPolygon; +use osm_boundaries_utils::build_boundary; +use osmpbfreader::{OsmId, OsmObj, Relation}; +use rstar::{RTreeObject, AABB}; +use std::collections::BTreeMap; + +#[derive(Debug, Clone)] +pub struct Postcode { + pub osm_id: String, + pub zipcode: String, + pub boundary: geo_types::MultiPolygon, + pub area: f64, +} + +impl Postcode { + pub fn get_boundary(&self) -> &geo_types::MultiPolygon { + &self.boundary + } + + pub fn unsigned_area(&self) -> f64 { + self.area + } + + /// create a zone from an osm relation and a geometry + pub fn from_osm_relation( + relation: &Relation, + objects: &BTreeMap, + ) -> Option { + // Skip postcode withjout postcode + let zipcode = match relation.tags.get("postal_code") { + Some(val) => val, + None => { + debug!( + "relation/{}: postcode region without name, skipped", + relation.id.0 + ); + "" + } + }; + + let osm_id = format!("relation:{}", relation.id.0.to_string()); + + let boundary = build_boundary(relation, objects); + + boundary.map(|boundary| { + let area = boundary.unsigned_area(); + Postcode { + osm_id, + zipcode: zipcode.to_string(), + boundary, + area, + } + }) + } +} + +impl Default for Postcode { + fn default() -> Self { + Postcode { + osm_id: "".into(), + boundary: MultiPolygon(vec![]), + zipcode: "".into(), + area: 0.0, + } + } +} + +#[derive(Debug)] +pub struct PostcodeBbox { + postcode: Postcode, + bbox: AABB>, +} + +impl PostcodeBbox { + pub fn new(postcode: Postcode, bbox: &Rect) -> Self { + PostcodeBbox { + postcode, + bbox: envelope(&bbox), + } + } + + pub fn get_postcode(&self) -> &Postcode { + &self.postcode + } +} + +impl RTreeObject for PostcodeBbox { + type Envelope = AABB>; + fn envelope(&self) -> Self::Envelope { + self.bbox + } +} + +fn envelope(bbox: &Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} + +impl Postcode {} diff --git a/src/postcode_service.rs b/src/postcode_service.rs new file mode 100644 index 0000000..48fd2b7 --- /dev/null +++ b/src/postcode_service.rs @@ -0,0 +1,78 @@ +use crate::is_postal_code; +use crate::postcode::{Postcode, PostcodeBbox}; +use cosmogony::Zone; +use geo::prelude::{Area, BoundingRect}; +use geo::{Point, Rect}; +use geo_booleanop::boolean::BooleanOp; +use osmpbfreader::{OsmId, OsmObj}; +use rstar::{RTree, AABB}; +use std::collections::BTreeMap; + +pub fn assign_postcodes_to_zones(zones: &mut Vec, pbf: &BTreeMap) { + use rayon::prelude::*; + + info!("Starting to extract postcodes."); + let postcodes = get_postcodes_from_pbf(pbf); + info!("Finished extracting {} postcodes, now starting to match postcodes and zones", postcodes.size()); + + zones.into_par_iter().for_each(|z| { + if let Some(boundary) = z.boundary.as_ref() { + if let Some(bbox) = z.bbox { + if z.zip_codes.is_empty() { + //info!("ZipCodes were empty for {:?}, trying to fill them", name); + z.zip_codes = postcodes + .locate_in_envelope_intersecting(&envelope(&bbox)) + .filter(|postcode_bbox| { + //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); + + let overlap_between_postcode_and_area = BooleanOp::intersection( + boundary, + postcode_bbox.get_postcode().get_boundary(), + ); + + // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" + let overlap_percentage_relative_to_postcode = + overlap_between_postcode_and_area.unsigned_area() + / postcode_bbox.get_postcode().unsigned_area(); + + //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); + // at least 5% des Postcodes müssen in der genannten Fläche liegen + overlap_percentage_relative_to_postcode > 0.05 + }) + .map(|x| x.get_postcode().zipcode.to_string()) + .collect(); + z.zip_codes.sort(); + } + } + } + }); + info!("Finished matching postcodes and zones."); +} + + +fn get_postcodes_from_pbf(pbf: &BTreeMap) -> RTree { + use rayon::prelude::*; + + let postcodes_list: Vec = pbf + .into_par_iter() + .filter_map(|(_, obj)| { + if !is_postal_code(obj) { + return None; + } + if let OsmObj::Relation(ref relation) = *obj { + if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { + // Ignore zone without boundary polygon for the moment + let bbox = postcode.boundary.bounding_rect().unwrap(); + return Some(PostcodeBbox::new(postcode, &bbox)); + }; + } + None + }) + .collect(); + + RTree::bulk_load(postcodes_list) +} + +fn envelope(bbox: &Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} diff --git a/src/zone_ext.rs b/src/zone_ext.rs index a78e229..8b3f367 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -14,6 +14,8 @@ use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; +use geo_types::MultiPolygon; + pub trait ZoneExt { /// create a zone from an osm node fn from_osm_node(node: &Node, index: ZoneIndex) -> Option; @@ -73,6 +75,7 @@ impl ZoneExt for Zone { .map(|s| s.to_string()) .sorted() .collect(); + let wikidata = tags.get("wikidata").map(|s| s.to_string()); let international_names = get_international_names(&tags, name); @@ -125,7 +128,11 @@ impl ZoneExt for Zone { .get("addr:postcode") .or_else(|| relation.tags.get("postal_code")) .map_or("", |val| &val[..]); - let zip_codes = zip_code + + let boundary: Option> = build_boundary(relation, objects); + let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); + + let zip_codes: Vec = zip_code .split(';') .filter(|s| !s.is_empty()) .map(|s| s.to_string()) @@ -152,9 +159,6 @@ impl ZoneExt for Zone { }) } - let boundary = build_boundary(relation, objects); - let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); - let refs = &relation.refs; let osm_center = refs .iter() @@ -213,9 +217,9 @@ impl ZoneExt for Zone { // In GEOS, "covers" is less strict than "contains". // eg: a polygon does NOT "contain" its boundary, but "covers" it. m_self.covers(m_other) - .map_err(|e| info!("impossible to compute geometries coverage for zone {:?}/{:?}: error {}", - &self.osm_id, &other.osm_id, e)) - .unwrap_or(false) + .map_err(|e| info!("impossible to compute geometries coverage for zone {:?}/{:?}: error {}", + &self.osm_id, &other.osm_id, e)) + .unwrap_or(false) } (&Err(ref e), _) => { info!(