From 80815189e7ca7a4907ea29362e12f0acf751717c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Thu, 26 Nov 2020 18:13:21 +0100 Subject: [PATCH 01/13] WIP - initially working --- cosmogony/src/lib.rs | 2 + cosmogony/src/postcode.rs | 30 ++++++++++++++ src/lib.rs | 70 ++++++++++++++++++++++++++++++--- src/postcode_ext.rs | 82 +++++++++++++++++++++++++++++++++++++++ src/zone_ext.rs | 57 ++++++++++++++++++++------- 5 files changed, 221 insertions(+), 20 deletions(-) create mode 100644 cosmogony/src/postcode.rs create mode 100644 src/postcode_ext.rs diff --git a/cosmogony/src/lib.rs b/cosmogony/src/lib.rs index 93d21c6..0480e71 100644 --- a/cosmogony/src/lib.rs +++ b/cosmogony/src/lib.rs @@ -3,7 +3,9 @@ mod model; pub mod mutable_slice; mod read; mod zone; +mod postcode; pub use model::{Cosmogony, CosmogonyMetadata, CosmogonyStats}; pub use read::{load_cosmogony_from_file, read_zones_from_file}; pub use zone::{Coord, Zone, ZoneIndex, ZoneType}; +pub use postcode::Postcode; diff --git a/cosmogony/src/postcode.rs b/cosmogony/src/postcode.rs new file mode 100644 index 0000000..a91640d --- /dev/null +++ b/cosmogony/src/postcode.rs @@ -0,0 +1,30 @@ +use crate::mutable_slice::MutableSlice; +use geo_types::{Coordinate, Geometry, MultiPolygon, Point, Rect}; +use log::warn; +use osmpbfreader::objects::Tags; +use serde::Serialize; +use serde_derive::*; +use std::collections::BTreeMap; +use std::fmt; + +pub type Coord = Point; + +#[derive(Debug, Clone)] +pub struct Postcode { + pub osm_id: String, + pub zipcode: String, + pub boundary: Option>, +} + +impl Default for Postcode { + fn default() -> Self { + Postcode { + osm_id: "".into(), + boundary: None, + zipcode: "".into(), + } + } +} + +impl Postcode { +} diff --git a/src/lib.rs b/src/lib.rs index 773ade9..85ae7ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,12 +7,13 @@ mod hierarchy_builder; pub mod merger; mod zone_ext; pub mod zone_typer; +mod postcode_ext; use crate::country_finder::CountryFinder; use crate::hierarchy_builder::{build_hierarchy, find_inclusions}; use additional_zones::compute_additional_cities; use cosmogony::mutable_slice::MutableSlice; -use cosmogony::{Cosmogony, CosmogonyMetadata, CosmogonyStats}; +use cosmogony::{Cosmogony, CosmogonyMetadata, CosmogonyStats, Postcode}; use failure::Error; use failure::ResultExt; use log::{debug, info}; @@ -24,6 +25,9 @@ use std::path::Path; use cosmogony::{Zone, ZoneIndex}; use crate::zone_ext::ZoneExt; +use crate::postcode_ext::{PostcodeExt, PostcodeBbox}; +use rstar::RTree; +use geo::bounding_rect::BoundingRect; #[rustfmt::skip] pub fn is_admin(obj: &OsmObj) -> bool { @@ -39,6 +43,21 @@ pub fn is_admin(obj: &OsmObj) -> bool { } } +#[rustfmt::skip] +pub fn is_postal_code(obj: &OsmObj) -> bool { + match *obj { + OsmObj::Relation(ref rel) => { + rel.tags + .get("boundary") + .map_or(false, |v| v == "postal_code") + && + rel.tags.get("postal_code").is_some() + } + _ => false, + } +} + + pub fn is_place(obj: &OsmObj) -> bool { match *obj { OsmObj::Node(ref node) => node @@ -49,19 +68,49 @@ pub fn is_place(obj: &OsmObj) -> bool { } } +pub fn get_postcodes( + pbf: &BTreeMap, +) -> Result<(RTree, CosmogonyStats), Error> { + + + let mut postcodes: Vec = Vec::with_capacity(1000); + + let stats = CosmogonyStats::default(); + + for obj in pbf.values() { + if let OsmObj::Relation(ref relation) = *obj { + if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { + // Ignore zone without boundary polygon for the moment + if postcode.boundary.is_some() { + let bbox = postcode.boundary.as_ref().and_then(|b| b.bounding_rect()).unwrap(); + postcodes.push(PostcodeBbox::new( + postcode, + &bbox + )); + } + }; + } + } + + let mut tree = RTree::bulk_load(postcodes); + + + + Ok((tree, stats)) +} + + pub fn get_zones_and_stats( pbf: &BTreeMap, + postcodes: &RTree ) -> Result<(Vec, CosmogonyStats), Error> { let stats = CosmogonyStats::default(); let mut zones = Vec::with_capacity(1000); for obj in pbf.values() { - if !is_admin(obj) { - continue; - } if let OsmObj::Relation(ref relation) = *obj { let next_index = ZoneIndex { index: zones.len() }; - if let Some(zone) = Zone::from_osm_relation(relation, pbf, next_index) { + if let Some(zone) = Zone::from_osm_relation(relation, pbf, next_index, postcodes) { // Ignore zone without boundary polygon for the moment if zone.boundary.is_some() { zones.push(zone); @@ -216,7 +265,16 @@ pub fn build_cosmogony( .context("invalid osm file")?; info!("reading pbf done."); - let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf)?; + info!("Reading postal codes"); + let file = File::open(&path).context("no pbf file")?; + let parsed_postal_code = OsmPbfReader::new(file) + .get_objs_and_deps(|o| is_postal_code(o)) + .context("invalid osm file")?; + info!("reading postal code from pbf done."); + + let (postcodes, mut stats2) = get_postcodes(&parsed_postal_code)?; + + let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf, &postcodes)?; create_ontology( &mut zones, diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs new file mode 100644 index 0000000..3bdf65b --- /dev/null +++ b/src/postcode_ext.rs @@ -0,0 +1,82 @@ +// extends Zones to add some capabilities +// The Zone's capabilities have been split in order to hide some functions specific to cosmogony +// and that we do not want to expose in the model + +use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType, Postcode}; +use osm_boundaries_utils::build_boundary; +use osmpbfreader::objects::{OsmId, OsmObj, Relation}; +use std::collections::{BTreeMap, BTreeSet}; +use std::convert::TryInto; +use rstar::{RTreeObject, AABB}; +use geo::{Point, Rect}; + + +#[derive(Debug)] +pub struct PostcodeBbox { + postcode: Postcode, + bbox: AABB>, +} + +impl PostcodeBbox { + pub fn new(postcode: Postcode, bbox: &Rect) -> Self { + PostcodeBbox { + postcode, + bbox: envelope(&bbox), + } + } + + pub fn get_postcode(&self) -> &Postcode { + return &self.postcode; + } +} + + +impl RTreeObject for PostcodeBbox { + type Envelope = AABB>; + fn envelope(&self) -> Self::Envelope { + self.bbox + } +} + + +fn envelope(bbox: &Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} + +pub trait PostcodeExt { + /// create a zone from an osm relation and a geometry + fn from_osm_relation( + relation: &Relation, + objects: &BTreeMap, + ) -> Option; +} + +impl PostcodeExt for Postcode { + + fn from_osm_relation( + relation: &Relation, + objects: &BTreeMap, + ) -> Option { + // Skip postcode withjout postcode + let zipcode = match relation.tags.get("postal_code") { + Some(val) => val, + None => { + debug!( + "relation/{}: postcode region without name, skipped", + relation.id.0 + ); + "" + } + }; + + let osm_id = format!("relation:{}", relation.id.0.to_string()); + + let boundary = build_boundary(relation, objects); + + Some(Postcode { + osm_id, + zipcode: zipcode.to_string(), + boundary, + }) + } +} diff --git a/src/zone_ext.rs b/src/zone_ext.rs index a78e229..f2ec016 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -2,7 +2,7 @@ // The Zone's capabilities have been split in order to hide some functions specific to cosmogony // and that we do not want to expose in the model -use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType}; +use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType, Postcode}; use geo::algorithm::bounding_rect::BoundingRect; use geo::prelude::Contains; use geos::Geom; @@ -13,6 +13,10 @@ use osmpbfreader::objects::{Node, OsmId, OsmObj, Relation, Tags}; use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; +use rstar::{RTree, AABB, RTreeObject}; +use geo::{Rect, Point}; +use geo::intersects::Intersects; +use crate::postcode_ext::PostcodeBbox; pub trait ZoneExt { /// create a zone from an osm node @@ -23,6 +27,7 @@ pub trait ZoneExt { relation: &Relation, objects: &BTreeMap, index: ZoneIndex, + postcodes: &RTree, ) -> Option; /// check is a zone contains another zone @@ -101,6 +106,7 @@ impl ZoneExt for Zone { relation: &Relation, objects: &BTreeMap, index: ZoneIndex, + postcodes: &RTree, ) -> Option { use geo::centroid::Centroid; @@ -125,12 +131,33 @@ impl ZoneExt for Zone { .get("addr:postcode") .or_else(|| relation.tags.get("postal_code")) .map_or("", |val| &val[..]); - let zip_codes = zip_code + + let boundary = build_boundary(relation, objects); + let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); + + let mut zip_codes: Vec = zip_code .split(';') .filter(|s| !s.is_empty()) .map(|s| s.to_string()) .sorted() .collect(); + //if let Some(boundary) = boundary { + if let Some(bbox) = bbox { + if (zip_codes.is_empty()) { + zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) + // TODO: fine-grained intersection + /*.filter(|x| + x.get_postcode().boundary.and_then(|b| + b.intersects(boundary) + ) + )*/ + .map(|x| x.get_postcode().zipcode.to_string()) + .collect(); + + info!("ZipCodes were empty, trying to find it {:?}", zip_codes); + } + } + //} let wikidata = relation.tags.get("wikidata").map(|s| s.to_string()); let osm_id = format!("relation:{}", relation.id.0.to_string()); @@ -152,9 +179,6 @@ impl ZoneExt for Zone { }) } - let boundary = build_boundary(relation, objects); - let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); - let refs = &relation.refs; let osm_center = refs .iter() @@ -213,9 +237,9 @@ impl ZoneExt for Zone { // In GEOS, "covers" is less strict than "contains". // eg: a polygon does NOT "contain" its boundary, but "covers" it. m_self.covers(m_other) - .map_err(|e| info!("impossible to compute geometries coverage for zone {:?}/{:?}: error {}", - &self.osm_id, &other.osm_id, e)) - .unwrap_or(false) + .map_err(|e| info!("impossible to compute geometries coverage for zone {:?}/{:?}: error {}", + &self.osm_id, &other.osm_id, e)) + .unwrap_or(false) } (&Err(ref e), _) => { info!( @@ -303,7 +327,7 @@ impl ZoneExt for Zone { // * for all cities where these entities are not explicitly distinct if (self.wikidata.is_some() && self.wikidata == center_wikidata) || (self.zone_type == Some(ZoneType::City) - && (center_wikidata.is_none() || self.wikidata.is_none())) + && (center_wikidata.is_none() || self.wikidata.is_none())) { let center_names: Vec<_> = self .center_tags @@ -328,8 +352,8 @@ impl ZoneExt for Zone { } fn create_lbl<'a, F>(zone: &'a Zone, all_zones: &'a MutableSlice<'_>, f: F) -> String -where - F: Fn(&Zone) -> String, + where + F: Fn(&Zone) -> String, { let mut hierarchy: Vec = zone.iter_hierarchy(all_zones).map(f).dedup().collect(); @@ -359,6 +383,11 @@ fn format_zip_code(zip_codes: &[String]) -> String { } } +fn envelope(bbox: Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} + + /// get all the international names from the osm tags /// /// the names in osm are in a tag names `name:`, @@ -474,9 +503,9 @@ mod test { ("name", "bobito"), ("name:a_strange_lang_name", "bibi"), ] - .into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(); + .into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(); let names = get_international_names(&tags, "bob"); From 34c11c4f4fb37a26f2ff510446ac4647fa9c87d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Thu, 26 Nov 2020 19:22:56 +0100 Subject: [PATCH 02/13] FEATURE: more fine grained intersection matching --- cosmogony/src/postcode.rs | 9 +++++++-- src/zone_ext.rs | 25 +++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/cosmogony/src/postcode.rs b/cosmogony/src/postcode.rs index a91640d..bf70ec1 100644 --- a/cosmogony/src/postcode.rs +++ b/cosmogony/src/postcode.rs @@ -16,6 +16,12 @@ pub struct Postcode { pub boundary: Option>, } +impl Postcode { + pub fn get_boundary(&self) -> Option<&geo_types::MultiPolygon> { + return self.boundary.as_ref() + } +} + impl Default for Postcode { fn default() -> Self { Postcode { @@ -26,5 +32,4 @@ impl Default for Postcode { } } -impl Postcode { -} +impl Postcode {} diff --git a/src/zone_ext.rs b/src/zone_ext.rs index f2ec016..f29d121 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -78,6 +78,7 @@ impl ZoneExt for Zone { .map(|s| s.to_string()) .sorted() .collect(); + let wikidata = tags.get("wikidata").map(|s| s.to_string()); let international_names = get_international_names(&tags, name); @@ -141,23 +142,27 @@ impl ZoneExt for Zone { .map(|s| s.to_string()) .sorted() .collect(); - //if let Some(boundary) = boundary { + if let Some(boundary) = boundary.as_ref() { if let Some(bbox) = bbox { if (zip_codes.is_empty()) { + info!("ZipCodes were empty for {:?}, trying to fill them", name); zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) - // TODO: fine-grained intersection - /*.filter(|x| - x.get_postcode().boundary.and_then(|b| - b.intersects(boundary) - ) - )*/ + .filter(|x| { + info!(" - Candidate Postcode: {:?}", x.get_postcode().zipcode); + + if let Some(b) = x.get_postcode().get_boundary() { + info!(" CHOSEN"); + boundary.intersects(b) + } else { + info!(" NOT CHOSEN"); + false + } + }) .map(|x| x.get_postcode().zipcode.to_string()) .collect(); - - info!("ZipCodes were empty, trying to find it {:?}", zip_codes); } } - //} + } let wikidata = relation.tags.get("wikidata").map(|s| s.to_string()); let osm_id = format!("relation:{}", relation.id.0.to_string()); From 2f7c67365c86cb9014abefbff04e8d4c405c7e52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Thu, 26 Nov 2020 21:05:20 +0100 Subject: [PATCH 03/13] FEATURE: intersection matching with % inclusion --- Cargo.lock | 30 +++++++++++++++++++++++++++++- Cargo.toml | 1 + cosmogony/src/postcode.rs | 10 +++++----- src/lib.rs | 12 +++++------- src/postcode_ext.rs | 2 +- src/zone_ext.rs | 26 ++++++++++++++++++++------ 6 files changed, 61 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45552cd..4115567 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,7 @@ dependencies = [ "failure_derive", "flate2", "geo", + "geo-booleanop", "geo-types", "geojson", "geos", @@ -318,6 +319,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float_next_after" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fc612c5837986b7104a87a0df74a5460931f1c5274be12f8d0f40aa2f30d632" +dependencies = [ + "num-traits", +] + [[package]] name = "futures" version = "0.1.30" @@ -371,10 +381,22 @@ dependencies = [ "geo-types", "geographiclib-rs", "num-traits", - "robust", + "robust 0.2.2", "rstar", ] +[[package]] +name = "geo-booleanop" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b68ff17c819296610e9eee41bdf5eb37f71b09870a95f0086e07d8fcc1f45b" +dependencies = [ + "float_next_after", + "geo-types", + "num-traits", + "robust 0.1.2", +] + [[package]] name = "geo-types" version = "0.6.1" @@ -872,6 +894,12 @@ dependencies = [ "syn", ] +[[package]] +name = "robust" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222155e5550abd9100afdcdefbd08aa1b856d226b60e327044ec21b1ece2f78e" + [[package]] name = "robust" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 2985a5b..fe79cee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ flate2 = "1.0" rayon = "1.5" include_dir = "0.6" rstar = "0.8" +geo-booleanop = "0.3.0" [dev-dependencies] approx = "0.3" diff --git a/cosmogony/src/postcode.rs b/cosmogony/src/postcode.rs index bf70ec1..96f7ca0 100644 --- a/cosmogony/src/postcode.rs +++ b/cosmogony/src/postcode.rs @@ -1,5 +1,5 @@ use crate::mutable_slice::MutableSlice; -use geo_types::{Coordinate, Geometry, MultiPolygon, Point, Rect}; +use geo_types::{Coordinate, Geometry, MultiPolygon, Point, Rect, Polygon}; use log::warn; use osmpbfreader::objects::Tags; use serde::Serialize; @@ -13,12 +13,12 @@ pub type Coord = Point; pub struct Postcode { pub osm_id: String, pub zipcode: String, - pub boundary: Option>, + pub boundary: geo_types::MultiPolygon, } impl Postcode { - pub fn get_boundary(&self) -> Option<&geo_types::MultiPolygon> { - return self.boundary.as_ref() + pub fn get_boundary(&self) -> &geo_types::MultiPolygon { + return &self.boundary } } @@ -26,7 +26,7 @@ impl Default for Postcode { fn default() -> Self { Postcode { osm_id: "".into(), - boundary: None, + boundary: MultiPolygon(vec![]), zipcode: "".into(), } } diff --git a/src/lib.rs b/src/lib.rs index 85ae7ff..e99e5b7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -81,13 +81,11 @@ pub fn get_postcodes( if let OsmObj::Relation(ref relation) = *obj { if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { // Ignore zone without boundary polygon for the moment - if postcode.boundary.is_some() { - let bbox = postcode.boundary.as_ref().and_then(|b| b.bounding_rect()).unwrap(); - postcodes.push(PostcodeBbox::new( - postcode, - &bbox - )); - } + let bbox = postcode.boundary.bounding_rect().unwrap(); + postcodes.push(PostcodeBbox::new( + postcode, + &bbox + )); }; } } diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs index 3bdf65b..a1ab1e5 100644 --- a/src/postcode_ext.rs +++ b/src/postcode_ext.rs @@ -73,7 +73,7 @@ impl PostcodeExt for Postcode { let boundary = build_boundary(relation, objects); - Some(Postcode { + boundary.map(|boundary| Postcode { osm_id, zipcode: zipcode.to_string(), boundary, diff --git a/src/zone_ext.rs b/src/zone_ext.rs index f29d121..02670f5 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -17,6 +17,12 @@ use rstar::{RTree, AABB, RTreeObject}; use geo::{Rect, Point}; use geo::intersects::Intersects; use crate::postcode_ext::PostcodeBbox; +use geo_booleanop::boolean::BooleanOp; + +use geo_booleanop; +use geo; +use geo_types::MultiPolygon; +use geo::algorithm::area::Area; pub trait ZoneExt { /// create a zone from an osm node @@ -133,7 +139,7 @@ impl ZoneExt for Zone { .or_else(|| relation.tags.get("postal_code")) .map_or("", |val| &val[..]); - let boundary = build_boundary(relation, objects); + let boundary:Option> = build_boundary(relation, objects); let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); let mut zip_codes: Vec = zip_code @@ -147,16 +153,24 @@ impl ZoneExt for Zone { if (zip_codes.is_empty()) { info!("ZipCodes were empty for {:?}, trying to fill them", name); zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) - .filter(|x| { - info!(" - Candidate Postcode: {:?}", x.get_postcode().zipcode); + .filter(|postcode| { + info!(" - Candidate Postcode: {:?}", postcode.get_postcode().zipcode); + + let postcodeBoundary = postcode.get_postcode().get_boundary(); + if boundary.intersects(postcodeBoundary) { + let x = BooleanOp::intersection(boundary, postcodeBoundary); - if let Some(b) = x.get_postcode().get_boundary() { - info!(" CHOSEN"); - boundary.intersects(b) + // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" + let percentage = x.unsigned_area() / postcodeBoundary.unsigned_area(); // TODO: cache postcodeBoundary size + + info!(" CHOSEN {} {:?}", percentage, percentage > 0.05); + // at least 5% des Postcodes müssen in der genannten Fläche liegen + percentage > 0.05 } else { info!(" NOT CHOSEN"); false } + }) .map(|x| x.get_postcode().zipcode.to_string()) .collect(); From 59d6358ee239520d35751d8983a9abf52cbe857b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Thu, 26 Nov 2020 21:12:11 +0100 Subject: [PATCH 04/13] TASK: code cleanups --- cosmogony/src/postcode.rs | 6 ++++++ src/postcode_ext.rs | 13 +++++++++---- src/zone_ext.rs | 30 ++++++++++++------------------ 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/cosmogony/src/postcode.rs b/cosmogony/src/postcode.rs index 96f7ca0..35ab840 100644 --- a/cosmogony/src/postcode.rs +++ b/cosmogony/src/postcode.rs @@ -14,12 +14,17 @@ pub struct Postcode { pub osm_id: String, pub zipcode: String, pub boundary: geo_types::MultiPolygon, + pub area: f64 } impl Postcode { pub fn get_boundary(&self) -> &geo_types::MultiPolygon { return &self.boundary } + + pub fn unsigned_area(&self) -> f64 { + return self.area; + } } impl Default for Postcode { @@ -28,6 +33,7 @@ impl Default for Postcode { osm_id: "".into(), boundary: MultiPolygon(vec![]), zipcode: "".into(), + area: 0.0 } } } diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs index a1ab1e5..b392810 100644 --- a/src/postcode_ext.rs +++ b/src/postcode_ext.rs @@ -9,6 +9,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; use rstar::{RTreeObject, AABB}; use geo::{Point, Rect}; +use geo::algorithm::area::Area; #[derive(Debug)] @@ -73,10 +74,14 @@ impl PostcodeExt for Postcode { let boundary = build_boundary(relation, objects); - boundary.map(|boundary| Postcode { - osm_id, - zipcode: zipcode.to_string(), - boundary, + boundary.map(|boundary| { + let area = boundary.unsigned_area(); + Postcode { + osm_id, + zipcode: zipcode.to_string(), + boundary, + area + } }) } } diff --git a/src/zone_ext.rs b/src/zone_ext.rs index 02670f5..d72b8c5 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -151,25 +151,19 @@ impl ZoneExt for Zone { if let Some(boundary) = boundary.as_ref() { if let Some(bbox) = bbox { if (zip_codes.is_empty()) { - info!("ZipCodes were empty for {:?}, trying to fill them", name); + //info!("ZipCodes were empty for {:?}, trying to fill them", name); zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) - .filter(|postcode| { - info!(" - Candidate Postcode: {:?}", postcode.get_postcode().zipcode); - - let postcodeBoundary = postcode.get_postcode().get_boundary(); - if boundary.intersects(postcodeBoundary) { - let x = BooleanOp::intersection(boundary, postcodeBoundary); - - // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" - let percentage = x.unsigned_area() / postcodeBoundary.unsigned_area(); // TODO: cache postcodeBoundary size - - info!(" CHOSEN {} {:?}", percentage, percentage > 0.05); - // at least 5% des Postcodes müssen in der genannten Fläche liegen - percentage > 0.05 - } else { - info!(" NOT CHOSEN"); - false - } + .filter(|postcode_bbox| { + //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); + + let overlap_between_postcode_and_area = BooleanOp::intersection(boundary, postcode_bbox.get_postcode().get_boundary()); + + // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" + let overlap_percentage_relative_to_postcode = overlap_between_postcode_and_area.unsigned_area() / postcode_bbox.get_postcode().unsigned_area(); + + //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); + // at least 5% des Postcodes müssen in der genannten Fläche liegen + overlap_percentage_relative_to_postcode > 0.05 }) .map(|x| x.get_postcode().zipcode.to_string()) From d2e6baa690914b4e8d87f6354ac9e349273683a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Tue, 1 Dec 2020 21:25:03 +0100 Subject: [PATCH 05/13] TASK: move Postcode model around --- cosmogony/src/lib.rs | 2 -- src/lib.rs | 4 +++- {cosmogony/src => src}/postcode.rs | 2 +- src/postcode_ext.rs | 3 ++- src/zone_ext.rs | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) rename {cosmogony/src => src}/postcode.rs (95%) diff --git a/cosmogony/src/lib.rs b/cosmogony/src/lib.rs index 0480e71..93d21c6 100644 --- a/cosmogony/src/lib.rs +++ b/cosmogony/src/lib.rs @@ -3,9 +3,7 @@ mod model; pub mod mutable_slice; mod read; mod zone; -mod postcode; pub use model::{Cosmogony, CosmogonyMetadata, CosmogonyStats}; pub use read::{load_cosmogony_from_file, read_zones_from_file}; pub use zone::{Coord, Zone, ZoneIndex, ZoneType}; -pub use postcode::Postcode; diff --git a/src/lib.rs b/src/lib.rs index e99e5b7..f8a94e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,12 +8,13 @@ pub mod merger; mod zone_ext; pub mod zone_typer; mod postcode_ext; +mod postcode; use crate::country_finder::CountryFinder; use crate::hierarchy_builder::{build_hierarchy, find_inclusions}; use additional_zones::compute_additional_cities; use cosmogony::mutable_slice::MutableSlice; -use cosmogony::{Cosmogony, CosmogonyMetadata, CosmogonyStats, Postcode}; +use cosmogony::{Cosmogony, CosmogonyMetadata, CosmogonyStats}; use failure::Error; use failure::ResultExt; use log::{debug, info}; @@ -28,6 +29,7 @@ use crate::zone_ext::ZoneExt; use crate::postcode_ext::{PostcodeExt, PostcodeBbox}; use rstar::RTree; use geo::bounding_rect::BoundingRect; +use crate::postcode::Postcode; #[rustfmt::skip] pub fn is_admin(obj: &OsmObj) -> bool { diff --git a/cosmogony/src/postcode.rs b/src/postcode.rs similarity index 95% rename from cosmogony/src/postcode.rs rename to src/postcode.rs index 35ab840..ae69d69 100644 --- a/cosmogony/src/postcode.rs +++ b/src/postcode.rs @@ -1,4 +1,4 @@ -use crate::mutable_slice::MutableSlice; +use cosmogony::mutable_slice::MutableSlice; use geo_types::{Coordinate, Geometry, MultiPolygon, Point, Rect, Polygon}; use log::warn; use osmpbfreader::objects::Tags; diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs index b392810..de667d2 100644 --- a/src/postcode_ext.rs +++ b/src/postcode_ext.rs @@ -2,7 +2,7 @@ // The Zone's capabilities have been split in order to hide some functions specific to cosmogony // and that we do not want to expose in the model -use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType, Postcode}; +use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType}; use osm_boundaries_utils::build_boundary; use osmpbfreader::objects::{OsmId, OsmObj, Relation}; use std::collections::{BTreeMap, BTreeSet}; @@ -10,6 +10,7 @@ use std::convert::TryInto; use rstar::{RTreeObject, AABB}; use geo::{Point, Rect}; use geo::algorithm::area::Area; +use crate::postcode::Postcode; #[derive(Debug)] diff --git a/src/zone_ext.rs b/src/zone_ext.rs index d72b8c5..9124ae8 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -2,7 +2,7 @@ // The Zone's capabilities have been split in order to hide some functions specific to cosmogony // and that we do not want to expose in the model -use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType, Postcode}; +use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType}; use geo::algorithm::bounding_rect::BoundingRect; use geo::prelude::Contains; use geos::Geom; From 26a360a39c53567ff6316db8d8c8ae62546115e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Tue, 1 Dec 2020 21:28:24 +0100 Subject: [PATCH 06/13] BUGFIX: fix compiler warnings --- src/lib.rs | 4 ++-- src/postcode.rs | 11 +---------- src/postcode_ext.rs | 4 +--- src/zone_ext.rs | 5 ++--- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f8a94e7..b969bb3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,7 +92,7 @@ pub fn get_postcodes( } } - let mut tree = RTree::bulk_load(postcodes); + let tree = RTree::bulk_load(postcodes); @@ -272,7 +272,7 @@ pub fn build_cosmogony( .context("invalid osm file")?; info!("reading postal code from pbf done."); - let (postcodes, mut stats2) = get_postcodes(&parsed_postal_code)?; + let (postcodes,_) = get_postcodes(&parsed_postal_code)?; let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf, &postcodes)?; diff --git a/src/postcode.rs b/src/postcode.rs index ae69d69..95ec285 100644 --- a/src/postcode.rs +++ b/src/postcode.rs @@ -1,13 +1,4 @@ -use cosmogony::mutable_slice::MutableSlice; -use geo_types::{Coordinate, Geometry, MultiPolygon, Point, Rect, Polygon}; -use log::warn; -use osmpbfreader::objects::Tags; -use serde::Serialize; -use serde_derive::*; -use std::collections::BTreeMap; -use std::fmt; - -pub type Coord = Point; +use geo_types::{MultiPolygon}; #[derive(Debug, Clone)] pub struct Postcode { diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs index de667d2..0bf0182 100644 --- a/src/postcode_ext.rs +++ b/src/postcode_ext.rs @@ -2,11 +2,9 @@ // The Zone's capabilities have been split in order to hide some functions specific to cosmogony // and that we do not want to expose in the model -use cosmogony::{mutable_slice::MutableSlice, Coord, Zone, ZoneIndex, ZoneType}; use osm_boundaries_utils::build_boundary; use osmpbfreader::objects::{OsmId, OsmObj, Relation}; -use std::collections::{BTreeMap, BTreeSet}; -use std::convert::TryInto; +use std::collections::{BTreeMap}; use rstar::{RTreeObject, AABB}; use geo::{Point, Rect}; use geo::algorithm::area::Area; diff --git a/src/zone_ext.rs b/src/zone_ext.rs index 9124ae8..52c094c 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -13,9 +13,8 @@ use osmpbfreader::objects::{Node, OsmId, OsmObj, Relation, Tags}; use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; -use rstar::{RTree, AABB, RTreeObject}; +use rstar::{RTree, AABB}; use geo::{Rect, Point}; -use geo::intersects::Intersects; use crate::postcode_ext::PostcodeBbox; use geo_booleanop::boolean::BooleanOp; @@ -150,7 +149,7 @@ impl ZoneExt for Zone { .collect(); if let Some(boundary) = boundary.as_ref() { if let Some(bbox) = bbox { - if (zip_codes.is_empty()) { + if zip_codes.is_empty() { //info!("ZipCodes were empty for {:?}, trying to fill them", name); zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) .filter(|postcode_bbox| { From 0c73aab21004176f3341a77bc831ebd127951356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Tue, 1 Dec 2020 21:42:17 +0100 Subject: [PATCH 07/13] BUGFIX: do not read OSM file twice for getting postcodes --- src/lib.rs | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b969bb3..39243db 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,13 +73,14 @@ pub fn is_place(obj: &OsmObj) -> bool { pub fn get_postcodes( pbf: &BTreeMap, ) -> Result<(RTree, CosmogonyStats), Error> { - - let mut postcodes: Vec = Vec::with_capacity(1000); let stats = CosmogonyStats::default(); for obj in pbf.values() { + if !is_postal_code(obj) { + continue; + } if let OsmObj::Relation(ref relation) = *obj { if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { // Ignore zone without boundary polygon for the moment @@ -94,8 +95,6 @@ pub fn get_postcodes( let tree = RTree::bulk_load(postcodes); - - Ok((tree, stats)) } @@ -108,6 +107,9 @@ pub fn get_zones_and_stats( let mut zones = Vec::with_capacity(1000); for obj in pbf.values() { + if !is_admin(obj) { + continue; + } if let OsmObj::Relation(ref relation) = *obj { let next_index = ZoneIndex { index: zones.len() }; if let Some(zone) = Zone::from_osm_relation(relation, pbf, next_index, postcodes) { @@ -261,18 +263,13 @@ pub fn build_cosmogony( let file = File::open(&path).context("no pbf file")?; let parsed_pbf = OsmPbfReader::new(file) - .get_objs_and_deps(|o| is_admin(o) || is_place(o)) + .get_objs_and_deps(|o| is_admin(o) || is_place(o) || is_postal_code(o)) .context("invalid osm file")?; info!("reading pbf done."); - info!("Reading postal codes"); - let file = File::open(&path).context("no pbf file")?; - let parsed_postal_code = OsmPbfReader::new(file) - .get_objs_and_deps(|o| is_postal_code(o)) - .context("invalid osm file")?; - info!("reading postal code from pbf done."); - - let (postcodes,_) = get_postcodes(&parsed_postal_code)?; + info!("Starting to extract postcodes."); + let (postcodes,_) = get_postcodes(&parsed_pbf)?; + info!("Finished extracting postcodes."); let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf, &postcodes)?; From 0e16fb19f166df2d5d329070f4f490c4585fc4c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Tue, 1 Dec 2020 22:09:52 +0100 Subject: [PATCH 08/13] TASK: use rayon for reading postcodes --- src/lib.rs | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 39243db..8532dbc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,25 +73,28 @@ pub fn is_place(obj: &OsmObj) -> bool { pub fn get_postcodes( pbf: &BTreeMap, ) -> Result<(RTree, CosmogonyStats), Error> { - let mut postcodes: Vec = Vec::with_capacity(1000); + use rayon::prelude::*; let stats = CosmogonyStats::default(); - for obj in pbf.values() { - if !is_postal_code(obj) { - continue; - } - if let OsmObj::Relation(ref relation) = *obj { - if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { - // Ignore zone without boundary polygon for the moment - let bbox = postcode.boundary.bounding_rect().unwrap(); - postcodes.push(PostcodeBbox::new( - postcode, - &bbox - )); - }; - } - } + let postcodes: Vec = pbf.into_par_iter() + .filter_map(|(_, obj)| { + if !is_postal_code(obj) { + return None; + } + if let OsmObj::Relation(ref relation) = *obj { + if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { + // Ignore zone without boundary polygon for the moment + let bbox = postcode.boundary.bounding_rect().unwrap(); + return Some(PostcodeBbox::new( + postcode, + &bbox + )); + }; + } + return None; + }) + .collect(); let tree = RTree::bulk_load(postcodes); @@ -269,9 +272,11 @@ pub fn build_cosmogony( info!("Starting to extract postcodes."); let (postcodes,_) = get_postcodes(&parsed_pbf)?; - info!("Finished extracting postcodes."); + info!("Finished extracting postcodes {}", postcodes.size()); + info!("Starting to extract zones."); let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf, &postcodes)?; + info!("Finishing to extract zones."); create_ontology( &mut zones, @@ -282,7 +287,7 @@ pub fn build_cosmogony( filter_langs, )?; - stats.compute(&zones); + stats.compute(&zones); let cosmogony = Cosmogony { zones, From 20edf9e2b1357ca213392a51ac16154ab9a8e9a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Wed, 2 Dec 2020 20:32:10 +0100 Subject: [PATCH 09/13] TASK: restructure and parallelize postcode assignment --- src/lib.rs | 57 ++++++--------------------- src/postcode.rs | 73 ++++++++++++++++++++++++++++++++++ src/postcode_ext.rs | 86 ----------------------------------------- src/postcode_service.rs | 76 ++++++++++++++++++++++++++++++++++++ src/zone_ext.rs | 41 ++------------------ 5 files changed, 164 insertions(+), 169 deletions(-) delete mode 100644 src/postcode_ext.rs create mode 100644 src/postcode_service.rs diff --git a/src/lib.rs b/src/lib.rs index 8532dbc..bc9609d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,8 @@ mod hierarchy_builder; pub mod merger; mod zone_ext; pub mod zone_typer; -mod postcode_ext; mod postcode; +mod postcode_service; use crate::country_finder::CountryFinder; use crate::hierarchy_builder::{build_hierarchy, find_inclusions}; @@ -26,10 +26,7 @@ use std::path::Path; use cosmogony::{Zone, ZoneIndex}; use crate::zone_ext::ZoneExt; -use crate::postcode_ext::{PostcodeExt, PostcodeBbox}; -use rstar::RTree; -use geo::bounding_rect::BoundingRect; -use crate::postcode::Postcode; +use crate::postcode_service::{get_postcodes, assign_postcodes_to_zones}; #[rustfmt::skip] pub fn is_admin(obj: &OsmObj) -> bool { @@ -70,41 +67,9 @@ pub fn is_place(obj: &OsmObj) -> bool { } } -pub fn get_postcodes( - pbf: &BTreeMap, -) -> Result<(RTree, CosmogonyStats), Error> { - use rayon::prelude::*; - - let stats = CosmogonyStats::default(); - - let postcodes: Vec = pbf.into_par_iter() - .filter_map(|(_, obj)| { - if !is_postal_code(obj) { - return None; - } - if let OsmObj::Relation(ref relation) = *obj { - if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { - // Ignore zone without boundary polygon for the moment - let bbox = postcode.boundary.bounding_rect().unwrap(); - return Some(PostcodeBbox::new( - postcode, - &bbox - )); - }; - } - return None; - }) - .collect(); - - let tree = RTree::bulk_load(postcodes); - - Ok((tree, stats)) -} - pub fn get_zones_and_stats( - pbf: &BTreeMap, - postcodes: &RTree + pbf: &BTreeMap ) -> Result<(Vec, CosmogonyStats), Error> { let stats = CosmogonyStats::default(); let mut zones = Vec::with_capacity(1000); @@ -115,7 +80,7 @@ pub fn get_zones_and_stats( } if let OsmObj::Relation(ref relation) = *obj { let next_index = ZoneIndex { index: zones.len() }; - if let Some(zone) = Zone::from_osm_relation(relation, pbf, next_index, postcodes) { + if let Some(zone) = Zone::from_osm_relation(relation, pbf, next_index) { // Ignore zone without boundary polygon for the moment if zone.boundary.is_some() { zones.push(zone); @@ -270,12 +235,8 @@ pub fn build_cosmogony( .context("invalid osm file")?; info!("reading pbf done."); - info!("Starting to extract postcodes."); - let (postcodes,_) = get_postcodes(&parsed_pbf)?; - info!("Finished extracting postcodes {}", postcodes.size()); - info!("Starting to extract zones."); - let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf, &postcodes)?; + let (mut zones, mut stats) = get_zones_and_stats(&parsed_pbf)?; info!("Finishing to extract zones."); create_ontology( @@ -287,7 +248,13 @@ pub fn build_cosmogony( filter_langs, )?; - stats.compute(&zones); + info!("Starting to extract postcodes."); + let postcodes = get_postcodes(&parsed_pbf)?; + info!("Finished extracting postcodes {}", postcodes.size()); + + assign_postcodes_to_zones(&mut zones, &postcodes); + + stats.compute(&zones); let cosmogony = Cosmogony { zones, diff --git a/src/postcode.rs b/src/postcode.rs index 95ec285..d1612bd 100644 --- a/src/postcode.rs +++ b/src/postcode.rs @@ -1,4 +1,10 @@ use geo_types::{MultiPolygon}; +use osmpbfreader::{Relation, OsmId, OsmObj}; +use std::collections::BTreeMap; +use osm_boundaries_utils::build_boundary; +use geo::algorithm::area::Area; +use rstar::{AABB, RTreeObject}; +use geo::{Point, Rect}; #[derive(Debug, Clone)] pub struct Postcode { @@ -16,6 +22,38 @@ impl Postcode { pub fn unsigned_area(&self) -> f64 { return self.area; } + + /// create a zone from an osm relation and a geometry + pub fn from_osm_relation( + relation: &Relation, + objects: &BTreeMap, + ) -> Option { +// Skip postcode withjout postcode + let zipcode = match relation.tags.get("postal_code") { + Some(val) => val, + None => { + debug!( + "relation/{}: postcode region without name, skipped", + relation.id.0 + ); + "" + } + }; + + let osm_id = format!("relation:{}", relation.id.0.to_string()); + + let boundary = build_boundary(relation, objects); + + boundary.map(|boundary| { + let area = boundary.unsigned_area(); + Postcode { + osm_id, + zipcode: zipcode.to_string(), + boundary, + area + } + }) + } } impl Default for Postcode { @@ -29,4 +67,39 @@ impl Default for Postcode { } } + +#[derive(Debug)] +pub struct PostcodeBbox { + postcode: Postcode, + bbox: AABB>, +} + +impl PostcodeBbox { + pub fn new(postcode: Postcode, bbox: &Rect) -> Self { + PostcodeBbox { + postcode, + bbox: envelope(&bbox), + } + } + + pub fn get_postcode(&self) -> &Postcode { + return &self.postcode; + } +} + + +impl RTreeObject for PostcodeBbox { + type Envelope = AABB>; + fn envelope(&self) -> Self::Envelope { + self.bbox + } +} + + +fn envelope(bbox: &Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} + + + impl Postcode {} diff --git a/src/postcode_ext.rs b/src/postcode_ext.rs deleted file mode 100644 index 0bf0182..0000000 --- a/src/postcode_ext.rs +++ /dev/null @@ -1,86 +0,0 @@ -// extends Zones to add some capabilities -// The Zone's capabilities have been split in order to hide some functions specific to cosmogony -// and that we do not want to expose in the model - -use osm_boundaries_utils::build_boundary; -use osmpbfreader::objects::{OsmId, OsmObj, Relation}; -use std::collections::{BTreeMap}; -use rstar::{RTreeObject, AABB}; -use geo::{Point, Rect}; -use geo::algorithm::area::Area; -use crate::postcode::Postcode; - - -#[derive(Debug)] -pub struct PostcodeBbox { - postcode: Postcode, - bbox: AABB>, -} - -impl PostcodeBbox { - pub fn new(postcode: Postcode, bbox: &Rect) -> Self { - PostcodeBbox { - postcode, - bbox: envelope(&bbox), - } - } - - pub fn get_postcode(&self) -> &Postcode { - return &self.postcode; - } -} - - -impl RTreeObject for PostcodeBbox { - type Envelope = AABB>; - fn envelope(&self) -> Self::Envelope { - self.bbox - } -} - - -fn envelope(bbox: &Rect) -> AABB> { - AABB::from_corners(bbox.min().into(), bbox.max().into()) -} - -pub trait PostcodeExt { - /// create a zone from an osm relation and a geometry - fn from_osm_relation( - relation: &Relation, - objects: &BTreeMap, - ) -> Option; -} - -impl PostcodeExt for Postcode { - - fn from_osm_relation( - relation: &Relation, - objects: &BTreeMap, - ) -> Option { - // Skip postcode withjout postcode - let zipcode = match relation.tags.get("postal_code") { - Some(val) => val, - None => { - debug!( - "relation/{}: postcode region without name, skipped", - relation.id.0 - ); - "" - } - }; - - let osm_id = format!("relation:{}", relation.id.0.to_string()); - - let boundary = build_boundary(relation, objects); - - boundary.map(|boundary| { - let area = boundary.unsigned_area(); - Postcode { - osm_id, - zipcode: zipcode.to_string(), - boundary, - area - } - }) - } -} diff --git a/src/postcode_service.rs b/src/postcode_service.rs new file mode 100644 index 0000000..9713cf3 --- /dev/null +++ b/src/postcode_service.rs @@ -0,0 +1,76 @@ +use std::collections::BTreeMap; +use osmpbfreader::{OsmId, OsmObj}; +use rstar::{RTree, AABB}; +use crate::postcode::{PostcodeBbox, Postcode}; +use failure::Error; +use crate::is_postal_code; +use geo::prelude::{BoundingRect, Area}; +use geo_booleanop::boolean::BooleanOp; +use cosmogony::Zone; +use geo::{Rect, Point}; + +pub fn get_postcodes( + pbf: &BTreeMap, +) -> Result, Error> { + use rayon::prelude::*; + + let postcodes: Vec = pbf.into_par_iter() + .filter_map(|(_, obj)| { + if !is_postal_code(obj) { + return None; + } + if let OsmObj::Relation(ref relation) = *obj { + if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { + // Ignore zone without boundary polygon for the moment + let bbox = postcode.boundary.bounding_rect().unwrap(); + return Some(PostcodeBbox::new( + postcode, + &bbox, + )); + }; + } + return None; + }) + .collect(); + + let tree = RTree::bulk_load(postcodes); + + Ok(tree) +} + + +pub fn assign_postcodes_to_zones(zones: &mut Vec, + postcodes: &RTree) -> () { + use rayon::prelude::*; + zones + .into_par_iter() + .for_each(|z| { + if let Some(boundary) = z.boundary.as_ref() { + if let Some(bbox) = z.bbox { + if z.zip_codes.is_empty() { + //info!("ZipCodes were empty for {:?}, trying to fill them", name); + z.zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(&bbox)) + .filter(|postcode_bbox| { + //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); + + let overlap_between_postcode_and_area = BooleanOp::intersection(boundary, postcode_bbox.get_postcode().get_boundary()); + + // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" + let overlap_percentage_relative_to_postcode = overlap_between_postcode_and_area.unsigned_area() / postcode_bbox.get_postcode().unsigned_area(); + + //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); + // at least 5% des Postcodes müssen in der genannten Fläche liegen + overlap_percentage_relative_to_postcode > 0.05 + }) + .map(|x| x.get_postcode().zipcode.to_string()) + .collect(); + } + } + } + }); +} + +fn envelope(bbox: &Rect) -> AABB> { + AABB::from_corners(bbox.min().into(), bbox.max().into()) +} + diff --git a/src/zone_ext.rs b/src/zone_ext.rs index 52c094c..0afaf8e 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -13,15 +13,9 @@ use osmpbfreader::objects::{Node, OsmId, OsmObj, Relation, Tags}; use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; -use rstar::{RTree, AABB}; -use geo::{Rect, Point}; -use crate::postcode_ext::PostcodeBbox; -use geo_booleanop::boolean::BooleanOp; -use geo_booleanop; use geo; use geo_types::MultiPolygon; -use geo::algorithm::area::Area; pub trait ZoneExt { /// create a zone from an osm node @@ -31,8 +25,7 @@ pub trait ZoneExt { fn from_osm_relation( relation: &Relation, objects: &BTreeMap, - index: ZoneIndex, - postcodes: &RTree, + index: ZoneIndex ) -> Option; /// check is a zone contains another zone @@ -111,8 +104,7 @@ impl ZoneExt for Zone { fn from_osm_relation( relation: &Relation, objects: &BTreeMap, - index: ZoneIndex, - postcodes: &RTree, + index: ZoneIndex ) -> Option { use geo::centroid::Centroid; @@ -141,35 +133,12 @@ impl ZoneExt for Zone { let boundary:Option> = build_boundary(relation, objects); let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); - let mut zip_codes: Vec = zip_code + let zip_codes: Vec = zip_code .split(';') .filter(|s| !s.is_empty()) .map(|s| s.to_string()) .sorted() .collect(); - if let Some(boundary) = boundary.as_ref() { - if let Some(bbox) = bbox { - if zip_codes.is_empty() { - //info!("ZipCodes were empty for {:?}, trying to fill them", name); - zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(bbox)) - .filter(|postcode_bbox| { - //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); - - let overlap_between_postcode_and_area = BooleanOp::intersection(boundary, postcode_bbox.get_postcode().get_boundary()); - - // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" - let overlap_percentage_relative_to_postcode = overlap_between_postcode_and_area.unsigned_area() / postcode_bbox.get_postcode().unsigned_area(); - - //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); - // at least 5% des Postcodes müssen in der genannten Fläche liegen - overlap_percentage_relative_to_postcode > 0.05 - - }) - .map(|x| x.get_postcode().zipcode.to_string()) - .collect(); - } - } - } let wikidata = relation.tags.get("wikidata").map(|s| s.to_string()); let osm_id = format!("relation:{}", relation.id.0.to_string()); @@ -395,10 +364,6 @@ fn format_zip_code(zip_codes: &[String]) -> String { } } -fn envelope(bbox: Rect) -> AABB> { - AABB::from_corners(bbox.min().into(), bbox.max().into()) -} - /// get all the international names from the osm tags /// From 9e5af943897a59e77c91ad3449824be035b8d185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Wed, 2 Dec 2020 20:32:36 +0100 Subject: [PATCH 10/13] run cargo fmt --- src/lib.rs | 10 +++--- src/postcode.rs | 25 ++++++------- src/postcode_service.rs | 79 ++++++++++++++++++++--------------------- src/zone_ext.rs | 19 +++++----- 4 files changed, 61 insertions(+), 72 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bc9609d..947e443 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,10 +5,10 @@ mod additional_zones; mod country_finder; mod hierarchy_builder; pub mod merger; -mod zone_ext; -pub mod zone_typer; mod postcode; mod postcode_service; +mod zone_ext; +pub mod zone_typer; use crate::country_finder::CountryFinder; use crate::hierarchy_builder::{build_hierarchy, find_inclusions}; @@ -25,8 +25,8 @@ use std::path::Path; use cosmogony::{Zone, ZoneIndex}; +use crate::postcode_service::{assign_postcodes_to_zones, get_postcodes}; use crate::zone_ext::ZoneExt; -use crate::postcode_service::{get_postcodes, assign_postcodes_to_zones}; #[rustfmt::skip] pub fn is_admin(obj: &OsmObj) -> bool { @@ -56,7 +56,6 @@ pub fn is_postal_code(obj: &OsmObj) -> bool { } } - pub fn is_place(obj: &OsmObj) -> bool { match *obj { OsmObj::Node(ref node) => node @@ -67,9 +66,8 @@ pub fn is_place(obj: &OsmObj) -> bool { } } - pub fn get_zones_and_stats( - pbf: &BTreeMap + pbf: &BTreeMap, ) -> Result<(Vec, CosmogonyStats), Error> { let stats = CosmogonyStats::default(); let mut zones = Vec::with_capacity(1000); diff --git a/src/postcode.rs b/src/postcode.rs index d1612bd..64b68b5 100644 --- a/src/postcode.rs +++ b/src/postcode.rs @@ -1,22 +1,22 @@ -use geo_types::{MultiPolygon}; -use osmpbfreader::{Relation, OsmId, OsmObj}; -use std::collections::BTreeMap; -use osm_boundaries_utils::build_boundary; use geo::algorithm::area::Area; -use rstar::{AABB, RTreeObject}; use geo::{Point, Rect}; +use geo_types::MultiPolygon; +use osm_boundaries_utils::build_boundary; +use osmpbfreader::{OsmId, OsmObj, Relation}; +use rstar::{RTreeObject, AABB}; +use std::collections::BTreeMap; #[derive(Debug, Clone)] pub struct Postcode { pub osm_id: String, pub zipcode: String, pub boundary: geo_types::MultiPolygon, - pub area: f64 + pub area: f64, } impl Postcode { pub fn get_boundary(&self) -> &geo_types::MultiPolygon { - return &self.boundary + return &self.boundary; } pub fn unsigned_area(&self) -> f64 { @@ -28,7 +28,7 @@ impl Postcode { relation: &Relation, objects: &BTreeMap, ) -> Option { -// Skip postcode withjout postcode + // Skip postcode withjout postcode let zipcode = match relation.tags.get("postal_code") { Some(val) => val, None => { @@ -50,7 +50,7 @@ impl Postcode { osm_id, zipcode: zipcode.to_string(), boundary, - area + area, } }) } @@ -62,12 +62,11 @@ impl Default for Postcode { osm_id: "".into(), boundary: MultiPolygon(vec![]), zipcode: "".into(), - area: 0.0 + area: 0.0, } } } - #[derive(Debug)] pub struct PostcodeBbox { postcode: Postcode, @@ -87,7 +86,6 @@ impl PostcodeBbox { } } - impl RTreeObject for PostcodeBbox { type Envelope = AABB>; fn envelope(&self) -> Self::Envelope { @@ -95,11 +93,8 @@ impl RTreeObject for PostcodeBbox { } } - fn envelope(bbox: &Rect) -> AABB> { AABB::from_corners(bbox.min().into(), bbox.max().into()) } - - impl Postcode {} diff --git a/src/postcode_service.rs b/src/postcode_service.rs index 9713cf3..2d93ffd 100644 --- a/src/postcode_service.rs +++ b/src/postcode_service.rs @@ -1,20 +1,19 @@ -use std::collections::BTreeMap; -use osmpbfreader::{OsmId, OsmObj}; -use rstar::{RTree, AABB}; -use crate::postcode::{PostcodeBbox, Postcode}; -use failure::Error; use crate::is_postal_code; -use geo::prelude::{BoundingRect, Area}; -use geo_booleanop::boolean::BooleanOp; +use crate::postcode::{Postcode, PostcodeBbox}; use cosmogony::Zone; -use geo::{Rect, Point}; +use failure::Error; +use geo::prelude::{Area, BoundingRect}; +use geo::{Point, Rect}; +use geo_booleanop::boolean::BooleanOp; +use osmpbfreader::{OsmId, OsmObj}; +use rstar::{RTree, AABB}; +use std::collections::BTreeMap; -pub fn get_postcodes( - pbf: &BTreeMap, -) -> Result, Error> { +pub fn get_postcodes(pbf: &BTreeMap) -> Result, Error> { use rayon::prelude::*; - let postcodes: Vec = pbf.into_par_iter() + let postcodes: Vec = pbf + .into_par_iter() .filter_map(|(_, obj)| { if !is_postal_code(obj) { return None; @@ -23,10 +22,7 @@ pub fn get_postcodes( if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { // Ignore zone without boundary polygon for the moment let bbox = postcode.boundary.bounding_rect().unwrap(); - return Some(PostcodeBbox::new( - postcode, - &bbox, - )); + return Some(PostcodeBbox::new(postcode, &bbox)); }; } return None; @@ -38,39 +34,40 @@ pub fn get_postcodes( Ok(tree) } - -pub fn assign_postcodes_to_zones(zones: &mut Vec, - postcodes: &RTree) -> () { +pub fn assign_postcodes_to_zones(zones: &mut Vec, postcodes: &RTree) -> () { use rayon::prelude::*; - zones - .into_par_iter() - .for_each(|z| { - if let Some(boundary) = z.boundary.as_ref() { - if let Some(bbox) = z.bbox { - if z.zip_codes.is_empty() { - //info!("ZipCodes were empty for {:?}, trying to fill them", name); - z.zip_codes = postcodes.locate_in_envelope_intersecting(&envelope(&bbox)) - .filter(|postcode_bbox| { - //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); + zones.into_par_iter().for_each(|z| { + if let Some(boundary) = z.boundary.as_ref() { + if let Some(bbox) = z.bbox { + if z.zip_codes.is_empty() { + //info!("ZipCodes were empty for {:?}, trying to fill them", name); + z.zip_codes = postcodes + .locate_in_envelope_intersecting(&envelope(&bbox)) + .filter(|postcode_bbox| { + //info!(" - Candidate Postcode: {:?}", postcode_bbox.get_postcode().zipcode); - let overlap_between_postcode_and_area = BooleanOp::intersection(boundary, postcode_bbox.get_postcode().get_boundary()); + let overlap_between_postcode_and_area = BooleanOp::intersection( + boundary, + postcode_bbox.get_postcode().get_boundary(), + ); - // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" - let overlap_percentage_relative_to_postcode = overlap_between_postcode_and_area.unsigned_area() / postcode_bbox.get_postcode().unsigned_area(); + // anteil überlappender Bereiches / Postcode: "Wieviel % des Postcodes sind von dieser Fläche befüllt" + let overlap_percentage_relative_to_postcode = + overlap_between_postcode_and_area.unsigned_area() + / postcode_bbox.get_postcode().unsigned_area(); - //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); - // at least 5% des Postcodes müssen in der genannten Fläche liegen - overlap_percentage_relative_to_postcode > 0.05 - }) - .map(|x| x.get_postcode().zipcode.to_string()) - .collect(); - } + //info!(" CHOSEN {} {:?}", overlap_percentage_relative_to_postcode, overlap_percentage_relative_to_postcode > 0.05); + // at least 5% des Postcodes müssen in der genannten Fläche liegen + overlap_percentage_relative_to_postcode > 0.05 + }) + .map(|x| x.get_postcode().zipcode.to_string()) + .collect(); } } - }); + } + }); } fn envelope(bbox: &Rect) -> AABB> { AABB::from_corners(bbox.min().into(), bbox.max().into()) } - diff --git a/src/zone_ext.rs b/src/zone_ext.rs index 0afaf8e..135d49e 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -25,7 +25,7 @@ pub trait ZoneExt { fn from_osm_relation( relation: &Relation, objects: &BTreeMap, - index: ZoneIndex + index: ZoneIndex, ) -> Option; /// check is a zone contains another zone @@ -104,7 +104,7 @@ impl ZoneExt for Zone { fn from_osm_relation( relation: &Relation, objects: &BTreeMap, - index: ZoneIndex + index: ZoneIndex, ) -> Option { use geo::centroid::Centroid; @@ -130,7 +130,7 @@ impl ZoneExt for Zone { .or_else(|| relation.tags.get("postal_code")) .map_or("", |val| &val[..]); - let boundary:Option> = build_boundary(relation, objects); + let boundary: Option> = build_boundary(relation, objects); let bbox = boundary.as_ref().and_then(|b| b.bounding_rect()); let zip_codes: Vec = zip_code @@ -308,7 +308,7 @@ impl ZoneExt for Zone { // * for all cities where these entities are not explicitly distinct if (self.wikidata.is_some() && self.wikidata == center_wikidata) || (self.zone_type == Some(ZoneType::City) - && (center_wikidata.is_none() || self.wikidata.is_none())) + && (center_wikidata.is_none() || self.wikidata.is_none())) { let center_names: Vec<_> = self .center_tags @@ -333,8 +333,8 @@ impl ZoneExt for Zone { } fn create_lbl<'a, F>(zone: &'a Zone, all_zones: &'a MutableSlice<'_>, f: F) -> String - where - F: Fn(&Zone) -> String, +where + F: Fn(&Zone) -> String, { let mut hierarchy: Vec = zone.iter_hierarchy(all_zones).map(f).dedup().collect(); @@ -364,7 +364,6 @@ fn format_zip_code(zip_codes: &[String]) -> String { } } - /// get all the international names from the osm tags /// /// the names in osm are in a tag names `name:`, @@ -480,9 +479,9 @@ mod test { ("name", "bobito"), ("name:a_strange_lang_name", "bibi"), ] - .into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(); + .into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(); let names = get_international_names(&tags, "bob"); From b9a4b4c4478357dd007f32a5483026898fb72ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Wed, 2 Dec 2020 20:35:19 +0100 Subject: [PATCH 11/13] fix clippy issues --- src/postcode.rs | 6 +++--- src/postcode_service.rs | 4 ++-- src/zone_ext.rs | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/postcode.rs b/src/postcode.rs index 64b68b5..a04ea52 100644 --- a/src/postcode.rs +++ b/src/postcode.rs @@ -16,11 +16,11 @@ pub struct Postcode { impl Postcode { pub fn get_boundary(&self) -> &geo_types::MultiPolygon { - return &self.boundary; + &self.boundary } pub fn unsigned_area(&self) -> f64 { - return self.area; + self.area } /// create a zone from an osm relation and a geometry @@ -82,7 +82,7 @@ impl PostcodeBbox { } pub fn get_postcode(&self) -> &Postcode { - return &self.postcode; + &self.postcode } } diff --git a/src/postcode_service.rs b/src/postcode_service.rs index 2d93ffd..6ed62ce 100644 --- a/src/postcode_service.rs +++ b/src/postcode_service.rs @@ -25,7 +25,7 @@ pub fn get_postcodes(pbf: &BTreeMap) -> Result) -> Result, postcodes: &RTree) -> () { +pub fn assign_postcodes_to_zones(zones: &mut Vec, postcodes: &RTree) { use rayon::prelude::*; zones.into_par_iter().for_each(|z| { if let Some(boundary) = z.boundary.as_ref() { diff --git a/src/zone_ext.rs b/src/zone_ext.rs index 135d49e..8b3f367 100644 --- a/src/zone_ext.rs +++ b/src/zone_ext.rs @@ -14,7 +14,6 @@ use regex::Regex; use std::collections::{BTreeMap, BTreeSet}; use std::convert::TryInto; -use geo; use geo_types::MultiPolygon; pub trait ZoneExt { From 75521edb1cffa744c57617b739f84b407082eccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Wed, 2 Dec 2020 20:47:17 +0100 Subject: [PATCH 12/13] TASK: minor refactorings for code clarity --- src/lib.rs | 8 ++---- src/postcode_service.rs | 54 ++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 947e443..9e17983 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,7 @@ use std::path::Path; use cosmogony::{Zone, ZoneIndex}; -use crate::postcode_service::{assign_postcodes_to_zones, get_postcodes}; +use crate::postcode_service::{assign_postcodes_to_zones}; use crate::zone_ext::ZoneExt; #[rustfmt::skip] @@ -246,11 +246,7 @@ pub fn build_cosmogony( filter_langs, )?; - info!("Starting to extract postcodes."); - let postcodes = get_postcodes(&parsed_pbf)?; - info!("Finished extracting postcodes {}", postcodes.size()); - - assign_postcodes_to_zones(&mut zones, &postcodes); + assign_postcodes_to_zones(&mut zones, &parsed_pbf); stats.compute(&zones); diff --git a/src/postcode_service.rs b/src/postcode_service.rs index 6ed62ce..3277049 100644 --- a/src/postcode_service.rs +++ b/src/postcode_service.rs @@ -1,7 +1,6 @@ use crate::is_postal_code; use crate::postcode::{Postcode, PostcodeBbox}; use cosmogony::Zone; -use failure::Error; use geo::prelude::{Area, BoundingRect}; use geo::{Point, Rect}; use geo_booleanop::boolean::BooleanOp; @@ -9,33 +8,13 @@ use osmpbfreader::{OsmId, OsmObj}; use rstar::{RTree, AABB}; use std::collections::BTreeMap; -pub fn get_postcodes(pbf: &BTreeMap) -> Result, Error> { +pub fn assign_postcodes_to_zones(zones: &mut Vec, pbf: &BTreeMap) { use rayon::prelude::*; - let postcodes: Vec = pbf - .into_par_iter() - .filter_map(|(_, obj)| { - if !is_postal_code(obj) { - return None; - } - if let OsmObj::Relation(ref relation) = *obj { - if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { - // Ignore zone without boundary polygon for the moment - let bbox = postcode.boundary.bounding_rect().unwrap(); - return Some(PostcodeBbox::new(postcode, &bbox)); - }; - } - None - }) - .collect(); + info!("Starting to extract postcodes."); + let postcodes = get_postcodes_from_pbf(pbf); + info!("Finished extracting {} postcodes, now starting to match postcodes and zones", postcodes.size()); - let tree = RTree::bulk_load(postcodes); - - Ok(tree) -} - -pub fn assign_postcodes_to_zones(zones: &mut Vec, postcodes: &RTree) { - use rayon::prelude::*; zones.into_par_iter().for_each(|z| { if let Some(boundary) = z.boundary.as_ref() { if let Some(bbox) = z.bbox { @@ -66,6 +45,31 @@ pub fn assign_postcodes_to_zones(zones: &mut Vec, postcodes: &RTree) -> RTree { + use rayon::prelude::*; + + let postcodes_list: Vec = pbf + .into_par_iter() + .filter_map(|(_, obj)| { + if !is_postal_code(obj) { + return None; + } + if let OsmObj::Relation(ref relation) = *obj { + if let Some(postcode) = Postcode::from_osm_relation(relation, pbf) { + // Ignore zone without boundary polygon for the moment + let bbox = postcode.boundary.bounding_rect().unwrap(); + return Some(PostcodeBbox::new(postcode, &bbox)); + }; + } + None + }) + .collect(); + + RTree::bulk_load(postcodes_list) } fn envelope(bbox: &Rect) -> AABB> { From 5cfc98740d1d47b3fb2687242f4da68e1596eff5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Kurf=C3=BCrst?= Date: Wed, 19 May 2021 15:48:23 +0200 Subject: [PATCH 13/13] FEATURE: Sort Zip Codes --- src/postcode_service.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/postcode_service.rs b/src/postcode_service.rs index 3277049..48fd2b7 100644 --- a/src/postcode_service.rs +++ b/src/postcode_service.rs @@ -41,6 +41,7 @@ pub fn assign_postcodes_to_zones(zones: &mut Vec, pbf: &BTreeMap