Skip to content

Commit

Permalink
feat(validation): add some more validation of shapes
Browse files Browse the repository at this point in the history
Refs #167
  • Loading branch information
evansiroky committed May 2, 2019
1 parent ba9e195 commit 45e4c52
Show file tree
Hide file tree
Showing 17 changed files with 124 additions and 12 deletions.
4 changes: 3 additions & 1 deletion src/main/java/com/conveyal/gtfs/error/NewGTFSErrorType.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ public enum NewGTFSErrorType {
ROUTE_SHORT_NAME_TOO_LONG(Priority.MEDIUM, "The short name of a route is too long for display in standard GTFS consumer applications."),
SERVICE_NEVER_ACTIVE(Priority.MEDIUM, "A service code was defined, but is never active on any date."),
SERVICE_UNUSED(Priority.MEDIUM, "A service code was defined, but is never referenced by any trips."),
SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase with stop times."),
SHAPE_UNUSED(Priority.LOW, "A shape was defined, but is never referenced by any trips."),
SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase in shapes."),
STOP_TIME_SHAPE_DIST_TRAVELED_NOT_INCREASING(Priority.MEDIUM, "Shape distance traveled must increase in stop times."),
STOP_DESCRIPTION_SAME_AS_NAME(Priority.LOW, "The description of a stop is identical to its name, so does not add any information."),
STOP_LOW_POPULATION_DENSITY(Priority.HIGH, "A stop is located in a geographic area with very low human population density."),
STOP_NAME_MISSING(Priority.MEDIUM, "A stop does not have a name."),
Expand Down
22 changes: 12 additions & 10 deletions src/main/java/com/conveyal/gtfs/loader/Feed.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ public class Feed {
// This may be the empty string if the feed is stored in the root ("public") schema.
public final String tablePrefix;

public final TableReader<Agency> agencies;
public final TableReader<Calendar> calendars;
public final TableReader<CalendarDate> calendarDates;
public final JDBCTableReader<Agency> agencies;
public final JDBCTableReader<Calendar> calendars;
public final JDBCTableReader<CalendarDate> calendarDates;
// public final TableReader<Fare> fares;
public final TableReader<Route> routes;
public final TableReader<Stop> stops;
public final TableReader<Trip> trips;
// public final TableReader<ShapePoint> shapePoints;
public final TableReader<StopTime> stopTimes;
public final JDBCTableReader<Route> routes;
public final JDBCTableReader<Stop> stops;
public final JDBCTableReader<Trip> trips;
public final JDBCTableReader<ShapePoint> shapePoints;
public final JDBCTableReader<StopTime> stopTimes;

/* A place to accumulate errors while the feed is loaded. Tolerate as many errors as possible and keep on loading. */
// TODO remove this and use only NewGTFSErrors in Validators, loaded into a JDBC table
Expand All @@ -63,7 +63,7 @@ public Feed (DataSource dataSource, String tablePrefix) {
routes = new JDBCTableReader(Table.ROUTES, dataSource, tablePrefix, EntityPopulator.ROUTE);
stops = new JDBCTableReader(Table.STOPS, dataSource, tablePrefix, EntityPopulator.STOP);
trips = new JDBCTableReader(Table.TRIPS, dataSource, tablePrefix, EntityPopulator.TRIP);
// shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
shapePoints = new JDBCTableReader(Table.SHAPES, dataSource, tablePrefix, EntityPopulator.SHAPE_POINT);
stopTimes = new JDBCTableReader(Table.STOP_TIMES, dataSource, tablePrefix, EntityPopulator.STOP_TIME);
}

Expand Down Expand Up @@ -91,7 +91,9 @@ public ValidationResult validate () {
new DuplicateStopsValidator(this, errorStorage),
new TimeZoneValidator(this, errorStorage),
new NewTripTimesValidator(this, errorStorage),
new NamesValidator(this, errorStorage));
new NamesValidator(this, errorStorage),
new ShapeValidator(this, errorStorage)
);

for (FeedValidator feedValidator : feedValidators) {
String validatorName = feedValidator.getClass().getSimpleName();
Expand Down
70 changes: 70 additions & 0 deletions src/main/java/com/conveyal/gtfs/validator/ShapeValidator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.model.ShapePoint;
import com.conveyal.gtfs.model.Trip;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import static com.conveyal.gtfs.error.NewGTFSErrorType.SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING;
import static com.conveyal.gtfs.error.NewGTFSErrorType.SHAPE_UNUSED;

/**
 * A validator that checks the integrity of the shapes records.
 *
 * <p>Two checks are performed:
 * <ul>
 *   <li>within a run of consecutive shape points sharing a shape_id, shape_dist_traveled must not decrease
 *       from one point to the next (SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING);</li>
 *   <li>every shape_id found in the shapes table must be referenced by at least one trip (SHAPE_UNUSED).</li>
 * </ul>
 */
public class ShapeValidator extends FeedValidator {
    public ShapeValidator(Feed feed, SQLErrorStorage errorStorage) {
        super(feed, errorStorage);
    }

    /**
     * Iterates over all shape points and then all trips of the feed, registering an error for each shape point
     * whose distance traveled is lower than that of the preceding point of the same shape, and one error for
     * each shape_id that no trip refers to.
     */
    @Override
    public void validate() {
        // the previously seen shape point that has a shape_id, or null when there is nothing to compare against
        ShapePoint lastShapePoint = null;
        Map<String, ShapePoint> firstShapePointByShapeId = new HashMap<>();
        // this stores all shape ids found in the shapes initially, but will eventually be modified to only have the
        // extra shape ids if there are any
        Set<String> extraShapeIds = new HashSet<>();

        // NOTE(review): the distance check compares each point with the one iterated immediately before it, so it
        // presumably relies on feed.shapePoints yielding points grouped by shape_id in sequence order -- confirm.
        for (ShapePoint shapePoint : feed.shapePoints) {
            // A point without a shape_id cannot be attributed to any shape. Skip it and forget the previous point
            // so that it is never dereferenced or compared against (this used to cause a NullPointerException on
            // the following iteration).
            if (shapePoint.shape_id == null) {
                lastShapePoint = null;
                continue;
            }

            // store the first found shapePoint when a new shape_id is found
            if (!firstShapePointByShapeId.containsKey(shapePoint.shape_id)) {
                firstShapePointByShapeId.put(shapePoint.shape_id, shapePoint);
                extraShapeIds.add(shapePoint.shape_id);
            }

            // Only compare against the previous point when it belongs to the same shape. The first point of the
            // feed and the first point of each new shape have no predecessor to compare with.
            boolean sameShapeAsLast = lastShapePoint != null && lastShapePoint.shape_id.equals(shapePoint.shape_id);

            // make sure the shape distance traveled is increasing (equal consecutive distances are not flagged)
            if (sameShapeAsLast && lastShapePoint.shape_dist_traveled > shapePoint.shape_dist_traveled) {
                registerError(shapePoint, SHAPE_SHAPE_DIST_TRAVELED_NOT_INCREASING, shapePoint.shape_dist_traveled);
            }

            lastShapePoint = shapePoint;
        }

        // verify that all found shapeIds exist in trips

        // compile a list of shape_ids found in the trips table
        // Optimization idea: speed up by making custom SQL call to fetch distinct shape_ids from trip table
        Set<String> tripShapeIds = new HashSet<>();
        for (Trip trip : feed.trips) {
            tripShapeIds.add(trip.shape_id);
        }

        // remove all trip shape ids from the found shape ids in the shapes table
        extraShapeIds.removeAll(tripShapeIds);

        // iterate over the extra shape Ids and create errors for each; the error is attached to the first
        // shape point that was seen for that shape_id
        for (String extraShapeId : extraShapeIds) {
            registerError(firstShapePointByShapeId.get(extraShapeId), SHAPE_UNUSED, extraShapeId);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ private void checkShapeDistTraveled(StopTime previous, StopTime current) {
current.shape_dist_traveled <= previous.shape_dist_traveled
)
) {
registerError(current, SHAPE_DIST_TRAVELED_NOT_INCREASING, current.shape_dist_traveled);
registerError(current, STOP_TIME_SHAPE_DIST_TRAVELED_NOT_INCREASING, current.shape_dist_traveled);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,agency_name,agency_url,agency_lang,agency_phone,agency_email,agency_timezone,agency_fare_url,agency_branding_url
1,Fake Transit,,,,,America/Los_Angeles,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
04100312-8fe1-46a5-a9f2-556f39478f57,1,1,1,1,1,1,1,20170915,20170917
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
service_id,date,exception_type
04100312-8fe1-46a5-a9f2-556f39478f57,20170916,2
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fare_id,price,currency_type,payment_method,transfers,transfer_duration
route_based_fare,1.23,USD,0,0,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fare_id,route_id,origin_id,destination_id,contains_id
route_based_fare,1,,,
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
feed_publisher_name,feed_publisher_url,feed_lang,feed_version
Conveyal,http://www.conveyal.com,en,1.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
trip_id,start_time,end_time,headway_secs,exact_times
frequency-trip,08:00:00,09:00:00,1800,0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,route_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color,route_branding_url
1,1,1,Route 1,,3,,7CE6E7,FFFFFF,
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled
5820f377-f947-4728-ac29-ac0102cbc34e,37.0612132,-122.0074332,1,0.0000000
5820f377-f947-4728-ac29-ac0102cbc34e,37.0611720,-122.0075000,2,7.4997067
5820f377-f947-4728-ac29-ac0102cbc34e,37.0613590,-122.0076830,3,33.8739075
5820f377-f947-4728-ac29-ac0102cbc34e,37.0608780,-122.0082780,4,109.0402932
5820f377-f947-4728-ac29-ac0102cbc34e,37.0603590,-122.0088280,5,84.6078298
5820f377-f947-4728-ac29-ac0102cbc34e,37.0597610,-122.0093540,6,265.8053023
5820f377-f947-4728-ac29-ac0102cbc34e,37.0590660,-122.0099190,7,357.8617018
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
a30277f8-e50a-4a85-9141-b1e0da9d429d,07:00:00,07:00:00,4u6g,1,,0,0,0.0000000,
a30277f8-e50a-4a85-9141-b1e0da9d429d,07:01:00,07:01:00,johv,2,,0,0,341.4491961,
frequency-trip,08:00:00,08:00:00,4u6g,1,,0,0,0.0000000,
frequency-trip,08:01:00,08:01:00,johv,2,,0,0,341.4491961,
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
4u6g,,Butler Ln,,37.0612132,-122.0074332,,,0,,,
johv,,Scotts Valley Dr & Victor Sq,,37.0590172,-122.0096058,,,0,,,
123,,Parent Station,,37.0666,-122.0777,,,1,,,
1234,,Child Stop,,37.06662,-122.07772,,,0,123,,
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from_stop_id,to_stop_id,transfer_type,min_transfer_time
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
route_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,bikes_allowed,wheelchair_accessible,service_id
1,a30277f8-e50a-4a85-9141-b1e0da9d429d,,,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57
1,frequency-trip,,,0,,5820f377-f947-4728-ac29-ac0102cbc34e,0,0,04100312-8fe1-46a5-a9f2-556f39478f57

0 comments on commit 45e4c52

Please sign in to comment.