diff --git a/README.md b/README.md index ef8f0e205..b0f7d1c33 100644 --- a/README.md +++ b/README.md @@ -36,77 +36,165 @@ $ git clone https://github.com/conveyal/gtfs-lib.git $ cd gtfs-lib ## build the jar $ mvn package -## run the validation suite on a GTFS file and save the result to result.json - change the version number to match file name in /target -$ java -jar target/gtfs-lib-2.2.0-SNAPSHOT-shaded.jar -validate /path/to/gtfs.zip /path/to/result.json +## Run the validation suite on a GTFS file and save the results to json files. +## Note: Change the version number to match the shaded jar file name in /target directory. +$ java -cp gtfs-lib-3.4.0-SNAPSHOT-shaded.jar com.conveyal.gtfs.GTFS --load /path/to/gtfs.zip --validate --json /optional/path/to/results ``` -### Validation result +### Load and Validation results -The result from running the command line validator is a json file containing -basic info about the feed as well, geographic info (bounding box, plus a merged buffers of the stop -locations), and a list of validation issues. +The result from running the load or validate command line option with +the `--json` option is a json file containing summary information about +the feed and the process (load or validate) that was run. The results will +be stored at `[feedId]-[load|validate].json` in the system temp directory +or the optional directory specified with the `--json` option. + +#### load.json ```json { - "fileName": "/path/to/gtfs.zip", - "validationTimestamp": "Tue Mar 21 11:21:56 EDT 2017", - "feedStatistics": { - "feed_id": "feed-id", - "revenueTime": 14778300, - "startDate": "2017-04-10", - "endDate": "2017-07-02", - "agencyCount": 1, - "routeCount": 81, - "stopCount": 3875, - "tripCount": 8633, - "frequencyCount": 0, - "stopTimeCount": 385558, - "shapePointCount": 246084, - "fareAttributeCount": 10, - "fareRuleCount": 186, - "serviceCount": 3, - "datesOfService": [ - "2017-04-10", - "2017-04-11", - ... 
- ], - "bounds": { - "west": -122.173638697, - "east": -121.54902915, - "south": 36.974922178, - "north": 37.558388156 - }, - "mergedBuffers": {GeoJSON MultiPolygon}, - }, - "errorCount": 203, - "errors": [ - { - "file": "stops", - "line": 2282, - "field": "stop_id", - "affectedEntityId": "3006", - "errorType": "UnusedStopError", - "priority": "LOW", - "stop": { - "sourceFileLine": 2282, - "stop_id": "3006", - "stop_code": "63006", - "stop_name": "COTTLE & MALONE", - "stop_desc": null, - "stop_lat": 37.290717809, - "stop_lon": -121.895693535, - "zone_id": "1", - "stop_url": null, - "location_type": 0, - "parent_station": null, - "stop_timezone": null, - "wheelchair_boarding": null, - "feed_id": "feed-id" - }, - "message": "Stop Id 3006 is not used in any trips.", - "messageWithContext": "stops line 2282, field 'stop_id': Stop Id 3006 is not used in any trips." - } - ... - ] -} + "filename" : "/Users/me/files/gtfs.zip", + "uniqueIdentifier" : "dqzn_ogndwayamkasyatagjfkoa", + "errorCount" : 0, + "fatalException" : null, + "agency" : { + "rowCount" : 1, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 165 + }, + "calendar" : { + "rowCount" : 29, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 1503 + }, + "calendarDates" : { + "rowCount" : 176, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 4904 + }, + "fareAttributes" : { + "rowCount" : 0, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 0 + }, + "fareRules" : { + "rowCount" : 0, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 0 + }, + "feedInfo" : { + "rowCount" : 0, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 0 + }, + "frequencies" : { + "rowCount" : 0, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 0 + }, + "routes" : { + "rowCount" : 275, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 42123 + }, + "shapes" : { + "rowCount" : 715639, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 
26076306 + }, + "stops" : { + "rowCount" : 4702, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 281423 + }, + "stopTimes" : { + "rowCount" : 2054189, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 128596782 + }, + "transfers" : { + "rowCount" : 0, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 0 + }, + "trips" : { + "rowCount" : 46036, + "errorCount" : 0, + "fatalException" : null, + "fileSize" : 4009568 + }, + "loadTimeMillis" : 62746, + "completionTime" : 1535468396785 +} ``` + +#### validate.json + +```json +{ + "fatalException" : null, + "errorCount" : 1193, + "declaredStartDate" : null, + "declaredEndDate" : null, + "firstCalendarDate" : { + "year" : 2018, + "month" : "JUNE", + "chronology" : { + "id" : "ISO", + "calendarType" : "iso8601" + }, + "dayOfMonth" : 30, + "dayOfWeek" : "SATURDAY", + "era" : "CE", + "dayOfYear" : 181, + "leapYear" : false, + "monthValue" : 6 + }, + "lastCalendarDate" : { + "year" : 2018, + "month" : "SEPTEMBER", + "chronology" : { + "id" : "ISO", + "calendarType" : "iso8601" + }, + "dayOfMonth" : 1, + "dayOfWeek" : "SATURDAY", + "era" : "CE", + "dayOfYear" : 244, + "leapYear" : false, + "monthValue" : 9 + }, + "dailyBusSeconds" : [ 9999, ... ], + "dailyTramSeconds" : [ 0, ... ], + "dailyMetroSeconds" : [ 0, ... ], + "dailyRailSeconds" : [ 0, ... ], + "dailyTotalSeconds" : [ 2220, ... ], + "dailyTripCounts" : [ 1, ... 
], + "fullBounds" : { + "minLon" : -74.040876, + "minLat" : 40.572635, + "maxLon" : -73.779519, + "maxLat" : 40.762524 + }, + "boundsWithoutOutliers" : { + "minLon" : 0.0, + "minLat" : 0.0, + "maxLon" : 0.0, + "maxLat" : 0.0 + }, + "validationTime" : 16319 +} + +``` \ No newline at end of file diff --git a/pom.xml b/pom.xml index ea617eef1..caac53044 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ com.conveyal gtfs-lib - 3.3.1-SNAPSHOT + 4.0.1-SNAPSHOT jar diff --git a/src/main/java/com/conveyal/gtfs/GTFS.java b/src/main/java/com/conveyal/gtfs/GTFS.java index 6e31b66da..d1e2fb690 100644 --- a/src/main/java/com/conveyal/gtfs/GTFS.java +++ b/src/main/java/com/conveyal/gtfs/GTFS.java @@ -5,18 +5,30 @@ import com.conveyal.gtfs.loader.JdbcGtfsExporter; import com.conveyal.gtfs.loader.JdbcGtfsLoader; import com.conveyal.gtfs.loader.JdbcGtfsSnapshotter; +import com.conveyal.gtfs.util.InvalidNamespaceException; import com.conveyal.gtfs.validator.ValidationResult; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.io.Files; import org.apache.commons.cli.*; import org.apache.commons.dbcp2.ConnectionFactory; import org.apache.commons.dbcp2.DriverManagerConnectionFactory; import org.apache.commons.dbcp2.PoolableConnectionFactory; import org.apache.commons.dbcp2.PoolingDataSource; +import org.apache.commons.dbutils.DbUtils; import org.apache.commons.pool2.impl.GenericObjectPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.sql.DataSource; +import java.io.File; +import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Statement; + +import static com.conveyal.gtfs.util.Util.ensureValidNamespace; /** * This is the public interface to the RDBMS backed functionality in gtfs-lib. 
@@ -86,6 +98,36 @@ public static ValidationResult validate (String feedId, DataSource dataSource) { return result; } + /** + * Deletes all tables for the specified feed. Simply put, this is a "drop schema" SQL statement called on the feed's + * namespace. + */ + public static void delete (String feedId, DataSource dataSource) throws SQLException, InvalidNamespaceException { + LOG.info("Deleting all tables (dropping schema) for {} feed namespace.", feedId); + Connection connection = null; + try { + connection = dataSource.getConnection(); + ensureValidNamespace(feedId); + // Mark entry in feeds table as deleted. + String deleteFeedEntrySql = "update feeds set deleted = true where namespace = ?"; + PreparedStatement deleteFeedStatement = connection.prepareStatement(deleteFeedEntrySql); + deleteFeedStatement.setString(1, feedId); + deleteFeedStatement.executeUpdate(); + // Drop all tables bearing the feedId namespace. + // Note: It does not appear to be possible to use prepared statements with "drop schema." + String dropSchemaSql = String.format("DROP SCHEMA %s CASCADE", feedId); + Statement statement = connection.createStatement(); + statement.executeUpdate(dropSchemaSql); + // Commit the changes. + connection.commit(); + } catch (InvalidNamespaceException | SQLException e) { + LOG.error(String.format("Could not drop feed for namespace %s", feedId), e); + throw e; + } finally { + if (connection != null) DbUtils.closeQuietly(connection); + } + } + /** * Create an automatically managed pool of database connections to the supplied JDBC database URL. * @@ -136,10 +178,9 @@ public static DataSource createDataSource (String url, String username, String p * A command-line interface that lets you load GTFS feeds into a database and validate the loaded feeds. * It also lets you run a GraphQL API for all the feeds loaded into the database. 
*/ - public static void main (String[] args) { - + public static void main (String[] args) throws IOException { Options options = getOptions(); - CommandLine cmd = null; + CommandLine cmd; try { cmd = new DefaultParser().parse(options, args); } catch (ParseException e) { @@ -159,12 +200,21 @@ public static void main (String[] args) { return; } - if (!(cmd.hasOption("export") || cmd.hasOption("snapshot") || cmd.hasOption("load") || cmd.hasOption("validate") || cmd.hasOption("graphql"))) { - LOG.error("Must specify one of 'snapshot', 'load', 'validate', 'export', or 'graphql'."); + if (!(cmd.hasOption("export") || cmd.hasOption("snapshot") || cmd.hasOption("load") || cmd.hasOption("validate") || cmd.hasOption("delete"))) { + LOG.error("Must specify one of 'snapshot', 'load', 'validate', 'export', or 'delete'."); printHelp(options); return; } - + boolean storeResults = cmd.hasOption("json"); + // Object mapper and directory used for writing load or validation results to file if required. + ObjectMapper mapper = null; + File directory = null; + if (storeResults) { + // Instantiate mapper for use with outputting load/validation results. + mapper = new ObjectMapper(); + directory = cmd.getOptionValue("json") != null ? 
new File(cmd.getOptionValue("json")) : Files.createTempDir(); + LOG.info("Storing results in directory: {}", directory.getAbsolutePath()); + } String databaseUrl = cmd.getOptionValue("database", DEFAULT_DATABASE_URL); String databaseUser = cmd.getOptionValue("user"); String databasePassword = cmd.getOptionValue("password"); @@ -179,6 +229,11 @@ public static void main (String[] args) { if (cmd.hasOption("load")) { String filePath = cmd.getOptionValue("load"); loadResult = load(filePath, dataSource); + if (storeResults) { + File loadResultFile = new File(directory, String.format("%s-load.json", loadResult.uniqueIdentifier)); + LOG.info("Storing load result at {}", loadResultFile.getAbsolutePath()); + mapper.writerWithDefaultPrettyPrinter().writeValue(loadResultFile, loadResult); + } LOG.info("The unique identifier for this feed is: {}", loadResult.uniqueIdentifier); } @@ -194,6 +249,11 @@ public static void main (String[] args) { if (feedToValidate != null) { LOG.info("Validating feed with unique identifier {}", feedToValidate); ValidationResult validationResult = validate (feedToValidate, dataSource); + if (storeResults) { + File validationResultFile = new File(directory, String.format("%s-validation.json", feedToValidate)); + LOG.info("Storing validation result at {}", validationResultFile.getAbsolutePath()); + mapper.writerWithDefaultPrettyPrinter().writeValue(validationResultFile, validationResult); + } LOG.info("Done validating."); } else { LOG.error("No feed to validate. 
Specify one, or load a feed in the same command."); @@ -208,7 +268,12 @@ public static void main (String[] args) { if (namespaceToSnapshot != null) { LOG.info("Snapshotting feed with unique identifier {}", namespaceToSnapshot); FeedLoadResult snapshotResult = makeSnapshot(namespaceToSnapshot, dataSource); - LOG.info("Done snapshotting."); + if (storeResults) { + File snapshotResultFile = new File(directory, String.format("%s-snapshot.json", snapshotResult.uniqueIdentifier)); + LOG.info("Storing snapshot result at {}", snapshotResultFile.getAbsolutePath()); + mapper.writerWithDefaultPrettyPrinter().writeValue(snapshotResultFile, snapshotResult); + } + LOG.info("Done snapshotting. The unique identifier for this snapshot is: {}", snapshotResult.uniqueIdentifier); } else { LOG.error("No feed to snapshot. Specify one, or load a feed in the same command."); } @@ -229,12 +294,21 @@ public static void main (String[] args) { } } - if (cmd.hasOption("graphql")) { - Integer port = Integer.parseInt(cmd.getOptionValue("graphql")); - LOG.info("Starting GraphQL server on port {}", port); - throw new UnsupportedOperationException(); - } + if (cmd.hasOption("delete")) { + String namespaceToDelete = cmd.getOptionValue("delete"); + if (namespaceToDelete != null) { + LOG.info("Deleting feed with unique identifier {}", namespaceToDelete); + try { + delete(namespaceToDelete, dataSource); + LOG.info("Feed {} has been successfully deleted.", namespaceToDelete); + } catch (SQLException | InvalidNamespaceException e) { + e.printStackTrace(); + } + } else { + LOG.error("No feed to delete. 
Specify one with the --delete argument."); + } + } } /** @@ -244,27 +318,45 @@ public static void main (String[] args) { private static Options getOptions () { Options options = new Options(); options.addOption(Option.builder("h").longOpt("help").desc("print this message").build()); - options.addOption(Option.builder().longOpt("export").hasArg() - .argName("feedId") - .desc("export GTFS data from the given database feedId to the given directory").build()); - options.addOption(Option.builder().longOpt("outFile").hasArg() + options.addOption(Option.builder() + .longOpt("export").hasArg() + .argName("namespace") + .desc("export GTFS data from the given database namespace (feed) to the given directory").build()); + options.addOption(Option.builder() + .longOpt("outFile").hasArg() .argName("file") .desc("zip file path for the exported GTFS").build()); - options.addOption(Option.builder().longOpt("load").hasArg() - .argName("file").desc("load GTFS data from the given file").build()); - options.addOption(Option.builder().longOpt("validate").hasArg().optionalArg(true).argName("feed") + options.addOption(Option.builder() + .longOpt("load").hasArg() + .argName("file") + .desc("load GTFS data from the given file").build()); + options.addOption(Option.builder() + .longOpt("validate").hasArg().optionalArg(true) + .argName("namespace") .desc("validate the specified feed. defaults to the feed loaded with the --load option").build()); - options.addOption(Option.builder().longOpt("snapshot").hasArg() - .argName("feedId").desc("snapshot GTFS data from the given database feedId").build()); - options.addOption(Option.builder("d").longOpt("database") - .hasArg().argName("url").desc("JDBC URL for the database. 
Defaults to " + DEFAULT_DATABASE_URL).build()); - options.addOption(Option.builder("u").longOpt("user") - .hasArg().argName("username").desc("database username").build()); - options.addOption(Option.builder("p").longOpt("password") - .hasArg().argName("password").desc("database password").build()); - options.addOption(Option.builder().longOpt("graphql") - .desc("start a GraphQL API on the given port").optionalArg(true).build()); - options.addOption(Option.builder().longOpt("json").desc("optionally store in result.json").build()); + options.addOption(Option.builder() + .longOpt("snapshot").hasArg() + .argName("namespace") + .desc("snapshot GTFS data from the given database namespace (feed)").build()); + options.addOption(Option.builder("d") + .longOpt("database").hasArg() + .argName("url") + .desc("JDBC URL for the database. Defaults to " + DEFAULT_DATABASE_URL).build()); + options.addOption(Option.builder("u").longOpt("user").hasArg() + .argName("username") + .desc("database username").build()); + options.addOption(Option.builder("p") + .longOpt("password").hasArg() + .argName("password") + .desc("database password").build()); + options.addOption(Option.builder() + .longOpt("delete").hasArg() + .argName("namespace") + .desc("delete the feed for the specified namespace.").build()); + options.addOption(Option.builder() + .longOpt("json").hasArg().optionalArg(true) + .argName("directory") + .desc("optionally store results in specified directory (defaults to system temp)").build()); return options; } diff --git a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java index b7d9babf3..489cdcdea 100644 --- a/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java +++ b/src/main/java/com/conveyal/gtfs/graphql/GTFSGraphQL.java @@ -1,6 +1,5 @@ package com.conveyal.gtfs.graphql; -import com.conveyal.gtfs.GTFS; import graphql.GraphQL; import javax.sql.DataSource; @@ -23,7 +22,8 @@ public class GTFSGraphQL { /** 
Username and password can be null if connecting to a local instance with host-based authentication. */ public static void initialize (DataSource dataSource) { GTFSGraphQL.dataSource = dataSource; - GRAPHQL = new GraphQL(GraphQLGtfsSchema.feedBasedSchema); + GRAPHQL = GraphQL.newGraphQL(GraphQLGtfsSchema.feedBasedSchema) + .build(); } public static Connection getConnection() { diff --git a/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java b/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java index 77116da76..69d62d9f7 100644 --- a/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java +++ b/src/main/java/com/conveyal/gtfs/loader/FeedLoadResult.java @@ -16,6 +16,7 @@ public class FeedLoadResult implements Serializable { private static final long serialVersionUID = 1L; + public String filename; public String uniqueIdentifier; public int errorCount; public String fatalException; diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index f8e89baa5..47a9d1c43 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -127,6 +127,7 @@ public FeedLoadResult loadTables () { // retry in a loop. // TODO handle the case where we don't want any prefix. this.tablePrefix = randomIdString(); + result.filename = gtfsFilePath; result.uniqueIdentifier = tablePrefix; registerFeed(gtfsFile); // Include the dot separator in the table prefix. 
@@ -274,6 +275,7 @@ private TableLoadResult load (Table table) { int initialErrorCount = errorStorage.getErrorCount(); try { tableLoadResult.rowCount = loadInternal(table); + tableLoadResult.fileSize = getTableSize(table); } catch (Exception ex) { LOG.error("Fatal error loading table", ex); tableLoadResult.fatalException = ex.toString(); @@ -295,6 +297,15 @@ private TableLoadResult load (Table table) { return tableLoadResult; } + /** + * Get the uncompressed file size in bytes for the specified GTFS table. + */ + private int getTableSize(Table table) { + ZipEntry zipEntry = zip.getEntry(table.name + ".txt"); + if (zipEntry == null) return 0; + return (int) zipEntry.getSize(); + } + /** * This function will throw any exception that occurs. Those exceptions will be handled by the outer load method. * @return number of rows that were loaded. diff --git a/src/main/java/com/conveyal/gtfs/loader/TableLoadResult.java b/src/main/java/com/conveyal/gtfs/loader/TableLoadResult.java index d43bcc726..540b44350 100644 --- a/src/main/java/com/conveyal/gtfs/loader/TableLoadResult.java +++ b/src/main/java/com/conveyal/gtfs/loader/TableLoadResult.java @@ -12,6 +12,7 @@ public class TableLoadResult implements Serializable { public int rowCount; public int errorCount; public String fatalException = null; + public int fileSize; /** No-arg constructor for Mongo */ public TableLoadResult () { } diff --git a/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java b/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java index 0f3d57250..8e38b745f 100644 --- a/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java +++ b/src/main/java/com/conveyal/gtfs/validator/PatternFinderValidator.java @@ -161,6 +161,7 @@ public void complete(ValidationResult validationResult) { : departure - arrival; insertPatternStopStatement.setString(1, pattern.pattern_id); + // Stop sequence is zero-based. 
setIntParameter(insertPatternStopStatement, 2, i); insertPatternStopStatement.setString(3, stopId); setIntParameter(insertPatternStopStatement,4, travelTime); diff --git a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java index 3f294f1d9..e14a19071 100644 --- a/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java +++ b/src/test/java/com/conveyal/gtfs/graphql/GTFSGraphQLTest.java @@ -227,7 +227,7 @@ public void canSanitizeSQLInjectionSentAsKeyValue() throws IOException, SQLExcep * Helper method to make a query with default variables * * @param queryFilename the filename that should be used to generate the GraphQL query. This file must be present - * in the `src/test/resources/grahpql` folder + * in the `src/test/resources/graphql` folder */ private Map queryGraphQL(String queryFilename) throws IOException { Map variables = new HashMap(); @@ -239,7 +239,7 @@ private Map queryGraphQL(String queryFilename) throws IOExceptio * Helper method to execute a GraphQL query and return the result * * @param queryFilename the filename that should be used to generate the GraphQL query. This file must be present - * in the `src/test/resources/grahpql` folder + * in the `src/test/resources/graphql` folder * @param variables a Map of input variables to the graphql query about to be executed * @param dataSource the datasource to use when initializing GraphQL */