diff --git a/docs/onebusaway-gtfs-transformer-cli.md b/docs/onebusaway-gtfs-transformer-cli.md index 92873436..ec386d41 100644 --- a/docs/onebusaway-gtfs-transformer-cli.md +++ b/docs/onebusaway-gtfs-transformer-cli.md @@ -343,13 +343,13 @@ those manually. #### Remove Old Calendar Statements -RemoveOldCalendarStatements is an operation designed to remove calendar entries that are no longer valid on today's date. +RemoveOldCalendarStatements is an operation designed to remove calendar and calendar dates entries that are no longer valid on today's date. -By default, it deletes entries from the calendar.txt file whose end_date field has passed. +By default, it deletes entries from both the calendar.txt and calendar_dates.txt files, where the end_date in calendar.txt or the date field in calendar_dates.txt has passed. -With the remove_today attribute added to the JSON transformer snippet, users can control whether calendar entries valid for today are included or excluded in the output GTFS. +With the remove_today attribute added to the JSON transformer snippet, users can control whether entries in calendar or calendar_dates that are valid for today are included or excluded in the GTFS output. - * If remove_today is set to true, the transformer will remove the calendar entries for the current date. + * If remove_today is set to true, the transformer will remove entries for the current date. ``` {"op":"transform", "class":"org.onebusaway.gtfs_transformer.impl.RemoveOldCalendarStatements", "remove_today":true} @@ -361,6 +361,15 @@ With the remove_today attribute added to the JSON transformer snippet, users can {"op":"transform", "class":"org.onebusaway.gtfs_transformer.impl.RemoveOldCalendarStatements", "remove_today":false} ``` +Additionally, after truncating the calendar entries, it is recommended to use a **retain operation** to ensure that only trips with valid calendar dates are retained. 
+ +Without this retain operation, the `trips.txt` file may contain trips whose service_id no longer has any calendar_dates entries, leading to invalid data. + +``` +{"op":"transform", "class":"org.onebusaway.gtfs_transformer.impl.RemoveOldCalendarStatements", "remove_today":false} +{"op":"retain", "match":{"file":"calendar_dates.txt"}, "retainBlocks":false} +``` + #### Deduplicate Calendar Entries Finds GTFS service_ids that have the exact same set of active days and consolidates each set of duplicated diff --git a/onebusaway-gtfs-transformer/src/main/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatements.java b/onebusaway-gtfs-transformer/src/main/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatements.java index 5fa49183..38f369b6 100644 --- a/onebusaway-gtfs-transformer/src/main/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatements.java +++ b/onebusaway-gtfs-transformer/src/main/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatements.java @@ -20,6 +20,7 @@ import org.onebusaway.csv_entities.schema.annotations.CsvField; import org.onebusaway.gtfs.model.ServiceCalendar; +import org.onebusaway.gtfs.model.ServiceCalendarDate; import org.onebusaway.gtfs.services.GtfsMutableRelationalDao; import org.onebusaway.gtfs_transformer.services.GtfsTransformStrategy; import org.onebusaway.gtfs_transformer.services.TransformContext; @@ -64,5 +65,16 @@ public void run(TransformContext transformContext, GtfsMutableRelationalDao gtfs for (ServiceCalendar serviceCalendar : serviceCalendarsToRemove) { removeEntityLibrary.removeCalendar(gtfsMutableRelationalDao, serviceCalendar.getServiceId()); } + + Set serviceCalendarDatesToRemove = new HashSet(); + for (ServiceCalendarDate calendarDate : gtfsMutableRelationalDao.getAllCalendarDates()) { + if (calendarDate.getDate().getAsDate().before(today)) { + serviceCalendarDatesToRemove.add(calendarDate); + } + } + for (ServiceCalendarDate serviceCalendarDate : serviceCalendarDatesToRemove) { + // here we 
can't delete the trips as the serviceid may be active elsewhere + removeEntityLibrary.removeServiceCalendarDate(gtfsMutableRelationalDao, serviceCalendarDate); + } } } \ No newline at end of file diff --git a/onebusaway-gtfs-transformer/src/test/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatementsTest.java b/onebusaway-gtfs-transformer/src/test/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatementsTest.java index 3519bfc2..87a2d393 100644 --- a/onebusaway-gtfs-transformer/src/test/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatementsTest.java +++ b/onebusaway-gtfs-transformer/src/test/java/org/onebusaway/gtfs_transformer/impl/RemoveOldCalendarStatementsTest.java @@ -31,11 +31,18 @@ public void setup() throws IOException{ String startDate = getCurrentDateFormatted(-3); String endDate = getCurrentDateFormatted(null); + // Define an additional date for testing purposes, obtained from the same helper with an offset of 3 + String threeDaysFromStartDate = getCurrentDateFormatted(3); + _gtfs.putCalendars( 1, "start_date="+startDate, "end_date="+endDate ); + + // Insert calendar dates entries + _gtfs.putCalendarDates("sid0="+startDate+","+endDate+","+ + threeDaysFromStartDate); } @Test @@ -46,6 +53,8 @@ public void testRemoveCalendarForToday() throws IOException { removeOldCalendarStatements.run(_context, dao); // Verify that GtfsMutableRelationalDao object no longer contains any calendar entries after removing the calendar for today's date assertEquals(0,dao.getAllCalendars().size()); + // Verify that GtfsMutableRelationalDao object no longer contains any calendar dates entries after removing invalid dates, including today's date + assertEquals(0,dao.getAllCalendarDates().size()); } @Test @@ -55,6 +64,8 @@ public void testRemoveCalendar() throws IOException { removeOldCalendarStatements.run(_context, dao); // Verify that GtfsMutableRelationalDao object still contain the initially added calendar entry assertEquals(1,dao.getAllCalendars().size()); + // 
Verify that GtfsMutableRelationalDao object contains two calendar date entries after removing the dates that have already passed + assertEquals(2,dao.getAllCalendarDates().size()); } // Helper function to get today's date in the required format