Skip to content

Commit d0b6b2e

Browse files
committed
add optimise-services-and-exceptions ✅📝
part of #10
1 parent f77a344 commit d0b6b2e

File tree

6 files changed

+327
-5
lines changed

6 files changed

+327
-5
lines changed

docs/api.md

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
- [`readPathways(readFile, filters)`](#readpathways)
1919
- [`readShapes(readFile, filters)`](#readshapes)
2020
- [`computeTrajectories(readFile, filters)`](#computetrajectories)
21+
- [`optimiseServicesAndExceptions(readFile, timezone, filters)`](#optimiseservicesandexceptions)
2122

2223

2324
## `readCsv`
@@ -764,3 +765,94 @@ for await (const trajectory of computeTrajectories(readFile, filters)) {
764765
```
765766

766767
*Note:* In order to work, `computeTrajectories` must load reduced forms of `trips.txt`, `stop_times.txt`, `frequencies.txt` and `shapes.txt` into memory. See [*store API*](#store-api) for more details.
768+
769+
770+
## `optimiseServicesAndExceptions`
771+
772+
A GTFS feed may have a set of `calendar.txt` and/or `calendar_dates.txt` rows that express service days in an overly verbose way. Some examples:
773+
774+
- feeds without `calendar.txt`, where every service day is expressed as an `exception_type=1` (added) exception – In many such cases, we can reduce the number of exceptions by adding a row in `calendar.txt` with the respective day(s) turned on (e.g. `tuesday=1`).
775+
- feeds with `calendar.txt`, where some services have more `exception_type=2` (removed) exceptions than "regular" day-of-the-week-based service dates (e.g. `thursday=1`) – In this case, we can turn off the "regular" service dates (`thursday=0`) and use `exception_type=1` (added) exceptions.
776+
777+
For each service, **`optimiseServicesAndExceptions` computes the optimal combination of day of the week flags (e.g. `monday=1`) and exceptions, minimising the number of exceptions necessary to express the set of service dates**.
778+
779+
```js
780+
const readCsv = require('gtfs-utils/read-csv')
781+
const optimiseServices = require('gtfs-utils/optimise-services-and-exceptions')
782+
783+
const readFile = name => readCsv('path/to/gtfs/' + name + '.txt')
784+
785+
const services = optimiseServices(readFile, 'Europe/Berlin')
786+
for await (const [id, changed, service, exceptions] of services) {
787+
if (changed) {
788+
console.log(id, 'changed!')
789+
console.log('service:', service)
790+
console.log('exceptions:', exceptions)
791+
} else {
792+
console.log(id, 'unchanged!', id)
793+
}
794+
}
795+
```
796+
797+
`optimiseServicesAndExceptions(readFile, timezone, filters = {})` reads `calendar.txt` and `calendar_dates.txt`. It returns an [async iterable](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Symbol/asyncIterator) of `[serviceId, changed, service, exceptions]` entries.
798+
799+
- If `changed` is `true`,
800+
- the service's `calendar.txt` row or `calendar_dates.txt` rows (or both) have been optimised,
801+
- `service` contains the optimised service,
802+
- `exceptions` contains all `calendar_dates.txt` rows applying to the *optimised* service.
803+
- If `changed` is `false`,
804+
- the service cannot be optimised,
805+
- `service` contains the `calendar.txt` row as it was before, or a mock service if there was none before,
806+
- `exceptions` contains the `calendar_dates.txt` rows as they were before.
807+
808+
The [test fixture](../test/fixtures/optimise-services-and-exceptions) contains three services (`more-exceptions-than-regular`, `more-regular-than-exceptions`, `should-stay-unchanged`), of which the first two can be optimised. With its files as input, the code above will print the following:
809+
810+
```
811+
more-exceptions-than-regular changed!
812+
service: {
813+
service_id: 'more-exceptions-than-regular',
814+
start_date: '20220301',
815+
end_date: '20220410',
816+
monday: '0',
817+
tuesday: '0',
818+
wednesday: '0',
819+
thursday: '0',
820+
friday: '0',
821+
saturday: '0',
822+
sunday: '0',
823+
}
824+
exceptions: [{
825+
service_id: 'more-exceptions-than-regular',
826+
date: '20220302',
827+
exception_type: '1',
828+
}, {
829+
service_id: 'more-exceptions-than-regular',
830+
date: '20220324',
831+
exception_type: '1',
832+
}, {
833+
service_id: 'more-exceptions-than-regular',
834+
date: '20220330',
835+
exception_type: '1',
836+
}, {
837+
service_id: 'more-exceptions-than-regular',
838+
date: '20220331',
839+
exception_type: '1',
840+
}]
841+
842+
more-regular-than-exceptions changed!
843+
service: {
844+
service_id: 'more-regular-than-exceptions',
845+
monday: '1',
846+
tuesday: '0',
847+
wednesday: '0',
848+
thursday: '0',
849+
friday: '1',
850+
saturday: '0',
851+
sunday: '0',
852+
start_date: '20220301',
853+
end_date: '20220410',
854+
}
855+
exceptions: []
856+
857+
should-stay-unchanged unchanged! should-stay-unchanged
858+
```
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
'use strict'

const path = require('path')
const readCsv = require('../read-csv')
const optimiseServicesAndExceptions = require('../optimise-services-and-exceptions')

// Directory containing the fixture feed used by this example.
const FIXTURE_DIR = path.join(__dirname, '..', 'test', 'fixtures', 'optimise-services-and-exceptions')

// Maps a GTFS file name (without extension) to `<FIXTURE_DIR>/<name>.csv`
// and hands that path to `readCsv`.
const readFile = name => readCsv(path.join(FIXTURE_DIR, name + '.csv'))

// Walks over every `[id, changed, service, exceptions]` entry and prints
// the optimised service & exceptions, or a note that nothing changed.
const main = async () => {
	const results = optimiseServicesAndExceptions(readFile, 'Europe/Berlin')
	for await (const [serviceId, changed, service, exceptions] of results) {
		if (!changed) {
			console.log(serviceId, 'unchanged!', serviceId)
			continue
		}
		console.log(serviceId, 'changed!')
		console.log('service:', service)
		console.log('exceptions:', exceptions)
	}
}

// Any failure is printed and turned into a non-zero exit code.
main().catch((err) => {
	console.error(err)
	process.exit(1)
})

lib/dates-between.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const weekdayIndexes = [
3030

3131
const cache = new LRUCache({maxSize: 50})
3232

33-
const computeDatesBetween = (beginning, end, weekdays, timezone) => {
33+
const computeDatesBetween = (beginning, end, weekdays, timezone, weekdayMap = null) => {
3434
if (!isObj(weekdays)) throw new Error('weekdays must be an object.')
3535
weekdays = Object.assign(Object.create(null), noWeekdays, weekdays)
3636
for (let weekday in weekdays) {
@@ -50,6 +50,7 @@ const computeDatesBetween = (beginning, end, weekdays, timezone) => {
5050
weekdays.saturday,
5151
weekdays.sunday,
5252
timezone,
53+
weekdayMap !== null ? 'wd' : '',
5354
].join('-')
5455
if (cache.has(signature)) {
5556
return Array.from(cache.get(signature))
@@ -62,8 +63,14 @@ const computeDatesBetween = (beginning, end, weekdays, timezone) => {
6263
const dates = []
6364
let t = new Date(beginning + 'T00:00Z')
6465
for (let i = 0; t <= end; i++) {
65-
if (weekdays[weekdayIndexes[t.getUTCDay()]]) {
66-
dates.push(t.toISOString().slice(0, 10))
66+
const weekday = t.getUTCDay()
67+
if (weekdays[weekdayIndexes[weekday]]) {
68+
const date = t.toISOString().slice(0, 10)
69+
dates.push(date)
70+
71+
if (weekdayMap !== null) {
72+
weekdayMap.set(date, weekday)
73+
}
6774
}
6875
t.setUTCDate(t.getUTCDate() + 1)
6976
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
'use strict'

const readServicesAndExceptions = require('./read-services-and-exceptions')
const datesBetween = require('./lib/dates-between')

// Weekday names (as used for the calendar.txt columns), indexed by the
// weekday number of a JS `Date` (0 = Sunday … 6 = Saturday).
const WEEKDAYS = [
	// JS Date ordering
	'sunday',
	'monday',
	'tuesday',
	'wednesday',
	'thursday',
	'friday',
	'saturday',
]

// Template with all weekdays turned off; single weekdays get turned on
// by spreading it into a new object.
const noWeekday = {
	monday: false,
	tuesday: false,
	wednesday: false,
	thursday: false,
	friday: false,
	saturday: false,
	sunday: false,
}

// '2022-03-02' -> '20220302'
const formatDate = isoDate => isoDate.split('-').join('')

/**
 * For each service, picks the weekday flags (`monday` … `sunday`) and the
 * list of exceptions that together express the service's dates.
 *
 * A weekday flag is set to `'1'` if the service runs on more than half of
 * the dates of that weekday between `start_date` and `end_date`, otherwise
 * to `'0'`. The exceptions are then derived from the difference between the
 * dates implied by the flags and the actual service dates:
 * - every service date not implied by a flag becomes an
 *   `exception_type: '1'` (added) exception,
 * - every date implied by a flag that is not a service date becomes an
 *   `exception_type: '2'` (removed) exception.
 *
 * @param {Function} readFile - passed through to `readServicesAndExceptions`
 * @param {string} timezone - passed through to `readServicesAndExceptions` and `datesBetween`
 * @param {object} [filters={}] - passed through to `readServicesAndExceptions`
 * @param {object} [opt={}] - passed through to `readServicesAndExceptions`;
 *   `exposeStats` and `weekdaysMap` are always overridden
 * @yields {Array} `[serviceId, changed, service, exceptions]`, where `changed`
 *   tells whether any weekday flag differs from the input service
 */
const optimiseServicesAndExceptions = async function* (readFile, timezone, filters = {}, opt = {}) {
	// date (YYYY-MM-DD) -> weekday index, shared with the reader & `datesBetween`
	const weekdaysMap = new Map()
	const svcsAndExceptions = readServicesAndExceptions(readFile, timezone, filters, {
		...opt,
		exposeStats: true,
		weekdaysMap,
	})

	for await (let [serviceId, dates, svc, nrOfDates] of svcsAndExceptions) {
		// per weekday: all dates of that weekday within the service's date range
		const defaultDates = WEEKDAYS.map((weekday) => datesBetween(
			svc.start_date, svc.end_date,
			{...noWeekday, [weekday]: true},
			timezone,
			weekdaysMap,
		))

		let changed = false
		svc = {...svc} // don't mutate the object yielded by the reader
		// all dates implied by the weekday flags that end up turned on
		const regularDates = new Set()
		for (let wd = 0; wd < WEEKDAYS.length; wd++) {
			// todo: make this customisable
			// turn the flag on if the service runs on more than half of the dates
			const flag = nrOfDates[wd] * 2 > defaultDates[wd].length ? '1' : '0'
			changed = changed || (flag !== svc[WEEKDAYS[wd]])
			svc[WEEKDAYS[wd]] = flag

			if (flag === '1') {
				for (const date of defaultDates[wd]) regularDates.add(date)
			}
		}

		const serviceDates = new Set(dates)
		const exceptions = []

		// service dates that the weekday flags do not cover (flag turned off,
		// or date outside of the date range) need to be added explicitly
		for (const date of dates) {
			if (regularDates.has(date)) continue
			exceptions.push({
				service_id: serviceId,
				date: formatDate(date),
				exception_type: '1', // added
			})
		}

		// dates that the weekday flags cover, but on which the service does
		// *not* run, need to be removed explicitly – regardless of whether the
		// input expressed them as a removal or just never mentioned them
		const superfluousDates = Array.from(regularDates)
			.filter(date => !serviceDates.has(date))
			.sort() // ISO dates sort chronologically
		for (const date of superfluousDates) {
			exceptions.push({
				service_id: serviceId,
				date: formatDate(date),
				exception_type: '2', // removed
			})
		}

		// todo [breaking]: remove serviceId (idx 0), move svc first,
		// follow read-services-and-exceptions here
		yield [serviceId, changed, svc, exceptions]
	}
}

module.exports = optimiseServicesAndExceptions

read-services-and-exceptions.js

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
3434
throw new TypeError('filters.serviceException must be a function')
3535
}
3636

37+
const {
38+
exposeStats,
39+
weekdaysMap,
40+
} = {
41+
exposeStats: false,
42+
weekdaysMap: new Map(),
43+
...opt,
44+
}
45+
3746
await new Promise(r => setTimeout(r, 0))
3847

3948
let servicesFileExists = true
@@ -83,9 +92,19 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
8392
filterB: serviceExceptionFilter,
8493
})
8594

95+
// Returns the weekday index (0 = Sunday … 6 = Saturday) of a `YYYY-MM-DD`
// date, memoised in `weekdaysMap`.
const weekdayOf = (date) => {
	if (weekdaysMap.has(date)) return weekdaysMap.get(date)
	// The date is parsed as midnight UTC, so the weekday must be read in UTC
	// as well; the local-time `getDay()` yields the previous weekday in any
	// timezone west of UTC.
	const weekday = new Date(date + 'T00:00Z').getUTCDay()
	weekdaysMap.set(date, weekday)
	return weekday
}
101+
86102
const {NONE} = joinIteratively
87103
let dates = []
88104
let svc = {service_id: NaN}
105+
// todo: default to null? perf?
106+
let nrOfDates = new Array(7).fill(0)
107+
let removedDates = []
89108

90109
for await (const [s, ex] of pairs) {
91110
let _svc = {service_id: NaN}
@@ -113,12 +132,13 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
113132
if (dates.length > 0) {
114133
if (svc.start_date === null) svc.start_date = dates[0]
115134
if (svc.end_date === null) svc.end_date = dates[dates.length - 1]
116-
yield [svc.service_id, dates, svc]
135+
yield [svc.service_id, dates, svc, nrOfDates, removedDates]
117136
}
118137

119138
svc = _svc
120139

121140
if (s !== NONE) {
141+
const wdm = exposeStats ? weekdaysMap : null
122142
dates = datesBetween(
123143
s.start_date, s.end_date,
124144
{
@@ -131,10 +151,19 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
131151
sunday: s.sunday === '1',
132152
},
133153
timezone,
154+
wdm,
134155
)
135156
} else {
136157
dates = []
137158
}
159+
160+
if (exposeStats) {
161+
nrOfDates = new Array(7).fill(0)
162+
for (const date of dates) {
163+
nrOfDates[weekdayOf(date)]++
164+
}
165+
removedDates = []
166+
}
138167
}
139168

140169
if (ex !== NONE) {
@@ -145,10 +174,17 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
145174
const i = arrEq(dates, date)
146175
if (i >= 0) {
147176
dates.splice(i, 1) // delete
177+
if (exposeStats) {
178+
nrOfDates[weekdayOf(date)]--
179+
removedDates.push(date)
180+
}
148181
}
149182
} else if (ex.exception_type === ADDED) {
150183
if (!arrHas(dates, date)) {
151184
arrInsert(dates, date)
185+
if (exposeStats) {
186+
nrOfDates[weekdayOf(date)]++
187+
}
152188
}
153189
} // todo: else emit error
154190
}
@@ -157,7 +193,7 @@ const readServicesAndExceptions = async function* (readFile, timezone, filters =
157193
if (dates.length > 0) {
158194
if (svc.start_date === null) svc.start_date = dates[0]
159195
if (svc.end_date === null) svc.end_date = dates[dates.length - 1]
160-
yield [svc.service_id, dates, svc]
196+
yield [svc.service_id, dates, svc, nrOfDates, removedDates]
161197
}
162198
}
163199

0 commit comments

Comments
(0)