Skip to content

Commit a440f0c

Browse files
committed
Merge branch 'dev' into chicago-rta-fares
2 parents bbdc271 + e857fcd commit a440f0c

File tree

64 files changed

+1890
-1137
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1890
-1137
lines changed

.github/workflows/codeql-analysis.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ jobs:
4343
- name: Checkout repository
4444
uses: actions/checkout@v3
4545

46+
# Java setup step completes very fast, no need to run in a preconfigured docker container.
47+
# CodeQL is intended to detect any Java toolchains added to the execution environment.
48+
- name: Set up JDK 21
49+
uses: actions/setup-java@v3
50+
with:
51+
distribution: corretto
52+
java-version: 21
53+
cache: gradle
54+
4655
# Initializes the CodeQL tools for scanning.
4756
- name: Initialize CodeQL
4857
uses: github/codeql-action/init@v2

.github/workflows/cypress-integration.yml

Lines changed: 0 additions & 54 deletions
This file was deleted.

.github/workflows/gradle.yml

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,25 @@ jobs:
2828
# BUILD_TARGET:staging
2929
steps:
3030
# Starting in v2.2 checkout action fetches all tags when fetch-depth=0, for auto-versioning.
31-
- uses: actions/checkout@v2.3.2
31+
- uses: actions/checkout@v3
3232
with:
3333
fetch-depth: 0
3434
# Java setup step completes very fast, no need to run in a preconfigured docker container.
35-
- name: Set up JDK 11
35+
- name: Set up JDK 21
3636
uses: actions/setup-java@v3
3737
with:
38-
java-version: 11
39-
distribution: temurin
40-
cache: 'gradle'
38+
distribution: corretto
39+
java-version: 21
40+
cache: gradle
4141
- name: Show version string
4242
run: gradle -q printVersion | head -n1
43-
- name: Build and Test
43+
- name: Build and test
4444
run: gradle build
45-
- name: Ensure shadow JAR is runnable as local backend
45+
# Check that build product is able to start up as a backend server before handing it to end-to-end testing
46+
- name: Ensure backend runnable
4647
run: |
4748
cp analysis.properties.template analysis.properties
48-
gradle testShadowJarRunnable
49+
gradle testRunnable -x test
4950
- name: Publish to GH Packages
5051
# Supply access token to build.gradle (used in publishing.repositories.maven.credentials)
5152
env:

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2020 Conveyal
3+
Copyright (c) 2020-2023 Conveyal
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

build.gradle

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
plugins {
22
id 'java'
3-
id 'com.github.johnrengelman.shadow' version '8.1.0'
3+
id 'application'
44
id 'maven-publish'
55
id 'com.palantir.git-version' version '2.0.0'
66
}
@@ -10,18 +10,19 @@ group = 'com.conveyal'
1010
version gitVersion()
1111

1212
java {
13-
sourceCompatibility = JavaVersion.VERSION_11
14-
targetCompatibility = JavaVersion.VERSION_11
13+
toolchain {
14+
languageVersion.set(JavaLanguageVersion.of(21))
15+
}
1516
}
1617

1718
jar {
18-
// For Java 11 Modules, specify a module name.
19+
// For Java 11+ Modules, specify a module name.
1920
// Do not create module-info.java until all our dependencies specify a module name.
2021
// Main-Class BackendMain will start a local backend.
2122
// Build-Jdk-Spec mimics a Maven manifest entry that helps us automatically install the right JVM.
2223
// Implementation-X attributes are needed for ImageIO (used by Geotools) to initialize in some environments.
2324
manifest {
24-
attributes 'Automatic-Module-Name': 'com.conveyal.analysis',
25+
attributes 'Automatic-Module-Name': 'com.conveyal.r5',
2526
'Main-Class': 'com.conveyal.analysis.BackendMain',
2627
'Build-Jdk-Spec': targetCompatibility.getMajorVersion(),
2728
'Implementation-Title': 'Conveyal Analysis Backend',
@@ -30,18 +31,15 @@ jar {
3031
}
3132
}
3233

33-
shadowJar {
34-
mergeServiceFiles()
35-
}
36-
37-
// Allow reflective access by Kryo to normally closed Java internals.
38-
// This is used for testing equality, but also for building automatic Kryo (de)serializers.
34+
// Allow reflective access by ObjectDiffer to normally closed Java internals. Used for round-trip testing serialization.
35+
// IntelliJ seems not to pass these JVM arguments when running tests from within the IDE, so the Kryo serialization
36+
// tests may only succeed under command line Gradle.
3937
test {
38+
useJUnitPlatform()
4039
jvmArgs = ['--add-opens=java.base/java.io=ALL-UNNAMED',
4140
'--add-opens=java.base/java.time=ALL-UNNAMED',
4241
'--add-opens=java.base/java.time.zone=ALL-UNNAMED',
4342
'--add-opens=java.base/java.lang=ALL-UNNAMED']
44-
useJUnitPlatform()
4543
}
4644

4745
// `gradle publish` will upload both shadow and simple JAR to Github Packages
@@ -80,19 +78,25 @@ task copyDependencies(type: Copy) {
8078
into 'dependencies'
8179
}
8280

81+
application {
82+
applicationDefaultJvmArgs = ['-Xmx6G']
83+
mainClass = 'com.conveyal.analysis.BackendMain'
84+
}
85+
8386
// Run R5 as a local analysis backend with all dependencies on the classpath, without building a shadowJar.
8487
task runBackend (type: JavaExec) {
85-
dependsOn(build)
86-
classpath(sourceSets.main.runtimeClasspath)
87-
mainClass = 'com.conveyal.analysis.BackendMain'
88+
dependsOn(build)
89+
maxHeapSize('7G')
90+
classpath(sourceSets.main.runtimeClasspath)
91+
mainClass = 'com.conveyal.analysis.BackendMain'
8892
}
8993

90-
// Start up an analysis local backend from a shaded JAR and ask it to shut down immediately.
91-
// This is used to check in the automated build that the JAR is usable before we keep it.
94+
// Start up an analysis local backend and ask it to shut down immediately.
95+
// This is used to check in the automated build that the JAR is usable in end-to-end tests before we keep it.
9296
// Create a configuration properties file (by copying the template) before running this task.
93-
task testShadowJarRunnable(type: JavaExec) {
94-
dependsOn(shadowJar)
95-
classpath(shadowJar.archiveFile.get())
97+
task testRunnable(type: JavaExec) {
98+
dependsOn(build)
99+
classpath(sourceSets.main.runtimeClasspath)
96100
mainClass = 'com.conveyal.analysis.BackendMain'
97101
jvmArgs("-Dconveyal.immediate.shutdown=true")
98102
}
@@ -116,6 +120,12 @@ task createVersionProperties(dependsOn: processResources) {
116120
}
117121
}
118122

123+
// Fix inconsistent Gradle behavior (see https://github.com/gradle/gradle/issues/16791)
124+
// By default JavaExec tasks use the JVM Gradle was launched with, ignoring the project-level toolchain.
125+
tasks.withType(JavaExec).configureEach {
126+
javaLauncher.set(javaToolchains.launcherFor(java.toolchain))
127+
}
128+
119129
classes {
120130
dependsOn createVersionProperties
121131
}
@@ -136,10 +146,10 @@ configurations.all {
136146

137147
dependencies {
138148
// Provides our logging API
139-
implementation 'org.slf4j:slf4j-api:1.7.30'
149+
implementation 'org.slf4j:slf4j-api:2.0.7'
140150

141151
// Implementation of the logging API
142-
implementation 'ch.qos.logback:logback-classic:1.2.3'
152+
implementation 'ch.qos.logback:logback-classic:1.4.11'
143153

144154
// Spark is an HTTP framework built on Jetty. Its name is the same as several other projects.
145155
implementation (group: 'com.sparkjava', name: 'spark-core', version: '2.7.2') {
@@ -195,9 +205,6 @@ dependencies {
195205
// Commons IO gives us BOMInputStream for handling UTF-8 Byte Order Marks.
196206
implementation 'commons-io:commons-io:2.6'
197207

198-
// Guava provides a lot of functionality, collections, and tools "missing" from the Java standard library.
199-
implementation 'com.google.guava:guava:28.2-jre'
200-
201208
// Java 8 rewrite of the Guava cache with asynchronous LoadingCaches. We don't currently use the async
202209
// capabilities, but Caffeine's LoadingCache syntax is more modern idiomatic Java than Guava's.
203210
implementation 'com.github.ben-manes.caffeine:caffeine:2.8.1'
@@ -214,15 +221,19 @@ dependencies {
214221
// Commons Math gives us FastMath, MersenneTwister, and low-discrepancy vector generators.
215222
implementation 'org.apache.commons:commons-math3:3.0'
216223

217-
// Provides some shared serializers for Kryo. Introduces transitive dependencies on Guava, Trove, and Kryo.
224+
// Provides some Kryo serializers for Guava and Trove collecitons.
218225
// Also provides classes for testing that a round trip through serialization reproduces the same network.
219226
// This is an external dependency (not merged into backend) because it's also used by OTP2.
220-
// TODO arguably we should declare non-transitive dependencies on Guava, Trove, and Kryo since we use them directly
221-
implementation 'com.conveyal:kryo-tools:1.3.0'
227+
implementation 'com.conveyal:kryo-tools:1.6.0'
222228

229+
// Ensure the versions of the next three dependencies match the transitive dependencies of kryo-tools.
230+
implementation 'com.esotericsoftware:kryo:5.5.0'
231+
// Guava provides a lot of functionality, collections, and tools "missing" from the Java standard library.
232+
implementation 'com.google.guava:guava:32.1.2-jre'
223233
// Trove supplies very efficient collections of primitive data types for Java.
224234
implementation 'net.sf.trove4j:trove4j:3.0.3'
225235

236+
226237
// TODO eliminate custom Conveyal geojson library, use Geotools?
227238
implementation 'com.conveyal:jackson2-geojson:0.9'
228239

@@ -245,8 +256,7 @@ dependencies {
245256
////// Test-only dependencies //////
246257

247258
// Java unit testing framework.
248-
testImplementation(platform('org.junit:junit-bom:5.7.0'))
249-
testImplementation('org.junit.jupiter:junit-jupiter')
259+
testImplementation('org.junit.jupiter:junit-jupiter:5.9.2')
250260

251261
// Chart drawing library for examining travel time distributions when crafting tests.
252262
// Although rarely used it should be low-impact: it is a test-only dependency with no transitive dependenices.

src/main/java/com/conveyal/analysis/components/broker/Broker.java

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -48,36 +48,41 @@
4848
/**
4949
* This class distributes the tasks making up regional jobs to workers.
5050
* <p>
51-
* It should aim to draw tasks fairly from all organizations, and fairly from all jobs within each
52-
* organization, while attempting to respect the transport network affinity of each worker, giving
53-
* the worker tasks that require the same network it has been using recently.
51+
* It respects the declared transport network affinity of each worker, giving the worker tasks that
52+
* relate to the same network it has been using recently, and is therefore already loaded in memory.
5453
* <p>
55-
* Previously workers long-polled for work, holding lots of connections open. Now they short-poll
56-
* and sleep for a while if there's no work. This is simpler and allows us to work withing much more
57-
* standard HTTP frameworks.
54+
* In our initial design, workers long-polled for work, holding lots of connections open. This was
55+
* soon revised to short-poll and sleep for a while when there's no work. This was found to be
56+
* simpler, allowing use of standard HTTP frameworks instead of custom networking code.
5857
* <p>
59-
* The fact that workers continuously re-poll for work every 10-30 seconds serves as a signal to the
60-
* broker that they are still alive and waiting. This also allows the broker to maintain a catalog
61-
* of active workers.
58+
* Issue conveyal/r5#596 arose because we'd only poll when a worker was running low on tasks or
59+
* idling. A worker could disappear for a long time, leaving the backend to assume it had shut down
60+
* or crashed. Workers were revised to poll more frequently even when they were busy and didn't
61+
* need any new tasks to work on, providing a signal to the broker that they are still alive and
62+
* functioning. This allows the broker to maintain a more accurate catalog of active workers.
6263
* <p>
63-
* Because (at least currently) two organizations never share the same graph, we can get by with
64-
* pulling tasks cyclically or randomly from all the jobs, and actively shape the number of workers
65-
* with affinity for each graph by forcing some of them to accept tasks on graphs other than the one
66-
* they have declared affinity for.
64+
* Most methods on this class are synchronized because they can be called from many HTTP handler
65+
* threads at once (when many workers are polling at once). We should occasionally evaluate whether
66+
* synchronizing all methods to make this threadsafe is a performance issue. If so, fine-grained
67+
* locking may be advantageous, but as a rule it is much harder to design, test, and maintain.
6768
* <p>
68-
* This could be thought of as "affinity homeostasis". We will constantly keep track of the ideal
69+
* Workers were originally intended to migrate from one network to another to handle subsequent jobs
70+
* without waiting for more cloud compute instances to start up. In practice we currently assign
71+
* each worker a single network, but the balance of workers assigned to each network and the reuse
72+
* of workers could in principle be made more sophisticated. The remainder of the comments below
73+
* provide context for how this could be refined or improved.
74+
*
75+
* Because (at least currently) two organizations never share the same graph, we could get by with
76+
* pulling tasks cyclically or randomly from all the jobs, and could actively shape the number of
77+
* workers with affinity for each graph by forcing some of them to accept tasks on graphs other than
78+
* the one they have declared affinity for. If the pool of workers was allowed to grow very large,
79+
* we could aim to draw tasks fairly from all organizations, and fairly from all jobs within each
80+
* organization.
81+
* <p>
82+
* We have described this approach as "affinity homeostasis": constantly keep track of the ideal
6983
* proportion of workers by graph (based on active jobs), and the true proportion of consumers by
7084
* graph (based on incoming polling), then we can decide when a worker's graph affinity should be
71-
* ignored and what it should be forced to.
72-
* <p>
73-
* It may also be helpful to mark jobs every time they are skipped in the LRU queue. Each time a job
74-
* is serviced, it is taken out of the queue and put at its end. Jobs that have not been serviced
75-
* float to the top.
76-
* <p>
77-
* Most methods on this class are synchronized, because they can be called from many HTTP handler
78-
* threads at once.
79-
*
80-
* TODO evaluate whether synchronizing all methods to make this threadsafe is a performance issue.
85+
* ignored and what graph it should be forced to.
8186
*/
8287
public class Broker implements Component {
8388

@@ -100,7 +105,15 @@ public interface Config {
100105
private final ListMultimap<WorkerCategory, Job> jobs =
101106
MultimapBuilder.hashKeys().arrayListValues().build();
102107

103-
/** The most tasks to deliver to a worker at a time. */
108+
/**
109+
* The most tasks to deliver to a worker at a time. Workers may request less tasks than this, and the broker should
110+
* never send more than the minimum of the two values. 50 tasks gives response bodies of about 65kB. If this value
111+
* is too high, all remaining tasks in a job could be distributed to a single worker leaving none for the other
112+
* workers, creating a slow-joiner problem especially if the tasks are complicated and slow to complete.
113+
*
114+
* The value should eventually be tuned. The current value of 16 is just the value used by the previous sporadic
115+
* polling system (WorkerStatus.LEGACY_WORKER_MAX_TASKS) which may not be ideal but is known to work.
116+
*/
104117
public final int MAX_TASKS_PER_WORKER = 16;
105118

106119
/**
@@ -317,9 +330,13 @@ public void createWorkersInCategory (WorkerCategory category, WorkerTags workerT
317330

318331
/**
319332
* Attempt to find some tasks that match what a worker is requesting.
320-
* Always returns a list, which may be empty if there is nothing to deliver.
333+
* Always returns a non-null List, which may be empty if there is nothing to deliver.
334+
* Number of tasks in the list is strictly limited to maxTasksRequested.
321335
*/
322-
public synchronized List<RegionalTask> getSomeWork (WorkerCategory workerCategory) {
336+
public synchronized List<RegionalTask> getSomeWork (WorkerCategory workerCategory, int maxTasksRequested) {
337+
if (maxTasksRequested <= 0) {
338+
return Collections.EMPTY_LIST;
339+
}
323340
Job job;
324341
if (config.offline()) {
325342
// Working in offline mode; get tasks from the first job that has any tasks to deliver.
@@ -335,7 +352,10 @@ public synchronized List<RegionalTask> getSomeWork (WorkerCategory workerCategor
335352
return Collections.EMPTY_LIST;
336353
}
337354
// Return up to N tasks that are waiting to be processed.
338-
return job.generateSomeTasksToDeliver(MAX_TASKS_PER_WORKER);
355+
if (maxTasksRequested > MAX_TASKS_PER_WORKER) {
356+
maxTasksRequested = MAX_TASKS_PER_WORKER;
357+
}
358+
return job.generateSomeTasksToDeliver(maxTasksRequested);
339359
}
340360

341361
/**

src/main/java/com/conveyal/analysis/components/broker/Job.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ public boolean isErrored () {
177177
/**
178178
* @param maxTasks the maximum number of tasks to return.
179179
* @return some tasks that are not yet marked as completed and have not yet been delivered in
180-
* this delivery pass.
180+
* this delivery pass. May return an empty list, but never null.
181181
*/
182182
public List<RegionalTask> generateSomeTasksToDeliver (int maxTasks) {
183183
List<RegionalTask> tasks = new ArrayList<>(maxTasks);

0 commit comments

Comments
 (0)