Skip to content

Commit

Permalink
working selected link analysis prototype
Browse files Browse the repository at this point in the history
add a custom modification of type select-link and run regional path csv
  • Loading branch information
abyrd committed Dec 31, 2023
1 parent 000d3d0 commit f1ab484
Show file tree
Hide file tree
Showing 17 changed files with 527 additions and 128 deletions.
8 changes: 5 additions & 3 deletions src/main/java/com/conveyal/gtfs/GTFSCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ public class GTFSCache implements Component {
// The following two caches hold spatial indexes of GTFS geometries for generating Mapbox vector tiles, one spatial
// index per feed keyed on BundleScopedFeedId. They could potentially be combined such that cache values are a
// compound type holding two indexes, or cache values are a single index containing a mix of different geometry
// types that are filtered on iteration. They could also be integreated into the GTFSFeed values of the main
// GTFSCache#cache. However GTFSFeed is already a very long class, and we may want to tune eviction parameters
// types that are filtered on iteration. They could also be integrated into the GTFSFeed values of the main
// GTFSCache#cache. However, GTFSFeed is already a very long class, and we may want to tune eviction parameters
// separately for GTFSFeed and these indexes. While GTFSFeeds are expected to incur constant memory use, the
// spatial indexes are potentially unlimited in size and we may want to evict them faster or limit their quantity.
// spatial indexes are potentially unlimited in size, so we may want to evict them faster or limit their quantity.
// We have decided to keep them as separate caches until we're certain of the chosen eviction tuning parameters.

/** A cache of spatial indexes of TripPattern shapes, keyed on the BundleScopedFeedId. */
Expand Down Expand Up @@ -127,6 +127,8 @@ public FileStorageKey getFileKey (String id, String extension) {
// The feedId of the GTFSFeed objects may not be unique - we can have multiple versions of the same feed
// covering different time periods, uploaded by different users. Therefore we record another ID here that is
// known to be unique across the whole application - the ID used to fetch the feed.
// NOTE: As of 2023, this is no longer true. All uploaded feeds are assigned unique UUIDs, so as far as I know
// they can't collide, we don't need this uniqueId field, and we may not even need bundle-scoped feed IDs.
feed.uniqueId = id;
return feed;
}
Expand Down
14 changes: 8 additions & 6 deletions src/main/java/com/conveyal/gtfs/GTFSFeed.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,18 @@ public class GTFSFeed implements Cloneable, Closeable {
/** The MapDB database handling persistence of Maps to a pair of disk files behind the scenes. */
private DB db;

/** An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions. */
/**
* An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions.
* In R5 as of 2023 this is always overwritten with a unique UUID to avoid problems with successive feed versions
* or edited/modified versions of the same feeds.
*/
public String feedId;

/**
* This field was merged in from the wrapper FeedSource. It is a unique identifier for this particular GTFS file.
* Successive versions of the data for the same operators, or even different copies of the same operator's data
* uploaded by different people, should have different uniqueIds.
* In practice this is mostly copied into WrappedGTFSEntity instances used in the Analysis GraphQL API.
* In R5 as of 2023, this field will contain the bundle-scoped feed ID used to fetch the feed object from the
* GTFSCache (but is not present on disk or before saving - only after it's been reloaded from a file by the cache).
*/
public transient String uniqueId; // set this to feedId until it is overwritten, to match FeedSource behavior
public transient String uniqueId;

// All tables below should be MapDB maps so the entire GTFSFeed is persistent and uses constant memory.

Expand Down
4 changes: 3 additions & 1 deletion src/main/java/com/conveyal/r5/analyst/Grid.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
Expand Down Expand Up @@ -170,7 +171,8 @@ public List<PixelWeight> getPixelWeights (Geometry geometry, boolean relativeToP

double area = geometry.getArea();
if (area < 1e-12) {
throw new IllegalArgumentException("Feature geometry is too small");
LOG.warn("Discarding feature. Its area is too small to serve as a denominator ({} square degrees).", area);
return Collections.EMPTY_LIST;
}

if (area > MAX_FEATURE_AREA_SQ_DEG) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,14 +498,6 @@ protected void handleOneRegionalTask (RegionalTask task) throws Throwable {
oneOriginResult = new OneOriginResult(null, new AccessibilityResult(task), null, null);
}

// Post-process the OneOriginResult to filter paths down to only those passing through the selected links.
// The set of routes and stop pairs concerned are precalculated and retained on per regional analysis.
// The first thing to do is specify the point of interest on the request. selectedLink: {lat, lon, radiusMeters}
// Without precomputing anything ... just do the geometric calculations every time. And memoize the results.
transportNetwork.transitLayer.tripPatterns.getFirst().shape;
transportNetwork.transitLayer.tripPatterns.getFirst().getHopGeometries();


// Accumulate accessibility results, which will be returned to the backend in batches.
// For most regional analyses, this is an accessibility indicator value for one of many origins,
// but for static sites the indicator value is not known, it is computed in the UI. We still want to return
Expand Down
45 changes: 45 additions & 0 deletions src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.conveyal.r5.analyst.StreetTimesAndModes;
import com.conveyal.r5.transit.TransitLayer;
import com.conveyal.r5.transit.TripPattern;
import com.conveyal.r5.transit.path.Path;
import com.conveyal.r5.transit.path.PatternSequence;
import com.conveyal.r5.transit.path.RouteSequence;
Expand All @@ -10,7 +11,11 @@
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import org.apache.commons.lang3.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.awt.*;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
Expand All @@ -32,6 +37,8 @@

public class PathResult {

private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

/**
* The maximum number of destinations for which we'll generate detailed path information in a single request.
* Detailed path information was added on to the original design, which returned a simple grid of travel times.
Expand All @@ -41,12 +48,14 @@ public class PathResult {
public static int maxDestinations = 5000;

private final int nDestinations;

/**
* Array with one entry per destination. Each entry is a map from a "path template" to the associated iteration
* details. For now, the path template is a route-based path ignoring per-iteration details such as wait time.
* With additional changes, patterns could be collapsed further to route combinations or modes.
*/
public final Multimap<RouteSequence, Iteration>[] iterationsForPathTemplates;

private final TransitLayer transitLayer;

public static String[] DATA_COLUMNS = new String[]{
Expand Down Expand Up @@ -83,6 +92,42 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) {
* pattern-based keys
*/
public void setTarget(int targetIndex, Multimap<PatternSequence, Iteration> patterns) {

// When selected link analysis is enabled, filter down the PatternSequences to include only those passing
// through the selected links.
// TODO Maybe selectedLink should be on TransitLayer, and somehow capture the number of filtered iterations.
if (transitLayer.parentNetwork.selectedLink != null) {
final SelectedLink selectedLink = transitLayer.parentNetwork.selectedLink;
Multimap<PatternSequence, Iteration> filteredPatterns = HashMultimap.create();
for (PatternSequence patternSequence : patterns.keySet()) {
// Why do we have some null patterns lists? Walk-only routes with no transit legs?
if (patternSequence.patterns == null) {
continue;
}
boolean retain = false;
// Iterate over the three parallel arrays containing TripPattern, board stop, and alight stop indexes.
for (int ride = 0; ride < patternSequence.patterns.size(); ride++) {
int pattern = patternSequence.patterns.get(ride);
int board = patternSequence.stopSequence.boardStops.get(ride);
int alight = patternSequence.stopSequence.alightStops.get(ride);
if (selectedLink.includes(pattern, board, alight)) {
retain = true;
// String routeId = transitLayer.tripPatterns.get(pattern).routeId;
// String boardStopName = transitLayer.stopNames.get(board);
// String alightStopName = transitLayer.stopNames.get(alight);
// LOG.info("Retaining {} from {} to {}", routeId, boardStopName, alightStopName);
break;
}
}
if (retain) {
Collection<Iteration> iterations = patterns.get(patternSequence);
filteredPatterns.putAll(patternSequence, iterations);
}
}
patterns = filteredPatterns;
}

// The rest of this runs independent of whether a SelectedLink filtered down the patterns-iterations map.
Multimap<RouteSequence, Iteration> routes = HashMultimap.create();
patterns.forEach(((patternSeq, iteration) -> routes.put(new RouteSequence(patternSeq, transitLayer), iteration)));
iterationsForPathTemplates[targetIndex] = routes;
Expand Down
140 changes: 106 additions & 34 deletions src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,27 @@
import com.conveyal.r5.transit.TransitLayer;
import com.conveyal.r5.transit.TransportNetworkCache;
import com.conveyal.r5.transit.TripPattern;
import com.conveyal.r5.util.TIntIntHashMultimap;
import com.conveyal.r5.util.TIntIntMultimap;
import gnu.trove.TIntCollection;
import gnu.trove.set.TIntSet;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.LineString;
import org.locationtech.jts.geom.Polygon;

import java.util.List;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import static com.conveyal.r5.common.GeometryUtils.envelopeForCircle;
import static com.conveyal.r5.common.GeometryUtils.polygonForEnvelope;

/**
* For Selected Link Analysis.
* This object caches a collection of every segment of every pattern that passes through a certain polygon.
* It also provides methods for quickly checking whether any leg of a public transit trip overlaps these selected segments.
*
* Implementation considerations follow:
*
* Simplifications:
* Assumes all trips on the same pattern have the same geometry.
Expand All @@ -27,49 +41,107 @@
* Do the workers have access to the GTFS files or not?
* WorkerComponents has a TransportNetworkCache which is injected into the AnalysisWorker constructor. This is the only
* path to access a GtfsCache which is private, so we need a method on TransportNetworkCache.
* The full path to the GtfsCache is: AnalysisWorker.networkPreloader.transportNetworkCache.gtfsCache.
*
* The best way to easily get the intended behavior is probably to create a new modification type.
* This provides a mechanism for attaching things to a network, at a point where we may still have access to the GTFS.
* It also ensures that the network with extra information is properly cached for similar requests.
* We can't attach it to the raw TransportNetwork, because then the first request for that network would always need
* to be one with the selected-link behavior specified. We need to be able to apply it to a network that was loaded
* without the selected link. Or we could treat the network as mutable and write to it, which is not clean but would
* get the job done.
*
* Well, TransportNetworkCache#getNetworkForScenario is where we always apply scenarios in the worker, and that class
* has direct access to the GtfsCache.
*
* The SelectedLink instance will need to be stored/referenced:
* - Somewhere that is reachable from inside PathResult.setTarget or PathResult.summarizeIterations
* - Somewhere that is correctly scoped to where the selected-link filtering is specified (request/task or scenario)
* - Somewhere that is writable in the places where we have access to the gtfsCache
* - Somewhere that is PERSISTENT across requests - this is inherently the case for TransportNetwork but for Task we'd
* need to introduce another cache. The problem being that the base TransportNetwork's scope is too wide (could be
* used in requests with or without the SelectedLink), so it needs to be a modification on a specific scenario.
*
* PathResult is constructed with a Task and a TransitLayer. It retains only the TransitLayer but could retain both.
* In AnalysisWorker.handleAndSerializeOneSinglePointTask we still have the task context, but deeper on the stack in
* networkPreloader and then transportNetworkCache (which has gtfsCache), we have the scenario but not the task. But
* then once you go deeper into applying the scenario modifications, the gtfsCache is no longer visible.
* Anyway this doesn't feel like a modification. It feels like a parameter to the CSV path output in the task.
* The AnalysisWorker could have a Map from SelectionBox to SelectedLink (fuzzy matching keys... ugh... linear scan
* maybe).
* Also we need to tie items in the TransportNetwork to the GTFS... it feels like this should be on the TransportNetwork
* of a scenario.
* What if we scan over the incoming modifications and inject the GtfsCache onto a transient field of any
* SelectedLinkModification present?
* Basically: getting this into the Scenario makes it auto-retained, gives it a stable identity so we don't need to
* fuzzy-match it in the task to cache. That could also be done by uploading a geometry file with an ID, but ugh.
* In the future it would make sense to treat all lat/lon values effectively as integers (fixed-point), since that
* simplifies this kind of keying and matching.
*
* Alternatively to all this we could switch on the storage of GTFS route shapes on the network file. Then the
* modification could be applied normally without injecting a GtfsCache.
*
* Additional problem:
* The gtfsCache feed IDs (gtfs file names) are bundle-scoped but the ones in the TripPatterns are not.
* TransportNetworks and TransitLayers apparently do not retain their bundle ID. In any case they can have multiple
* feeds originally uploaded with different bundles.
* TransitLayer.feedChecksums keys are the same feed IDs prefixing TripPattern.routeId, which are the gtfsFeed.feedId,
* which is not bundle-scoped so can't be used to get a feed from gtfsCache.
*
* A network is always based on one bundle with the same ID, but the bundle config can also reference GTFS with a
* different bundle scope (originally uploaded for another bundle). So knowing the network ID is not sufficient.
*
* Based on GtfsController.bundleScopedFeedIdFromRequest, the bundleScopedFeedId is feedId_feedGroupId. So they're no
* longer based on the bundle/network ID, but the feed group.
* It seems like we wouldn't need these scopes at all since all feeds now have unique IDs.
*
* When we make the TransportNetwork from these bundles, it's always on a worker, based on the bundle's
* TransportNetworkConfig JSON file. This is in TransportNetworkCache.buildNetworkFromConfig().
* At first it looks like the bundleScopedId is completely lost after we go through the loading process.
* But GtfsCache.get(String id) stores that key id in feed.uniqueId. That field is never read (or written) anywhere else.
* This means they're available during network creation to be retained in the TransportNetwork... but aren't retained.
* I think the only place we can get these bundle scoped feed IDs is from the TransportNetworkConfig JSON file.
* Perhaps that should be serialized into the TransportNetwork itself (check risk of serializing used Modifications).
* But in the meantime TNCache has a method to load that configuration.
*/
public class SelectedLink {

public SelectedLink (TransportNetworkCache transportNetworkCache, SelectionBox box) {
for (TripPattern pattern : transit.tripPatterns) {
for (LineString hopGeoms : pattern.getHopGeometries(transit)) {

}
}
}
/**
* Contains all TripPattern inter-stop hops that pass through the selected link area for fast hash-based lookup.
* Keys are the index of a TripPattern in the TransitLayer, and values are the stop indexes in the TransitLayer.
* They are coded this way to match how they're coded in PatternSequence and minimize conversions in tight loops.
* A hop from stop A to stop B on pattern X is recorded as the mapping X -> A. Note: This is ambiguous if the stop
* appears more than once in the pattern, but PatternSequence does not seem to allow otherwise.
*/
private final TIntIntMultimap hopsInTripPattern;

public SelectedLink (SelectionBox box, TransitLayer transit) {
for (TripPattern pattern : transit.tripPatterns) {
for (LineString hopGeoms : pattern.getHopGeometries(transit)) {
// FIXME clean up or remove these notes.
// Post-process the OneOriginResult to filter paths down to only those passing through the selected links.
// The set of routes and stop pairs concerned is precalculated and retained per regional analysis.
// The first thing to do is specify the point of interest on the request. selectedLink: {lat, lon, radiusMeters}
// Without precomputing anything ... just do the geometric calculations every time. And memoize the results.

}
}
/**
* Create a SelectedLink from an already-computed multimap of hops.
* The supplied multimap is stored by reference; no defensive copy is made.
*
* @param hopsInTripPattern the multimap assigned to the field of the same name, which the includes method reads
*/
public SelectedLink(TIntIntMultimap hopsInTripPattern) {
this.hopsInTripPattern = hopsInTripPattern;
}

/**
* An alternate way of specifying a bounding box where there is a central point of interest and a margin of error
* around it. Some points at the corners of the bounding box are farther away than the radius (which is the radius
* of a circle inscribed in the bounding box).
* For a given transit ride from a boardStop to an alightStop on a TripPattern, return whether that ride
* passes through this SelectedLink area.
*/
public static class SelectionBox {
double lon;
double lat;
double radiusMeters;
public Envelope toEnvelope () {
Envelope env = new Envelope();
env.expandToInclude(lon, lat);
env.expandBy(radiusMeters); // FIXME convert to lon and lat degrees
public boolean includes (int tripPattern, int boardStop, int alightStop) {
TIntCollection hops = hopsInTripPattern.get(tripPattern);
if (hops.isEmpty()) {
return false;
}
}

/**
* Uniquely identifies a segment between two subsequent stops on a TripPattern.
* This allows us to record in advance which segments pass through the link selection box.
*/
public static class TripPatternSegment {
TripPattern tripPattern;
int tripPatternIndex; // The integer ID of this tripPattern as a Raptor "route" in R5 routing.
int fromStopIndex; // Not the GTFS stop sequence number, the internal R5 index within the pattern.
for (int hop : hops.toArray()) {
// Hops are identified with the stop index at their beginning so alightStop is exclusive.
// (Alighting at a stop does not ride over the hop identified with that stop index.)
if (hop >= boardStop && hop < alightStop) {
return true;
}
}
return false;
}

}
Loading

0 comments on commit f1ab484

Please sign in to comment.