working selected link analysis prototype

add a custom modification of type select-link and run regional path csv
conveyal · Dec 31, 2023 · f1ab484 · f1ab484
1 parent 000d3d0
commit f1ab484
Show file tree

Hide file tree

Showing 17 changed files with 527 additions and 128 deletions.
diff --git a/src/main/java/com/conveyal/gtfs/GTFSCache.java b/src/main/java/com/conveyal/gtfs/GTFSCache.java
@@ -48,10 +48,10 @@ public class GTFSCache implements Component {
     // The following two caches hold spatial indexes of GTFS geometries for generating Mapbox vector tiles, one spatial
     // index per feed keyed on BundleScopedFeedId. They could potentially be combined such that cache values are a
     // compound type holding two indexes, or cache values are a single index containing a mix of different geometry
-    // types that are filtered on iteration. They could also be integreated into the GTFSFeed values of the main
-    // GTFSCache#cache. However GTFSFeed is already a very long class, and we may want to tune eviction parameters
+    // types that are filtered on iteration. They could also be integrated into the GTFSFeed values of the main
+    // GTFSCache#cache. However, GTFSFeed is already a very long class, and we may want to tune eviction parameters
     // separately for GTFSFeed and these indexes. While GTFSFeeds are expected to incur constant memory use, the
-    // spatial indexes are potentially unlimited in size and we may want to evict them faster or limit their quantity.
+    // spatial indexes are potentially unlimited in size, so we may want to evict them faster or limit their quantity.
     // We have decided to keep them as separate caches until we're certain of the chosen eviction tuning parameters.
 
     /** A cache of spatial indexes of TripPattern shapes, keyed on the BundleScopedFeedId. */
@@ -127,6 +127,8 @@ public FileStorageKey getFileKey (String id, String extension) {
         // The feedId of the GTFSFeed objects may not be unique - we can have multiple versions of the same feed
         // covering different time periods, uploaded by different users. Therefore we record another ID here that is
         // known to be unique across the whole application - the ID used to fetch the feed.
+        // NOTE as of 2023, this is no longer true. All uploaded feeds are assigned unique UUIDs, so as far as I know
+        // they can't collide; we don't need this uniqueId field, and we may not even need bundle-scoped feed IDs.
         feed.uniqueId = id;
         return feed;
     }

diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java
@@ -85,16 +85,18 @@ public class GTFSFeed implements Cloneable, Closeable {
     /** The MapDB database handling persistence of Maps to a pair of disk files behind the scenes. */
     private DB db;
 
-    /** An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions. */
+    /**
+     *  An ID (sometimes declared by the feed itself) which may remain the same across successive feed versions.
+     *  In R5 as of 2023 this is always overwritten with a unique UUID to avoid problems with successive feed versions
+     *  or edited/modified versions of the same feeds.
+     */
     public String feedId;
 
     /**
-     * This field was merged in from the wrapper FeedSource. It is a unique identifier for this particular GTFS file.
-     * Successive versions of the data for the same operators, or even different copies of the same operator's data
-     * uploaded by different people, should have different uniqueIds.
-     * In practice this is mostly copied into WrappedGTFSEntity instances used in the Analysis GraphQL API.
+     * In R5 as of 2023, this field will contain the bundle-scoped feed ID used to fetch the feed object from the
+     * GTFSCache (but is not present on disk or before saving - only after it's been reloaded from a file by the cache).
      */
-    public transient String uniqueId; // set this to feedId until it is overwritten, to match FeedSource behavior
+    public transient String uniqueId;
 
     // All tables below should be MapDB maps so the entire GTFSFeed is persistent and uses constant memory.
 

diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java
@@ -53,6 +53,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -170,7 +171,8 @@ public List<PixelWeight> getPixelWeights (Geometry geometry, boolean relativeToP
 
         double area = geometry.getArea();
         if (area < 1e-12) {
-            throw new IllegalArgumentException("Feature geometry is too small");
+            LOG.warn("Discarding feature. Its area is too small to serve as a denominator ({} square degrees).", area);
+            return Collections.EMPTY_LIST;
         }
 
         if (area > MAX_FEATURE_AREA_SQ_DEG) {

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorker.java
@@ -498,14 +498,6 @@ protected void handleOneRegionalTask (RegionalTask task) throws Throwable {
             oneOriginResult = new OneOriginResult(null, new AccessibilityResult(task), null, null);
         }
 
-        // Post-process the OneOriginResult to filter paths down to only those passing through the selected links.
-        // The set of routes and stop pairs concerned are precalculated and retained on per regional analysis.
-        // The first thing to do is specify the point of interest on the request. selectedLink: {lat, lon, radiusMeters}
-        // Without precomputing anything ... just do the geometric calculations every time. And memoize the results.
-        transportNetwork.transitLayer.tripPatterns.getFirst().shape;
-        transportNetwork.transitLayer.tripPatterns.getFirst().getHopGeometries();
-
-
         // Accumulate accessibility results, which will be returned to the backend in batches.
         // For most regional analyses, this is an accessibility indicator value for one of many origins,
         // but for static sites the indicator value is not known, it is computed in the UI. We still want to return

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java
@@ -2,6 +2,7 @@
 
 import com.conveyal.r5.analyst.StreetTimesAndModes;
 import com.conveyal.r5.transit.TransitLayer;
+import com.conveyal.r5.transit.TripPattern;
 import com.conveyal.r5.transit.path.Path;
 import com.conveyal.r5.transit.path.PatternSequence;
 import com.conveyal.r5.transit.path.RouteSequence;
@@ -10,7 +11,11 @@
 import gnu.trove.list.TIntList;
 import gnu.trove.list.array.TIntArrayList;
 import org.apache.commons.lang3.ArrayUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import java.awt.*;
+import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
@@ -32,6 +37,8 @@
 
 public class PathResult {
 
+    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
     /**
      * The maximum number of destinations for which we'll generate detailed path information in a single request.
      * Detailed path information was added on to the original design, which returned a simple grid of travel times.
@@ -41,12 +48,14 @@ public class PathResult {
     public static int maxDestinations = 5000;
 
     private final int nDestinations;
+
     /**
      * Array with one entry per destination. Each entry is a map from a "path template" to the associated iteration
      * details. For now, the path template is a route-based path ignoring per-iteration details such as wait time.
      * With additional changes, patterns could be collapsed further to route combinations or modes.
      */
     public final Multimap<RouteSequence, Iteration>[] iterationsForPathTemplates;
+
     private final TransitLayer transitLayer;
 
     public static String[] DATA_COLUMNS = new String[]{
@@ -83,6 +92,42 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) {
      * pattern-based keys
      */
     public void setTarget(int targetIndex, Multimap<PatternSequence, Iteration> patterns) {
+
+        // When selected link analysis is enabled, filter down the PatternSequences to include only those passing
+        // through the selected links.
+        // TODO Maybe selectedLink should be on TransitLayer, and somehow capture the number of filtered iterations.
+        if (transitLayer.parentNetwork.selectedLink != null) {
+            final SelectedLink selectedLink = transitLayer.parentNetwork.selectedLink;
+            Multimap<PatternSequence, Iteration> filteredPatterns = HashMultimap.create();
+            for (PatternSequence patternSequence : patterns.keySet()) {
+                // Why do we have some null patterns lists? Walk-only routes with no transit legs?
+                if (patternSequence.patterns == null) {
+                    continue;
+                }
+                boolean retain = false;
+                // Iterate over the three parallel arrays containing TripPattern, board stop, and alight stop indexes.
+                for (int ride = 0; ride < patternSequence.patterns.size(); ride++) {
+                    int pattern = patternSequence.patterns.get(ride);
+                    int board = patternSequence.stopSequence.boardStops.get(ride);
+                    int alight = patternSequence.stopSequence.alightStops.get(ride);
+                    if (selectedLink.includes(pattern, board, alight)) {
+                        retain = true;
+                        // String routeId = transitLayer.tripPatterns.get(pattern).routeId;
+                        // String boardStopName  = transitLayer.stopNames.get(board);
+                        // String alightStopName  = transitLayer.stopNames.get(alight);
+                        // LOG.info("Retaining {} from {} to {}", routeId, boardStopName, alightStopName);
+                        break;
+                    }
+                }
+                if (retain) {
+                    Collection<Iteration> iterations = patterns.get(patternSequence);
+                    filteredPatterns.putAll(patternSequence, iterations);
+                }
+            }
+            patterns = filteredPatterns;
+        }
+
+        // The rest of this runs independent of whether a SelectedLink filtered down the patterns-iterations map.
         Multimap<RouteSequence, Iteration> routes = HashMultimap.create();
         patterns.forEach(((patternSeq, iteration) -> routes.put(new RouteSequence(patternSeq, transitLayer), iteration)));
         iterationsForPathTemplates[targetIndex] = routes;

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java b/src/main/java/com/conveyal/r5/analyst/cluster/SelectedLink.java
@@ -3,13 +3,27 @@
 import com.conveyal.r5.transit.TransitLayer;
 import com.conveyal.r5.transit.TransportNetworkCache;
 import com.conveyal.r5.transit.TripPattern;
+import com.conveyal.r5.util.TIntIntHashMultimap;
+import com.conveyal.r5.util.TIntIntMultimap;
+import gnu.trove.TIntCollection;
+import gnu.trove.set.TIntSet;
 import org.locationtech.jts.geom.Envelope;
 import org.locationtech.jts.geom.LineString;
+import org.locationtech.jts.geom.Polygon;
 
-import java.util.List;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.conveyal.r5.common.GeometryUtils.envelopeForCircle;
+import static com.conveyal.r5.common.GeometryUtils.polygonForEnvelope;
 
 /**
  * For Selected Link Analysis.
+ * This object caches a collection of every segment of every pattern that passes through a certain polygon.
+ * It also provides methods for quickly checking whether any leg of a public transit trip overlaps these selected segments.
+ *
+ * Implementation considerations follow:
  *
  * Simplifications:
  * Assumes all trips on the same pattern have the same geometry.
@@ -27,49 +41,107 @@
  * Do the workers have access to the GTFS files or not?
  * WorkerComponents has a TransportNetworkCache which is injected into the AnalysisWorker constructor. This is the only
  * path to access a GtfsCache which is private, so we need a method on TransportNetworkCache.
+ * The full path to the GtfsCache is: AnalysisWorker.networkPreloader.transportNetworkCache.gtfsCache.
+ *
+ * The best way to easily get the intended behavior is probably to create a new modification type.
+ * This provides a mechanism for attaching things to a network, at a point where we may still have access to the gtfs.
+ * It also ensures that the network with extra information is properly cached for similar requests.
+ * We can't attach it to the raw TransportNetwork, because then the first request for that network would always need
+ * to be one with the selected-link behavior specified. We need to be able to apply it to a network that was loaded
+ * without the selected link. Or we could treat the network as mutable and write to it, which is not clean but would
+ * get the job done.
+ *
+ * Well, TransportNetworkCache#getNetworkForScenario is where we always apply scenarios in the worker, and that class
+ * has direct access to the GtfsCache.
+ *
+ * The SelectedLink instance will need to be stored/referenced:
+ * - Somewhere that is reachable from inside PathResult.setTarget or PathResult.summarizeIterations
+ * - Somewhere that is correctly scoped to where the selected-link filtering is specified (request/task or scenario)
+ * - Somewhere that is writable in the places where we have access to the gtfsCache
+ * - Somewhere that is PERSISTENT across requests - this is inherently the case for TransportNetwork but for Task we'd
+ *   need to introduce another cache. The problem being that the base TransportNetwork's scope is too wide (could be
+ *   used in requests with or without the SelectedLink), so it needs to be a modification on a specific scenario.
+ *
+ * PathResult is constructed with a Task and a TransitLayer. It retains only the TransitLayer but could retain both.
+ * In AnalysisWorker.handleAndSerializeOneSinglePointTask we still have the task context, but deeper on the stack in
+ * networkPreloader and then transportNetworkCache (which has gtfsCache), we have the scenario but not the task. But
+ * then once you go deeper into applying the scenario modifications, the gtfsCache is no longer visible.
+ * Anyway this doesn't feel like a modification. It feels like a parameter to the CSV path output in the task.
+ * The AnalysisWorker could have a Map from SelectionBox to SelectedLink (fuzzy matching keys... ugh... linear scan
+ * maybe).
+ * Also we need to tie items in the TransportNetwork to the GTFS... it feels like this should be on the TransportNetwork
+ * of a scenario.
+ * What if we scan over the incoming modifications and inject the GtfsCache onto a transient field of any
+ * SelectedLinkModification present?
+ * Basically: getting this into the Scenario makes it auto-retained, gives it a stable identity so we don't need to
+ * fuzzy-match it in the task to cache. That could also be done by uploading a geometry file with an ID, but ugh.
+ * In the future it would make sense to treat all lat/lon values effectively as integers (fixed-point), since it
+ * simplifies this kind of keying and matching.
+ *
+ * As an alternative to all this, we could enable storage of GTFS route shapes in the network file. Then the
+ * modification could be applied normally without injecting a GtfsCache.
+ *
+ * Additional problem:
+ * The gtfsCache feed IDs (gtfs file names) are bundle-scoped but the ones in the TripPatterns are not.
+ * TransportNetworks and TransitLayers apparently do not retain their bundle ID. In any case they can have multiple
+ * feeds originally uploaded with different bundles.
+ * TransitLayer.feedChecksums keys are the same feed IDs prefixing TripPattern.routeId, which are the gtfsFeed.feedId,
+ * which is not bundle-scoped so can't be used to get a feed from gtfsCache.
+ *
+ * A network is always based on one bundle with the same ID, but the bundle config can also reference GTFS with a
+ * different bundle scope (originally uploaded for another bundle). So knowing the network ID is not sufficient.
+ *
+ * Based on GtfsController.bundleScopedFeedIdFromRequest, the bundleScopedFeedId is feedId_feedGroupId. So they're no
+ * longer based on the bundle/network ID, but the feed group.
+ * It seems like we wouldn't need these scopes at all since all feeds now have unique IDs.
+ *
+ * When we make the TransportNetwork from these bundles, it's always on a worker, based on the bundle's
+ * TransportNetworkConfig JSON file. This is in TransportNetworkCache.buildNetworkFromConfig().
+ * At first it looks like the bundleScopedId is completely lost after we go through the loading process.
+ * But GtfsCache.get(String id) stores that key id in feed.uniqueId. That field is never read (or written) anywhere else.
+ * This means they're available during network creation to be retained in the TransportNetwork... but aren't retained.
+ * I think the only place we can get these bundle scoped feed IDs is from the TransportNetworkConfig JSON file.
+ * Perhaps that should be serialized into the TransportNetwork itself (check risk of serializing used Modifications).
+ * But in the meantime TNCache has a method to load that configuration.
  */
 public class SelectedLink {
 
-    public SelectedLink (TransportNetworkCache transportNetworkCache, SelectionBox box) {
-        for (TripPattern pattern : transit.tripPatterns) {
-            for (LineString hopGeoms : pattern.getHopGeometries(transit)) {
-
-            }
-        }
-    }
+    /**
+     * Contains all TripPattern inter-stop hops that pass through the selected link area for fast hash-based lookup.
+     * Keys are the index of a TripPattern in the TransitLayer, and values are the stop indexes in the TransitLayer.
+     * They are coded this way to match how they're coded in PatternSequence and minimize conversions in tight loops.
+     * A hop from stop A to stop B on pattern X is recorded as the mapping X -> A. Note: This is ambiguous if the stop
+     * appears more than once in the pattern, but PatternSequence does not seem to allow otherwise.
+     */
+    private final TIntIntMultimap hopsInTripPattern;
 
-    public SelectedLink (SelectionBox box, TransitLayer transit) {
-        for (TripPattern pattern : transit.tripPatterns) {
-            for (LineString hopGeoms : pattern.getHopGeometries(transit)) {
+    // FIXME clean up or remove these notes.
+    // Post-process the OneOriginResult to filter paths down to only those passing through the selected links.
+    // The set of routes and stop pairs concerned is precalculated and retained per regional analysis.
+    // The first thing to do is specify the point of interest on the request. selectedLink: {lat, lon, radiusMeters}
+    // Without precomputing anything ... just do the geometric calculations every time. And memoize the results.
 
-            }
-        }
+    public SelectedLink(TIntIntMultimap hopsInTripPattern) {
+        this.hopsInTripPattern = hopsInTripPattern;
     }
 
     /**
-     * An alternate way of specifying a bounding box where there is a central point of interest and a margin of error
-     * around it. Some points at the corners of the bounding box are farther away than the radius (which is the radius
-     * of a circle inscribed in the bounding box).
+     * For a given transit ride from a boardStop to an alightStop on a TripPattern, return whether that ride
+     * passes through this SelectedLink area.
      */
-    public static class SelectionBox {
-        double lon;
-        double lat;
-        double radiusMeters;
-        public Envelope toEnvelope () {
-            Envelope env = new Envelope();
-            env.expandToInclude(lon, lat);
-            env.expandBy(radiusMeters); // FIXME convert to lon and lat degrees
+    public boolean includes (int tripPattern, int boardStop, int alightStop) {
+        TIntCollection hops = hopsInTripPattern.get(tripPattern);
+        if (hops.isEmpty()) {
+            return false;
         }
-    }
-
-    /**
-     * Uniquely identifies a segment between two subsequent stops on a TripPattern.
-     * This allows us to record in advance which segments pass through the link selection box.
-     */
-    public static class TripPatternSegment {
-        TripPattern tripPattern;
-        int tripPatternIndex; // The integer ID of this tripPattern as a Raptor "route" in R5 routing.
-        int fromStopIndex; // Not the GTFS stop sequence number, the internal R5 index within the pattern.
+        for (int hop : hops.toArray()) {
+            // Hops are identified with the stop index at their beginning so alightStop is exclusive.
+            // (Alighting at a stop does not ride over the hop identified with that stop index.)
+            if (hop >= boardStop && hop < alightStop) {
+                return true;
+            }
+        }
+        return false;
     }
 
 }