diff --git a/packages/turf-clusters-dbscan/index.ts b/packages/turf-clusters-dbscan/index.ts index 753fcd23dc..aec8c0bff9 100644 --- a/packages/turf-clusters-dbscan/index.ts +++ b/packages/turf-clusters-dbscan/index.ts @@ -1,9 +1,8 @@ import { GeoJsonProperties, FeatureCollection, Point } from "geojson"; import clone from "@turf/clone"; import distance from "@turf/distance"; -import { coordAll } from "@turf/meta"; -import { convertLength, Units } from "@turf/helpers"; -import clustering from "density-clustering"; +import { degreesToRadians, lengthToDegrees, Units } from "@turf/helpers"; +import RBush from "rbush"; export type Dbscan = "core" | "edge" | "noise"; export type DbscanProps = GeoJsonProperties & { @@ -11,6 +10,15 @@ export type DbscanProps = GeoJsonProperties & { cluster?: number; }; +// Structure of a point in the spatial index +type IndexedPoint = { + minX: number; + minY: number; + maxX: number; + maxY: number; + index: number; +}; + /** * Takes a set of {@link Point|points} and partition them into clusters according to {@link DBSCAN's|https://en.wikipedia.org/wiki/DBSCAN} data clustering algorithm. * @@ -53,37 +61,122 @@ function clustersDbscan( if (options.mutate !== true) points = clone(points); // Defaults - options.minPoints = options.minPoints || 3; - - // create clustered ids - var dbscan = new clustering.DBSCAN(); - var clusteredIds = dbscan.run( - coordAll(points), - convertLength(maxDistance, options.units), - options.minPoints, - distance + const minPoints = options.minPoints || 3; + + // Calculate the distance in degrees for region queries + const latDistanceInDegrees = lengthToDegrees(maxDistance, options.units); + + // Create a spatial index + var tree = new RBush(points.features.length); + + // Keeps track of whether a point has been visited or not. + var visited = points.features.map((_) => false); + + // Keeps track of whether a point is assigned to a cluster or not. + var assigned = points.features.map((_) => false); + + // Keeps track of whether a point is noise|edge or not. + var isnoise = points.features.map((_) => false); + + // Keeps track of the clusterId for each point + var clusterIds: number[] = points.features.map((_) => -1); + + // Index each point for spatial queries + tree.load( + points.features.map((point, index) => { + var [x, y] = point.geometry.coordinates; + return { + minX: x, + minY: y, + maxX: x, + maxY: y, + index: index, + } as IndexedPoint; + }) ); - // Tag points to Clusters ID - var clusterId = -1; - clusteredIds.forEach(function (clusterIds) { - clusterId++; - // assign cluster ids to input points - clusterIds.forEach(function (idx) { - var clusterPoint = points.features[idx]; - if (!clusterPoint.properties) clusterPoint.properties = {}; - clusterPoint.properties.cluster = clusterId; - clusterPoint.properties.dbscan = "core"; - }); + // Function to find neighbors of a point within a given distance + const regionQuery = (index: number): IndexedPoint[] => { + const point = points.features[index]; + const [x, y] = point.geometry.coordinates; + + const minY = Math.max(y - latDistanceInDegrees, -90.0); + const maxY = Math.min(y + latDistanceInDegrees, 90.0); + + const lonDistanceInDegrees = (function () { + // Handle the case where the bounding box crosses the poles + if (minY < 0 && maxY > 0) { + return latDistanceInDegrees; + } + if (Math.abs(minY) < Math.abs(maxY)) { + return latDistanceInDegrees / Math.cos(degreesToRadians(maxY)); + } else { + return latDistanceInDegrees / Math.cos(degreesToRadians(minY)); + } + })(); + + const minX = Math.max(x - lonDistanceInDegrees, -360.0); + const maxX = Math.min(x + lonDistanceInDegrees, 360.0); + + // Calculate the bounding box for the region query + const bbox = { minX, minY, maxX, maxY }; + return tree.search(bbox).filter((neighbor) => { + const neighborIndex = (neighbor as IndexedPoint).index; + const neighborPoint = points.features[neighborIndex]; + const distanceInKm = distance(point, neighborPoint, { + units: "kilometers", + }); + return distanceInKm <= maxDistance; + }) as IndexedPoint[]; + }; + + // Function to expand a cluster + const expandCluster = (clusteredId: number, neighbors: IndexedPoint[]) => { + for (var i = 0; i < neighbors.length; i++) { + var neighbor = neighbors[i]; + const neighborIndex = neighbor.index; + if (!visited[neighborIndex]) { + visited[neighborIndex] = true; + const nextNeighbors = regionQuery(neighborIndex); + if (nextNeighbors.length >= minPoints) { + neighbors.push(...nextNeighbors); + } + } + if (!assigned[neighborIndex]) { + assigned[neighborIndex] = true; + clusterIds[neighborIndex] = clusteredId; + } + } + }; + + // Main DBSCAN clustering algorithm + var nextClusteredId = 0; + points.features.forEach((_, index) => { + if (visited[index]) return; + const neighbors = regionQuery(index); + if (neighbors.length >= minPoints) { + const clusteredId = nextClusteredId; + nextClusteredId++; + visited[index] = true; + expandCluster(clusteredId, neighbors); + } else { + isnoise[index] = true; + } }); - // handle noise points, if any - // edges points are tagged by DBSCAN as both 'noise' and 'cluster' as they can "reach" less than 'minPoints' number of points - dbscan.noise.forEach(function (noiseId) { - var noisePoint = points.features[noiseId]; - if (!noisePoint.properties) noisePoint.properties = {}; - if (noisePoint.properties.cluster) noisePoint.properties.dbscan = "edge"; - else noisePoint.properties.dbscan = "noise"; + // Assign DBSCAN properties to each point + points.features.forEach((_, index) => { + var clusterPoint = points.features[index]; + if (!clusterPoint.properties) { + clusterPoint.properties = {}; + } + + if (clusterIds[index] >= 0) { + clusterPoint.properties.dbscan = isnoise[index] ? "edge" : "core"; + clusterPoint.properties.cluster = clusterIds[index]; + } else { + clusterPoint.properties.dbscan = "noise"; + } }); return points as FeatureCollection; diff --git a/packages/turf-clusters-dbscan/package.json b/packages/turf-clusters-dbscan/package.json index f97beea79f..dd47b96bb7 100644 --- a/packages/turf-clusters-dbscan/package.json +++ b/packages/turf-clusters-dbscan/package.json @@ -58,7 +58,6 @@ "devDependencies": { "@turf/centroid": "^7.0.0-alpha.2", "@turf/clusters": "^7.0.0-alpha.2", - "@types/density-clustering": "^1.3.3", "@types/tape": "^4.2.32", "benchmark": "^2.1.4", "chromatism": "^3.0.0", @@ -75,7 +74,7 @@ "@turf/distance": "^7.0.0-alpha.2", "@turf/helpers": "^7.0.0-alpha.2", "@turf/meta": "^7.0.0-alpha.2", - "density-clustering": "1.3.0", + "rbush": "^3.0.1", "tslib": "^2.6.2" } }