From 10a6d3b659ca04a14773f3e0443f942cdb8c4d1f Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Mon, 6 Jan 2025 16:20:01 -0500 Subject: [PATCH 01/15] Improve performance of hammer edge bundling --- datashader/bundling.py | 118 ++++++++++++++++++++++++++++++----------- 1 file changed, 86 insertions(+), 32 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 4cfac06ef..754c8d3c9 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -35,18 +35,42 @@ def func(*args, **kwargs): import numpy as np import pandas as pd import param +import numba as nb from .utils import ngjit -@ngjit +@nb.jit( + nb.float32(nb.float32[::1], nb.float32[::1]), + nopython=True, + nogil=True, + fastmath=True, + locals={"result": nb.float32, "diff": nb.float32}, +) def distance_between(a, b): """Find the Euclidean distance between two points.""" - return (((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2))**(0.5) - - -@ngjit -def resample_segment(segments, new_segments, min_segment_length, max_segment_length, ndims): + diff = a[0] - b[0] + result = diff * diff + diff = a[1] - b[1] + result += diff * diff + return result + + + +@nb.jit( + 'f4[:,::1](f4[:,::1],f4[:,::1],f8,f8,f8,i8)', + nopython=True, + nogil=True, + fastmath=True, + locals={ + 'next_point': nb.float32[::1], + 'current_point': nb.float32[::1], + 'pos': nb.uint64, + 'index': nb.uint64, + 'distance': nb.float32 + } +) +def resample_segment(segments, new_segments, min_segment_length, max_segment_length, squared_mean_segment_length, ndims): next_point = np.zeros(ndims, dtype=segments.dtype) current_point = segments[0] pos = 0 @@ -62,7 +86,7 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len index += 2 elif distance > max_segment_length: # If points are too far away from each other, linearly place new points - points = int(ceil(distance / ((max_segment_length + min_segment_length) / 2))) + points = int(ceil(np.sqrt(distance / squared_mean_segment_length))) for i in range(points): new_segments[pos] = current_point + (i * ((next_point - current_point) / points)) pos += 1 @@ -78,8 +102,20 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len return new_segments -@ngjit -def calculate_length(segments, min_segment_length, max_segment_length): +@nb.jit( + nb.types.Tuple((nb.boolean, nb.uint64))(nb.float32[:,::1], nb.float64, nb.float64, nb.float64), + nopython=True, + nogil=True, + fastmath=True, + locals={ + 'next_point': nb.float32[::1], + 'current_point': nb.float32[::1], + 'pos': nb.uint64, + 'index': nb.uint64, + 'distance': nb.float32 + } +) +def calculate_length(segments, squared_min_segment_length, squared_max_segment_length, squared_mean_segment_length): current_point = segments[0] index = 1 total = 0 @@ -87,15 +123,15 @@ def calculate_length(segments, min_segment_length, max_segment_length): while index < len(segments): next_point = segments[index] distance = distance_between(current_point, next_point) - if (distance < min_segment_length and 1 < index < (len(segments) - 2)): + if (distance < squared_min_segment_length and 1 < index < (len(segments) - 2)): any_change = True current_point = (current_point + next_point) / 2 total += 1 index += 2 - elif distance > max_segment_length: + elif distance > squared_max_segment_length: any_change = True # Linear subsample - points = int(ceil(distance / ((max_segment_length + min_segment_length) / 2))) + points = int(ceil(np.sqrt(distance / squared_mean_segment_length))) total += points current_point = next_point index += 1 @@ 
-108,20 +144,26 @@ def calculate_length(segments, min_segment_length, max_segment_length): return any_change, total -def resample_edge(segments, min_segment_length, max_segment_length, ndims): - change, total_resamples = calculate_length(segments, min_segment_length, max_segment_length) +def resample_edge(segments, squared_min_segment_length, squared_max_segment_length, + squared_mean_segment_length, ndims): + change, total_resamples = calculate_length(segments, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length) if not change: return segments - resampled = np.empty((total_resamples, ndims)) - resample_segment(segments, resampled, min_segment_length, max_segment_length, ndims) + resampled = np.empty((total_resamples, ndims), dtype=np.float32) + resample_segment(segments, resampled, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, ndims) return resampled -@delayed def resample_edges(edge_segments, min_segment_length, max_segment_length, ndims): + squared_min_segment_length = min_segment_length**2 + squared_max_segment_length = max_segment_length**2 + squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 replaced_edges = [] for segments in edge_segments: - replaced_edges.append(resample_edge(segments, min_segment_length, max_segment_length, + replaced_edges.append(resample_edge(segments, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, ndims)) return replaced_edges @@ -161,7 +203,6 @@ def advect_and_resample(vert, horiz, segments, iterations, accuracy, min_segment return segments -@delayed def advect_resample_all(gradients, edge_segments, iterations, accuracy, min_segment_length, max_segment_length, segment_class): vert, horiz = gradients @@ -176,7 +217,6 @@ def batches(seq, n): yield seq[i:i + n] -@delayed def draw_to_surface(edge_segments, bandwidth, accuracy, accumulator): img = np.zeros((accuracy + 1, accuracy + 1)) for segments in edge_segments: @@ -185,7 +225,6 @@ def draw_to_surface(edge_segments, bandwidth, accuracy, accumulator): return gaussian(img, sigma=bandwidth / 2) -@delayed def get_gradients(img): img /= np.max(img) @@ -219,7 +258,7 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]]) + return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]], dtype=np.float32) @staticmethod @ngjit @@ -242,7 +281,7 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1]], [edge[2], edge[3]]]) + return np.array([[edge[0], edge[1]], [edge[2], edge[3]]], dtype=np.float32) @staticmethod @ngjit @@ -266,7 +305,7 @@ def get_merged_columns(params): @ngjit def create_segment(edge): return np.array([[edge[0], edge[1], edge[2], edge[5]], - [edge[0], edge[3], edge[4], edge[5]]]) + [edge[0], edge[3], edge[4], edge[5]]], dtype=np.float32) @staticmethod @ngjit @@ -289,7 +328,7 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]]) + return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]], dtype=np.float32) @staticmethod @ngjit @@ -464,6 +503,9 @@ class hammer_bundle(connect_edges): weight = param.String(default='weight', allow_None=True, doc=""" Column name for each edge weight. 
If None, weights are ignored.""") + use_dask = param.Boolean(default=False, doc=""" + Whether to use dask to parallelize the computation.""") + def __call__(self, nodes, edges, **params): if dask is None or skimage is None: raise ImportError("hammer_bundle operation requires dask and scikit-image. " @@ -472,6 +514,17 @@ def __call__(self, nodes, edges, **params): p = param.ParamOverrides(self, params) + if p.use_dask: + resample_edges_fn = delayed(resample_edges) + draw_to_surface_fn = delayed(draw_to_surface) + get_gradients_fn = delayed(get_gradients) + advect_resample_all_fn = delayed(advect_resample_all) + else: + result_edges_fn = resample_edges + draw_to_surface_fn = draw_to_surface + get_gradients_fn = get_gradients + advect_resample_all_fn = advect_resample_all + # Calculate min/max for coordinates xmin, xmax = np.min(nodes[p.x]), np.max(nodes[p.x]) ymin, ymax = np.min(nodes[p.y]), np.max(nodes[p.y]) @@ -489,7 +542,7 @@ def __call__(self, nodes, edges, **params): # This gets the edges split into lots of small segments # Doing this inside a delayed function lowers the transmission overhead - edge_segments = [resample_edges(batch, p.min_segment_length, p.max_segment_length, + edge_segments = [resample_edges_fn(batch, p.min_segment_length, p.max_segment_length, segment_class.ndims) for batch in edge_batches] for i in range(p.iterations): @@ -501,25 +554,26 @@ def __call__(self, nodes, edges, **params): break # Draw the density maps and combine them - images = [draw_to_surface(segment, bandwidth, p.accuracy, segment_class.accumulate) + images = [draw_to_surface_fn(segment, bandwidth, p.accuracy, segment_class.accumulate) for segment in edge_segments] overall_image = sum(images) - gradients = get_gradients(overall_image) + gradients = get_gradients_fn(overall_image) # Move edges along the gradients and resample when necessary # This could include smoothing to adjust the amount a graph can change - edge_segments = [advect_resample_all(gradients, segment, p.advect_iterations, + edge_segments = [advect_resample_all_fn(gradients, segment, p.advect_iterations, p.accuracy, p.min_segment_length, p.max_segment_length, segment_class) for segment in edge_segments] # Do a final resample to a smaller size for nicer rendering - edge_segments = [resample_edges(segment, p.min_segment_length, p.max_segment_length, + edge_segments = [resample_edges_fn(segment, p.min_segment_length, p.max_segment_length, segment_class.ndims) for segment in edge_segments] - # Finally things can be sent for computation - edge_segments = compute(*edge_segments) + if p.use_dask: + # Finally things can be sent for computation + edge_segments = compute(edge_segments) # Smooth out the graph for i in range(10): From 49ea9e70646d01c9cd872def355ba40adca09005 Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Mon, 6 Jan 2025 16:23:49 -0500 Subject: [PATCH 02/15] Typo on naming function var --- datashader/bundling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 754c8d3c9..3330f40e4 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -520,7 +520,7 @@ def __call__(self, nodes, edges, **params): get_gradients_fn = delayed(get_gradients) advect_resample_all_fn = delayed(advect_resample_all) else: - result_edges_fn = resample_edges + resample_edges_fn = resample_edges draw_to_surface_fn = draw_to_surface get_gradients_fn = get_gradients advect_resample_all_fn = advect_resample_all From 3478ebb7c3214d2212f02fdbc8fbbe4bb111e8c7 Mon Sep 17 00:00:00 2001 
From: lmcinnes Date: Mon, 6 Jan 2025 20:09:41 -0500 Subject: [PATCH 03/15] Jit more things more carefully. --- datashader/bundling.py | 57 +++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 3330f40e4..6c52f0c19 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -64,7 +64,8 @@ def distance_between(a, b): fastmath=True, locals={ 'next_point': nb.float32[::1], - 'current_point': nb.float32[::1], + 'current_point': nb.float32[::1], + 'i': nb.uint16, 'pos': nb.uint64, 'index': nb.uint64, 'distance': nb.float32 @@ -86,7 +87,7 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len index += 2 elif distance > max_segment_length: # If points are too far away from each other, linearly place new points - points = int(ceil(np.sqrt(distance / squared_mean_segment_length))) + points = np.uint16(ceil(np.sqrt(distance / squared_mean_segment_length))) for i in range(points): new_segments[pos] = current_point + (i * ((next_point - current_point) / points)) pos += 1 @@ -131,7 +132,7 @@ def calculate_length(segments, squared_min_segment_length, squared_max_segment_l elif distance > squared_max_segment_length: any_change = True # Linear subsample - points = int(ceil(np.sqrt(distance / squared_mean_segment_length))) + points = np.uint16(ceil(np.sqrt(distance / squared_mean_segment_length))) total += points current_point = next_point index += 1 @@ -143,7 +144,11 @@ def calculate_length(segments, squared_min_segment_length, squared_max_segment_l total += 1 return any_change, total - +@nb.jit( + 'f4[:,::1](f4[:,::1],f8,f8,f8,i8)', + nopython=True, + nogil=True, +) def resample_edge(segments, squared_min_segment_length, squared_max_segment_length, squared_mean_segment_length, ndims): change, total_resamples = calculate_length(segments, squared_min_segment_length, @@ -182,32 +187,48 @@ def smooth(edge_segments, tension, idx, idy): smooth_segment(segments, tension, idx, idy) -@ngjit +@nb.jit( + 'void(f4[:,::1],f8[:,::1],f8[:,::1],f8,i8,i8)', + nopython=True, + nogil=True, + fastmath=True, + locals={"x": nb.uint16, "y": nb.uint16} +) def advect_segments(segments, vert, horiz, accuracy, idx, idy): for i in range(1, len(segments) - 1): - x = int(segments[i][idx] * accuracy) - y = int(segments[i][idy] * accuracy) - segments[i][idx] = segments[i][idx] + horiz[x, y] / accuracy - segments[i][idy] = segments[i][idy] + vert[x, y] / accuracy - segments[i][idx] = max(0, min(segments[i][idx], 1)) - segments[i][idy] = max(0, min(segments[i][idy], 1)) + x = np.uint16(segments[i, idx] * accuracy) + y = np.uint16(segments[i, idy] * accuracy) + segments[i, idx] = segments[i, idx] + horiz[x, y] / accuracy + segments[i, idy] = segments[i, idy] + vert[x, y] / accuracy + segments[i, idx] = max(0, min(segments[i, idx], 1)) + segments[i, idy] = max(0, min(segments[i, idy], 1)) -def advect_and_resample(vert, horiz, segments, iterations, accuracy, min_segment_length, - max_segment_length, segment_class): +@nb.jit( + 'f4[:,::1](f8[:,::1],f8[:,::1],f4[:,::1],i8,f8,f8,f8,f8,i8,i8,i8)', + nopython=True, + nogil=True, + locals={'it': nb.uint8} +) +def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, idx, idy, ndims): for it in range(iterations): - advect_segments(segments, vert, horiz, accuracy, segment_class.idx, segment_class.idy) + advect_segments(segments, vert, horiz, accuracy, idx, idy) 
if it % 2 == 0: - segments = resample_edge(segments, min_segment_length, max_segment_length, - segment_class.ndims) + segments = resample_edge(segments, squared_min_segment_length, squared_max_segment_length, + squared_mean_segment_length, ndims) return segments def advect_resample_all(gradients, edge_segments, iterations, accuracy, min_segment_length, max_segment_length, segment_class): vert, horiz = gradients - return [advect_and_resample(vert, horiz, edges, iterations, accuracy, min_segment_length, - max_segment_length, segment_class) + squared_min_segment_length = min_segment_length**2 + squared_max_segment_length = max_segment_length**2 + squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 + return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, + segment_class.idx, segment_class.idy, segment_class.ndims) for edges in edge_segments] From f11ec9dafa3b82dfb8fe8c74fa59e15d5876e7cd Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Mon, 6 Jan 2025 21:50:54 -0500 Subject: [PATCH 04/15] exploring numba parallelism --- datashader/bundling.py | 67 ++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 6c52f0c19..7c6679f06 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -58,13 +58,14 @@ def distance_between(a, b): @nb.jit( - 'f4[:,::1](f4[:,::1],f4[:,::1],f8,f8,f8,i8)', + 'f4[:,::1](f4[:,::1],f4[:,::1],f8,f8,f8,u1)', nopython=True, nogil=True, fastmath=True, locals={ 'next_point': nb.float32[::1], 'current_point': nb.float32[::1], + 'step_vector': nb.float32[::1], 'i': nb.uint16, 'pos': nb.uint64, 'index': nb.uint64, @@ -88,8 +89,9 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len elif distance > max_segment_length: # If points are too far away from each other, linearly place new points points = np.uint16(ceil(np.sqrt(distance / squared_mean_segment_length))) + step_vector = (next_point - current_point) / points for i in range(points): - new_segments[pos] = current_point + (i * ((next_point - current_point) / points)) + new_segments[pos] = current_point + (i * step_vector) pos += 1 current_point = next_point index += 1 @@ -145,7 +147,7 @@ def calculate_length(segments, squared_min_segment_length, squared_max_segment_l return any_change, total @nb.jit( - 'f4[:,::1](f4[:,::1],f8,f8,f8,i8)', + 'f4[:,::1](f4[:,::1],f8,f8,f8,u1)', nopython=True, nogil=True, ) @@ -160,17 +162,31 @@ def resample_edge(segments, squared_min_segment_length, squared_max_segment_leng squared_max_segment_length, squared_mean_segment_length, ndims) return resampled - +@nb.jit( + nopython=True, + nogil=True, + parallel=True, +) def resample_edges(edge_segments, min_segment_length, max_segment_length, ndims): squared_min_segment_length = min_segment_length**2 squared_max_segment_length = max_segment_length**2 squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 - replaced_edges = [] - for segments in edge_segments: - replaced_edges.append(resample_edge(segments, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, - ndims)) - return replaced_edges + # replaced_edges = [] + # for segments in edge_segments: + # replaced_edges.append(resample_edge(segments, squared_min_segment_length, + # squared_max_segment_length, squared_mean_segment_length, + # ndims)) + # return replaced_edges + 
n_results = len(edge_segments) + result = [np.empty((1,1), dtype=np.float32) for i in range(n_results)] + for i in nb.prange(n_results): + result[i] = resample_edge(edge_segments[i], squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, + ndims) + return result + # return [resample_edge(segments, squared_min_segment_length, squared_max_segment_length, + # squared_mean_segment_length, ndims) + # for segments in edge_segments] @ngjit @@ -188,7 +204,7 @@ def smooth(edge_segments, tension, idx, idy): @nb.jit( - 'void(f4[:,::1],f8[:,::1],f8[:,::1],f8,i8,i8)', + 'void(f4[:,::1],f8[:,::1],f8[:,::1],f8,u1,u1)', nopython=True, nogil=True, fastmath=True, @@ -205,7 +221,7 @@ def advect_segments(segments, vert, horiz, accuracy, idx, idy): @nb.jit( - 'f4[:,::1](f8[:,::1],f8[:,::1],f4[:,::1],i8,f8,f8,f8,f8,i8,i8,i8)', + 'f4[:,::1](f8[:,::1],f8[:,::1],f4[:,::1],i8,f8,f8,f8,f8,u1,u1,u1)', nopython=True, nogil=True, locals={'it': nb.uint8} @@ -219,17 +235,28 @@ def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_min squared_mean_segment_length, ndims) return segments - +@nb.jit( + nopython=True, + nogil=True, + parallel=True, +) def advect_resample_all(gradients, edge_segments, iterations, accuracy, min_segment_length, - max_segment_length, segment_class): + max_segment_length, idx, idy, ndims): vert, horiz = gradients squared_min_segment_length = min_segment_length**2 squared_max_segment_length = max_segment_length**2 squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 - return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, - segment_class.idx, segment_class.idy, segment_class.ndims) - for edges in edge_segments] + n_results = len(edge_segments) + result = [np.empty((1,1), dtype=np.float32) for i in range(n_results)] + for i in nb.prange(n_results): + result[i] = advect_and_resample(vert, horiz, edge_segments[i], iterations, accuracy, squared_min_segment_length, + squared_max_segment_length, squared_mean_segment_length, + idx, idy, ndims) + return result + # return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_min_segment_length, + # squared_max_segment_length, squared_mean_segment_length, + # idx, idy, ndims) + # for edges in edge_segments] def batches(seq, n): @@ -256,6 +283,7 @@ def get_gradients(img): vert /= magnitude horiz /= magnitude return (vert, horiz) + # return (vert.astype(np.float32), horiz.astype(np.float32)) class BaseSegment: @@ -585,7 +613,8 @@ def __call__(self, nodes, edges, **params): # This could include smoothing to adjust the amount a graph can change edge_segments = [advect_resample_all_fn(gradients, segment, p.advect_iterations, p.accuracy, p.min_segment_length, - p.max_segment_length, segment_class) + p.max_segment_length, segment_class.idx, + segment_class.idy, segment_class.ndims) for segment in edge_segments] # Do a final resample to a smaller size for nicer rendering From d0428a24651d0c74b79894327dd41f97779c371f Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Mon, 6 Jan 2025 22:59:33 -0500 Subject: [PATCH 05/15] Don't do double work on resampling; refactor resampling functions to use named tuples for segment lengths and simplify parameters --- datashader/bundling.py | 119 ++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 78 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 7c6679f06..96b08981b 100644 --- 
a/datashader/bundling.py +++ b/datashader/bundling.py @@ -16,6 +16,7 @@ from math import ceil from pandas import DataFrame +from collections import namedtuple try: import dask @@ -39,6 +40,9 @@ def func(*args, **kwargs): from .utils import ngjit +SegmentLength = namedtuple('SegmentLength', ['min', 'max', 'mean']) +segment_length_type = nb.types.NamedUniTuple(nb.float64, 3, SegmentLength) + @nb.jit( nb.float32(nb.float32[::1], nb.float32[::1]), @@ -55,10 +59,8 @@ def distance_between(a, b): result += diff * diff return result - - @nb.jit( - 'f4[:,::1](f4[:,::1],f4[:,::1],f8,f8,f8,u1)', + nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.uint16[::1], nb.int64), nopython=True, nogil=True, fastmath=True, @@ -72,23 +74,22 @@ def distance_between(a, b): 'distance': nb.float32 } ) -def resample_segment(segments, new_segments, min_segment_length, max_segment_length, squared_mean_segment_length, ndims): +def resample_segment(segments, new_segments, n_points_to_add, ndims): next_point = np.zeros(ndims, dtype=segments.dtype) current_point = segments[0] pos = 0 index = 1 while index < len(segments): next_point = segments[index] - distance = distance_between(current_point, next_point) - if (distance < min_segment_length and 1 < index < (len(segments) - 2)): + if (n_points_to_add[index] == 0 and 1 < index < (len(segments) - 2)): # Merge points, because they're too close to each other current_point = (current_point + next_point) / 2 new_segments[pos] = current_point pos += 1 index += 2 - elif distance > max_segment_length: + elif n_points_to_add[index] > 1: # If points are too far away from each other, linearly place new points - points = np.uint16(ceil(np.sqrt(distance / squared_mean_segment_length))) + points = n_points_to_add[index] step_vector = (next_point - current_point) / points for i in range(points): new_segments[pos] = current_point + (i * step_vector) @@ -104,9 +105,8 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len new_segments[pos] = next_point return new_segments - @nb.jit( - nb.types.Tuple((nb.boolean, nb.uint64))(nb.float32[:,::1], nb.float64, nb.float64, nb.float64), + nb.types.Tuple((nb.boolean, nb.uint64, nb.uint16[::1]))(nb.float32[:,::1], segment_length_type), nopython=True, nogil=True, fastmath=True, @@ -118,75 +118,56 @@ def resample_segment(segments, new_segments, min_segment_length, max_segment_len 'distance': nb.float32 } ) -def calculate_length(segments, squared_min_segment_length, squared_max_segment_length, squared_mean_segment_length): +def calculate_resampling(segments, squared_segment_length): current_point = segments[0] index = 1 total = 0 any_change = False + n_points_to_add = np.zeros(len(segments), dtype=np.uint16) while index < len(segments): next_point = segments[index] distance = distance_between(current_point, next_point) - if (distance < squared_min_segment_length and 1 < index < (len(segments) - 2)): + if (distance < squared_segment_length.min and 1 < index < (len(segments) - 2)): + # Merge points any_change = True current_point = (current_point + next_point) / 2 + n_points_to_add[index] = 0 total += 1 index += 2 - elif distance > squared_max_segment_length: + elif distance > squared_segment_length.max: any_change = True # Linear subsample - points = np.uint16(ceil(np.sqrt(distance / squared_mean_segment_length))) + points = np.uint16(ceil(np.sqrt(distance / squared_segment_length.mean))) + n_points_to_add[index] = points total += points current_point = next_point index += 1 else: # Do nothing + n_points_to_add[index] 
= 1 total += 1 current_point = next_point index += 1 total += 1 - return any_change, total + return any_change, total, n_points_to_add @nb.jit( - 'f4[:,::1](f4[:,::1],f8,f8,f8,u1)', + nb.float32[:,::1](nb.float32[:,::1], segment_length_type, nb.int64), nopython=True, nogil=True, ) -def resample_edge(segments, squared_min_segment_length, squared_max_segment_length, - squared_mean_segment_length, ndims): - change, total_resamples = calculate_length(segments, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length) +def resample_edge(segments, squared_segment_length, ndims): + change, total_resamples, n_points_to_add = calculate_resampling(segments, squared_segment_length) if not change: return segments resampled = np.empty((total_resamples, ndims), dtype=np.float32) - resample_segment(segments, resampled, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, ndims) + resample_segment(segments, resampled, n_points_to_add, ndims) return resampled -@nb.jit( - nopython=True, - nogil=True, - parallel=True, -) -def resample_edges(edge_segments, min_segment_length, max_segment_length, ndims): - squared_min_segment_length = min_segment_length**2 - squared_max_segment_length = max_segment_length**2 - squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 - # replaced_edges = [] - # for segments in edge_segments: - # replaced_edges.append(resample_edge(segments, squared_min_segment_length, - # squared_max_segment_length, squared_mean_segment_length, - # ndims)) - # return replaced_edges - n_results = len(edge_segments) - result = [np.empty((1,1), dtype=np.float32) for i in range(n_results)] - for i in nb.prange(n_results): - result[i] = resample_edge(edge_segments[i], squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, - ndims) - return result - # return [resample_edge(segments, squared_min_segment_length, squared_max_segment_length, - # squared_mean_segment_length, ndims) - # for segments in edge_segments] + +def resample_edges(edge_segments, squared_segment_length, ndims): + return [resample_edge(segments, squared_segment_length, ndims) + for segments in edge_segments] @ngjit @@ -221,42 +202,23 @@ def advect_segments(segments, vert, horiz, accuracy, idx, idy): @nb.jit( - 'f4[:,::1](f8[:,::1],f8[:,::1],f4[:,::1],i8,f8,f8,f8,f8,u1,u1,u1)', + nb.float32[:,::1](nb.float64[:,::1], nb.float64[:,::1], nb.float32[:,::1], nb.int64, nb.float64, segment_length_type, nb.int64, nb.int64, nb.int64), nopython=True, nogil=True, locals={'it': nb.uint8} ) -def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, idx, idy, ndims): +def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): for it in range(iterations): advect_segments(segments, vert, horiz, accuracy, idx, idy) if it % 2 == 0: - segments = resample_edge(segments, squared_min_segment_length, squared_max_segment_length, - squared_mean_segment_length, ndims) + segments = resample_edge(segments, squared_segment_length, ndims) return segments -@nb.jit( - nopython=True, - nogil=True, - parallel=True, -) -def advect_resample_all(gradients, edge_segments, iterations, accuracy, min_segment_length, - max_segment_length, idx, idy, ndims): +def advect_resample_all(gradients, edge_segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): vert, horiz = gradients - 
squared_min_segment_length = min_segment_length**2 - squared_max_segment_length = max_segment_length**2 - squared_mean_segment_length = ((min_segment_length + max_segment_length) / 2)**2 - n_results = len(edge_segments) - result = [np.empty((1,1), dtype=np.float32) for i in range(n_results)] - for i in nb.prange(n_results): - result[i] = advect_and_resample(vert, horiz, edge_segments[i], iterations, accuracy, squared_min_segment_length, - squared_max_segment_length, squared_mean_segment_length, - idx, idy, ndims) - return result - # return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_min_segment_length, - # squared_max_segment_length, squared_mean_segment_length, - # idx, idy, ndims) - # for edges in edge_segments] + return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_segment_length, + idx, idy, ndims) + for edges in edge_segments] def batches(seq, n): @@ -283,7 +245,6 @@ def get_gradients(img): vert /= magnitude horiz /= magnitude return (vert, horiz) - # return (vert.astype(np.float32), horiz.astype(np.float32)) class BaseSegment: @@ -589,9 +550,12 @@ def __call__(self, nodes, edges, **params): # This is simply to let the work split out over multiple cores edge_batches = list(batches(edges, p.batch_size)) + squared_segment_length = SegmentLength(p.min_segment_length**2, p.max_segment_length**2, + ((p.min_segment_length + p.max_segment_length) / 2)**2) + # This gets the edges split into lots of small segments # Doing this inside a delayed function lowers the transmission overhead - edge_segments = [resample_edges_fn(batch, p.min_segment_length, p.max_segment_length, + edge_segments = [resample_edges_fn(batch, squared_segment_length, segment_class.ndims) for batch in edge_batches] for i in range(p.iterations): @@ -612,13 +576,12 @@ def __call__(self, nodes, edges, **params): # Move edges along the gradients and resample when necessary # This could include smoothing to adjust the amount a graph can change edge_segments = [advect_resample_all_fn(gradients, segment, p.advect_iterations, - p.accuracy, p.min_segment_length, - p.max_segment_length, segment_class.idx, + p.accuracy, squared_segment_length, segment_class.idx, segment_class.idy, segment_class.ndims) for segment in edge_segments] # Do a final resample to a smaller size for nicer rendering - edge_segments = [resample_edges_fn(segment, p.min_segment_length, p.max_segment_length, + edge_segments = [resample_edges_fn(segment, squared_segment_length, segment_class.ndims) for segment in edge_segments] if p.use_dask: From c38505aa74911fccb5ca8fa4fe076d3d01e0b214 Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Mon, 6 Jan 2025 23:08:52 -0500 Subject: [PATCH 06/15] float accuracy for img --- datashader/bundling.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 96b08981b..639de609d 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -185,7 +185,7 @@ def smooth(edge_segments, tension, idx, idy): @nb.jit( - 'void(f4[:,::1],f8[:,::1],f8[:,::1],f8,u1,u1)', + nb.void(nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.float64, nb.int64, nb.int64), nopython=True, nogil=True, fastmath=True, @@ -195,14 +195,14 @@ def advect_segments(segments, vert, horiz, accuracy, idx, idy): for i in range(1, len(segments) - 1): x = np.uint16(segments[i, idx] * accuracy) y = np.uint16(segments[i, idy] * accuracy) - segments[i, idx] = segments[i, idx] + horiz[x, y] / accuracy - segments[i, idy] = segments[i, idy] 
+ vert[x, y] / accuracy + segments[i, idx] += horiz[x, y] / accuracy + segments[i, idy] += vert[x, y] / accuracy segments[i, idx] = max(0, min(segments[i, idx], 1)) segments[i, idy] = max(0, min(segments[i, idy], 1)) @nb.jit( - nb.float32[:,::1](nb.float64[:,::1], nb.float64[:,::1], nb.float32[:,::1], nb.int64, nb.float64, segment_length_type, nb.int64, nb.int64, nb.int64), + nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.int64, nb.float64, segment_length_type, nb.int64, nb.int64, nb.int64), nopython=True, nogil=True, locals={'it': nb.uint8} @@ -228,7 +228,7 @@ def batches(seq, n): def draw_to_surface(edge_segments, bandwidth, accuracy, accumulator): - img = np.zeros((accuracy + 1, accuracy + 1)) + img = np.zeros((accuracy + 1, accuracy + 1), dtype=np.float32) for segments in edge_segments: for point in segments: accumulator(img, point, accuracy) From 0239eeda9c1574c3188773a918eb9a53817ab24b Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 01:15:20 -0500 Subject: [PATCH 07/15] Refactor accumulation methods to handle multiple points and optimize smooth segment processing --- datashader/bundling.py | 85 ++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 639de609d..a9af34e7c 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -37,6 +37,7 @@ def func(*args, **kwargs): import pandas as pd import param import numba as nb +import itertools from .utils import ngjit @@ -170,20 +171,32 @@ def resample_edges(edge_segments, squared_segment_length, ndims): for segments in edge_segments] -@ngjit +@nb.jit( + nb.void(nb.float32[:,::1], nb.float32, nb.int64, nb.int64), + nopython=True, + nogil=True, + fastmath=True, + locals={ + "i": nb.uint16, + "segments": nb.float32[:,::1], + "previous": nb.float32[::1], + "current": nb.float32[::1], + "next_point": nb.float32[::1] + } +) def smooth_segment(segments, tension, idx, idy): - seg_length = len(segments) - 2 - for i in range(1, seg_length): - previous, current, next_point = segments[i - 1], segments[i], segments[i + 1] - current[idx] = ((1-tension)*current[idx]) + (tension*(previous[idx] + next_point[idx]) / 2) - current[idy] = ((1-tension)*current[idy]) + (tension*(previous[idy] + next_point[idy]) / 2) - + for _ in range(10): + seg_length = len(segments) - 2 + for i in range(1, seg_length): + previous, current, next_point = segments[i - 1], segments[i], segments[i + 1] + current[idx] = ((1-tension)*current[idx]) + (tension*(previous[idx] + next_point[idx]) / 2) + current[idy] = ((1-tension)*current[idy]) + (tension*(previous[idy] + next_point[idy]) / 2) + def smooth(edge_segments, tension, idx, idy): for segments in edge_segments: smooth_segment(segments, tension, idx, idy) - @nb.jit( nb.void(nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.float64, nb.int64, nb.int64), nopython=True, @@ -226,14 +239,24 @@ def batches(seq, n): for i in range(0, len(seq), n): yield seq[i:i + n] - def draw_to_surface(edge_segments, bandwidth, accuracy, accumulator): img = np.zeros((accuracy + 1, accuracy + 1), dtype=np.float32) for segments in edge_segments: - for point in segments: - accumulator(img, point, accuracy) + accumulator(img, segments, accuracy) return gaussian(img, sigma=bandwidth / 2) +@nb.jit( + nb.void(nb.float32[:,::1], nb.float32[:,::1]), + nopython=True, + nogil=True, + fastmath=True, +) +def normalize_gradients(vert, horiz): + for i in range(vert.shape[0]): + for j in 
range(vert.shape[1]): + magnitude = np.sqrt(horiz[i, j]**2 + vert[i, j]**2) + 1e-5 + vert[i, j] /= magnitude + horiz[i, j] /= magnitude def get_gradients(img): img /= np.max(img) @@ -241,9 +264,7 @@ def get_gradients(img): horiz = sobel_h(img) vert = sobel_v(img) - magnitude = np.sqrt(horiz**2 + vert**2) + 1e-5 - vert /= magnitude - horiz /= magnitude + normalize_gradients(vert, horiz) return (vert, horiz) @@ -272,8 +293,9 @@ def create_segment(edge): @staticmethod @ngjit - def accumulate(img, point, accuracy): - img[int(point[1] * accuracy), int(point[2] * accuracy)] += 1 + def accumulate(img, points, accuracy): + for point in points: + img[int(point[1] * accuracy), int(point[2] * accuracy)] += 1 class EdgelessUnweightedSegment(BaseSegment): @@ -295,8 +317,9 @@ def create_segment(edge): @staticmethod @ngjit - def accumulate(img, point, accuracy): - img[int(point[0] * accuracy), int(point[1] * accuracy)] += 1 + def accumulate(img, points, accuracy): + for point in points: + img[int(point[0] * accuracy), int(point[1] * accuracy)] += 1 class WeightedSegment(BaseSegment): @@ -319,8 +342,9 @@ def create_segment(edge): @staticmethod @ngjit - def accumulate(img, point, accuracy): - img[int(point[1] * accuracy), int(point[2] * accuracy)] += point[3] + def accumulate(img, points, accuracy): + for point in points: + img[int(point[1] * accuracy), int(point[2] * accuracy)] += point[3] class EdgelessWeightedSegment(BaseSegment): @@ -342,8 +366,9 @@ def create_segment(edge): @staticmethod @ngjit - def accumulate(img, point, accuracy): - img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2] + def accumulate(img, points, accuracy): + for point in points: + img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2] def _convert_graph_to_edge_segments(nodes, edges, params): @@ -411,12 +436,9 @@ def _convert_edge_segments_to_dataframe(edge_segments, segment_class, params): """ # Need to put an array of NaNs with size point_dims between edges - def edge_iterator(): - for edge in edge_segments: - yield edge - yield segment_class.create_delimiter() - - df = DataFrame(np.concatenate(list(edge_iterator()))) + delimiters = np.full((len(edge_segments), 1, segment_class.ndims), np.nan) + combined = list(itertools.chain(*zip(edge_segments, delimiters))) + df = DataFrame(np.concatenate(combined)) df.columns = segment_class.get_columns(params) return df @@ -588,16 +610,13 @@ def __call__(self, nodes, edges, **params): # Finally things can be sent for computation edge_segments = compute(edge_segments) - # Smooth out the graph - for i in range(10): - for batch in edge_segments: - smooth(batch, p.tension, segment_class.idx, segment_class.idy) - # Flatten things new_segs = [] for batch in edge_segments: new_segs.extend(batch) + smooth(new_segs, p.tension, segment_class.idx, segment_class.idy) + # Convert list of edge segments to Pandas dataframe df = _convert_edge_segments_to_dataframe(new_segs, segment_class, p) From 6080067f5d8c3f56aca6f784f4cc125379f29509 Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 01:17:36 -0500 Subject: [PATCH 08/15] Update accuracy parameter bounds to allow a maximum of 65535 --- datashader/bundling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index a9af34e7c..d136a9892 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -520,7 +520,7 @@ class hammer_bundle(connect_edges): tension = param.Number(default=0.3,bounds=(0,None),precedence=-0.5,doc=""" Exponential smoothing 
factor to use when smoothing""") - accuracy = param.Integer(default=500,bounds=(1,None),precedence=-0.5,doc=""" + accuracy = param.Integer(default=500,bounds=(1,65535),precedence=-0.5,doc=""" Number of entries in table for...""") advect_iterations = param.Integer(default=50,bounds=(0,None),precedence=-0.5,doc=""" From 15af8fb5dce0849f9c617607d0c8bbf64ab4481f Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 01:31:52 -0500 Subject: [PATCH 09/15] Refactor advect_segments function to inline logic --- datashader/bundling.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index d136a9892..0f6e269f9 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -198,31 +198,23 @@ def smooth(edge_segments, tension, idx, idy): smooth_segment(segments, tension, idx, idy) @nb.jit( - nb.void(nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.float64, nb.int64, nb.int64), + nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.int64, nb.float64, + segment_length_type, nb.uint64, nb.uint64, nb.int64), nopython=True, nogil=True, fastmath=True, - locals={"x": nb.uint16, "y": nb.uint16} -) -def advect_segments(segments, vert, horiz, accuracy, idx, idy): - for i in range(1, len(segments) - 1): - x = np.uint16(segments[i, idx] * accuracy) - y = np.uint16(segments[i, idy] * accuracy) - segments[i, idx] += horiz[x, y] / accuracy - segments[i, idy] += vert[x, y] / accuracy - segments[i, idx] = max(0, min(segments[i, idx], 1)) - segments[i, idy] = max(0, min(segments[i, idy], 1)) - - -@nb.jit( - nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.int64, nb.float64, segment_length_type, nb.int64, nb.int64, nb.int64), - nopython=True, - nogil=True, - locals={'it': nb.uint8} + locals={'it': nb.uint8, "i": nb.uint16, "x": nb.uint16, "y": nb.uint16} ) def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): for it in range(iterations): - advect_segments(segments, vert, horiz, accuracy, idx, idy) + for i in range(1, len(segments) - 1): + x = np.uint16(segments[i, idx] * accuracy) + y = np.uint16(segments[i, idy] * accuracy) + segments[i, idx] += horiz[x, y] / accuracy + segments[i, idy] += vert[x, y] / accuracy + segments[i, idx] = max(0, min(segments[i, idx], 1)) + segments[i, idy] = max(0, min(segments[i, idy], 1)) + if it % 2 == 0: segments = resample_edge(segments, squared_segment_length, ndims) return segments From fea12d68b562ea3dae37c9c027a4e3a1e1185b1f Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 11:37:46 -0500 Subject: [PATCH 10/15] dask puts it all in a length 1 list; fix for that --- datashader/bundling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 0f6e269f9..581e5e465 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -600,7 +600,7 @@ def __call__(self, nodes, edges, **params): if p.use_dask: # Finally things can be sent for computation - edge_segments = compute(edge_segments) + edge_segments = compute(edge_segments)[0] # Flatten things new_segs = [] From 9e8b6867431b34e528d373abee95c18093f552fb Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 16:13:53 -0500 Subject: [PATCH 11/15] reformat long lines --- datashader/bundling.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/datashader/bundling.py 
b/datashader/bundling.py index 581e5e465..5ff4dfcb8 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -158,7 +158,8 @@ def calculate_resampling(segments, squared_segment_length): nogil=True, ) def resample_edge(segments, squared_segment_length, ndims): - change, total_resamples, n_points_to_add = calculate_resampling(segments, squared_segment_length) + change, total_resamples, n_points_to_add = calculate_resampling(segments, + squared_segment_length) if not change: return segments resampled = np.empty((total_resamples, ndims), dtype=np.float32) @@ -189,8 +190,10 @@ def smooth_segment(segments, tension, idx, idy): seg_length = len(segments) - 2 for i in range(1, seg_length): previous, current, next_point = segments[i - 1], segments[i], segments[i + 1] - current[idx] = ((1-tension)*current[idx]) + (tension*(previous[idx] + next_point[idx]) / 2) - current[idy] = ((1-tension)*current[idy]) + (tension*(previous[idy] + next_point[idy]) / 2) + current[idx] = (((1-tension)*current[idx]) + + (tension*(previous[idx] + next_point[idx]) / 2)) + current[idy] = (((1-tension)*current[idy]) + + (tension*(previous[idy] + next_point[idy]) / 2)) def smooth(edge_segments, tension, idx, idy): @@ -205,7 +208,8 @@ def smooth(edge_segments, tension, idx, idy): fastmath=True, locals={'it': nb.uint8, "i": nb.uint16, "x": nb.uint16, "y": nb.uint16} ) -def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): +def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, + idx, idy, ndims): for it in range(iterations): for i in range(1, len(segments) - 1): x = np.uint16(segments[i, idx] * accuracy) @@ -281,7 +285,8 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]], dtype=np.float32) + return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]], + dtype=np.float32) @staticmethod @ngjit @@ -354,7 +359,8 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]], dtype=np.float32) + return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]], + dtype=np.float32) @staticmethod @ngjit @@ -564,8 +570,10 @@ def __call__(self, nodes, edges, **params): # This is simply to let the work split out over multiple cores edge_batches = list(batches(edges, p.batch_size)) - squared_segment_length = SegmentLength(p.min_segment_length**2, p.max_segment_length**2, - ((p.min_segment_length + p.max_segment_length) / 2)**2) + squared_segment_length = SegmentLength( + p.min_segment_length**2, p.max_segment_length**2, + ((p.min_segment_length + p.max_segment_length) / 2)**2 + ) # This gets the edges split into lots of small segments # Doing this inside a delayed function lowers the transmission overhead @@ -590,8 +598,9 @@ def __call__(self, nodes, edges, **params): # Move edges along the gradients and resample when necessary # This could include smoothing to adjust the amount a graph can change edge_segments = [advect_resample_all_fn(gradients, segment, p.advect_iterations, - p.accuracy, squared_segment_length, segment_class.idx, - segment_class.idy, segment_class.ndims) + p.accuracy, squared_segment_length, + segment_class.idx, segment_class.idy, + segment_class.ndims) for segment in edge_segments] # Do a final resample to a smaller size for nicer rendering From c81fa457d560fc9f9ed0d40474fe933ee427d9fc Mon 
Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 16:19:30 -0500 Subject: [PATCH 12/15] Remove trailing whitespace --- datashader/bundling.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 5ff4dfcb8..2e621572a 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -62,16 +62,16 @@ def distance_between(a, b): @nb.jit( nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.uint16[::1], nb.int64), - nopython=True, - nogil=True, + nopython=True, + nogil=True, fastmath=True, locals={ - 'next_point': nb.float32[::1], + 'next_point': nb.float32[::1], 'current_point': nb.float32[::1], 'step_vector': nb.float32[::1], 'i': nb.uint16, - 'pos': nb.uint64, - 'index': nb.uint64, + 'pos': nb.uint64, + 'index': nb.uint64, 'distance': nb.float32 } ) @@ -112,10 +112,10 @@ def resample_segment(segments, new_segments, n_points_to_add, ndims): nogil=True, fastmath=True, locals={ - 'next_point': nb.float32[::1], - 'current_point': nb.float32[::1], - 'pos': nb.uint64, - 'index': nb.uint64, + 'next_point': nb.float32[::1], + 'current_point': nb.float32[::1], + 'pos': nb.uint64, + 'index': nb.uint64, 'distance': nb.float32 } ) @@ -178,7 +178,7 @@ def resample_edges(edge_segments, squared_segment_length, ndims): nogil=True, fastmath=True, locals={ - "i": nb.uint16, + "i": nb.uint16, "segments": nb.float32[:,::1], "previous": nb.float32[::1], "current": nb.float32[::1], @@ -201,7 +201,7 @@ def smooth(edge_segments, tension, idx, idy): smooth_segment(segments, tension, idx, idy) @nb.jit( - nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.int64, nb.float64, + nb.float32[:,::1](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.int64, nb.float64, segment_length_type, nb.uint64, nb.uint64, nb.int64), nopython=True, nogil=True, @@ -223,9 +223,10 @@ def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_seg segments = resample_edge(segments, squared_segment_length, ndims) return segments -def advect_resample_all(gradients, edge_segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): +def advect_resample_all(gradients, edge_segments, iterations, accuracy, squared_segment_length, + idx, idy, ndims): vert, horiz = gradients - return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_segment_length, + return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_segment_length, idx, idy, ndims) for edges in edge_segments] From 9143ec146a783b7b5401f4005dab636d9ab74412 Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 16:40:35 -0500 Subject: [PATCH 13/15] more trailing whitespace --- datashader/bundling.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 2e621572a..18097b871 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -108,8 +108,8 @@ def resample_segment(segments, new_segments, n_points_to_add, ndims): @nb.jit( nb.types.Tuple((nb.boolean, nb.uint64, nb.uint16[::1]))(nb.float32[:,::1], segment_length_type), - nopython=True, - nogil=True, + nopython=True, + nogil=True, fastmath=True, locals={ 'next_point': nb.float32[::1], @@ -158,7 +158,7 @@ def calculate_resampling(segments, squared_segment_length): nogil=True, ) def resample_edge(segments, squared_segment_length, ndims): - change, total_resamples, n_points_to_add = calculate_resampling(segments, + change, 
total_resamples, n_points_to_add = calculate_resampling(segments, squared_segment_length) if not change: return segments @@ -190,9 +190,9 @@ def smooth_segment(segments, tension, idx, idy): seg_length = len(segments) - 2 for i in range(1, seg_length): previous, current, next_point = segments[i - 1], segments[i], segments[i + 1] - current[idx] = (((1-tension)*current[idx]) + + current[idx] = (((1-tension)*current[idx]) + (tension*(previous[idx] + next_point[idx]) / 2)) - current[idy] = (((1-tension)*current[idy]) + + current[idy] = (((1-tension)*current[idy]) + (tension*(previous[idy] + next_point[idy]) / 2)) @@ -208,7 +208,7 @@ def smooth(edge_segments, tension, idx, idy): fastmath=True, locals={'it': nb.uint8, "i": nb.uint16, "x": nb.uint16, "y": nb.uint16} ) -def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, +def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): for it in range(iterations): for i in range(1, len(segments) - 1): @@ -223,7 +223,7 @@ def advect_and_resample(vert, horiz, segments, iterations, accuracy, squared_seg segments = resample_edge(segments, squared_segment_length, ndims) return segments -def advect_resample_all(gradients, edge_segments, iterations, accuracy, squared_segment_length, +def advect_resample_all(gradients, edge_segments, iterations, accuracy, squared_segment_length, idx, idy, ndims): vert, horiz = gradients return [advect_and_resample(vert, horiz, edges, iterations, accuracy, squared_segment_length, @@ -286,7 +286,7 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]], + return np.array([[edge[0], edge[1], edge[2]], [edge[0], edge[3], edge[4]]], dtype=np.float32) @staticmethod @@ -360,7 +360,7 @@ def get_merged_columns(params): @staticmethod @ngjit def create_segment(edge): - return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]], + return np.array([[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]], dtype=np.float32) @staticmethod @@ -599,8 +599,8 @@ def __call__(self, nodes, edges, **params): # Move edges along the gradients and resample when necessary # This could include smoothing to adjust the amount a graph can change edge_segments = [advect_resample_all_fn(gradients, segment, p.advect_iterations, - p.accuracy, squared_segment_length, - segment_class.idx, segment_class.idy, + p.accuracy, squared_segment_length, + segment_class.idx, segment_class.idy, segment_class.ndims) for segment in edge_segments] From 89984f409dbcefde5dc791f696e6dff0bd5fa5e6 Mon Sep 17 00:00:00 2001 From: lmcinnes Date: Tue, 7 Jan 2025 16:54:36 -0500 Subject: [PATCH 14/15] yet more trailing whitespace --- datashader/bundling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 18097b871..6de84bcb1 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -194,7 +194,6 @@ def smooth_segment(segments, tension, idx, idy): (tension*(previous[idx] + next_point[idx]) / 2)) current[idy] = (((1-tension)*current[idy]) + (tension*(previous[idy] + next_point[idy]) / 2)) - def smooth(edge_segments, tension, idx, idy): for segments in edge_segments: From 2610f532783770955f511b52901389d24f677fa9 Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Fri, 10 Jan 2025 18:26:49 -0500 Subject: [PATCH 15/15] Update datashader/bundling.py Co-authored-by: Andy Maloney --- datashader/bundling.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 6de84bcb1..b6f832d35 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -153,7 +153,7 @@ def calculate_resampling(segments, squared_segment_length): return any_change, total, n_points_to_add @nb.jit( - nb.float32[:,::1](nb.float32[:,::1], segment_length_type, nb.int64), + nb.float32[:, ::1](nb.float32[:, ::1], segment_length_type, nb.int64), nopython=True, nogil=True, )
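
A minimal usage sketch of the hammer_bundle entry point this series modifies, showing the new use_dask toggle introduced in PATCH 01 next to the default numba path. The tiny random graph, the fixed seed, and the default column names ('x', 'y', 'source', 'target') are illustrative assumptions, not part of the patches; only the use_dask parameter and the nodes/edges call signature come from the series itself.

    import numpy as np
    import pandas as pd
    from datashader.bundling import hammer_bundle

    # Small random graph: node positions plus integer edge endpoints that
    # index into the nodes frame, using hammer_bundle's default column names.
    rng = np.random.default_rng(42)
    n_nodes, n_edges = 100, 300
    nodes = pd.DataFrame({
        "x": rng.random(n_nodes),
        "y": rng.random(n_nodes),
    })
    edges = pd.DataFrame({
        "source": rng.integers(0, n_nodes, n_edges),
        "target": rng.integers(0, n_nodes, n_edges),
    })

    # Default path: the resampling, advection and smoothing kernels run
    # eagerly through the numba-jitted functions (use_dask=False).
    bundled = hammer_bundle(nodes, edges)

    # Opt back into the dask.delayed pipeline behind the new flag; the same
    # functions are wrapped in delayed() and evaluated via compute().
    bundled_dask = hammer_bundle(nodes, edges, use_dask=True)

    # The result is a DataFrame of segment points separated by NaN rows,
    # ready for datashader's line aggregation.
    print(bundled.head())

With use_dask=False the batches are processed in-process by the jitted kernels; use_dask=True restores the previous behaviour of deferring resample_edges, draw_to_surface, get_gradients and advect_resample_all through dask, which may still help when the edge list is large enough to benefit from distributed workers.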