From d32eb9319f7531d2ffea83787c97aff7a272b413 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 11 Nov 2017 09:58:13 -0600 Subject: [PATCH 01/25] Expose bounding box from tree builder --- boxtree/tree_build.py | 826 +++++++++++++++++++++++------------------- 1 file changed, 455 insertions(+), 371 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc44..afa9c318 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,7 +22,6 @@ THE SOFTWARE. """ - from six.moves import range, zip import numpy as np @@ -59,27 +58,42 @@ def __init__(self, context): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, + box_id_dtype, sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - self.morton_nr_dtype, self.box_level_dtype, + return get_tree_build_kernel_info( + self.context, + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + self.morton_nr_dtype, + self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, queue, particles, kind="adaptive", - max_particles_in_box=None, allocator=None, debug=False, - targets=None, source_radii=None, target_radii=None, - stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, - **kwargs): + def __call__(self, + queue, + particles, + kind="adaptive", + max_particles_in_box=None, + allocator=None, + debug=False, + targets=None, + source_radii=None, + target_radii=None, + stick_out_factor=None, + refine_weights=None, + max_leaf_refine_weight=None, + wait_for=None, + extent_norm=None, + bbox=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -118,6 +132,8 @@ def __call__(self, queue, particles, kind="adaptive", execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -127,7 +143,9 @@ def __call__(self, queue, particles, kind="adaptive", # {{{ input processing - if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: + if kind not in [ + "adaptive", "adaptive-level-restricted", "non-adaptive" + ]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -149,8 +167,8 @@ def __call__(self, queue, particles, kind="adaptive", extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError("unexpected value of 'extent_norm': %s" - % extent_norm) + raise ValueError( + "unexpected value of 'extent_norm': %s" % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -161,7 +179,7 @@ def __call__(self, queue, particles, kind="adaptive", if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -176,25 +194,26 @@ def __call__(self, queue, particles, kind="adaptive", nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources,): + if source_radii.shape != (nsources, ): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets,): + if target_radii.shape != (ntargets, ): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError("if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError( + "if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -207,10 +226,14 @@ def zeros(shape, dtype): event, = result.events return result, event - knl_info = self.get_kernel_info(dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info( + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -240,7 +263,7 @@ def zeros(shape, dtype): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -264,10 +287,11 @@ def combine_srcntgt_arrays(ary1, ary2=None): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, + target_radii) else: srcntgt_radii = None @@ -276,8 +300,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): del particles - user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, - allocator=allocator) + user_srcntgt_ids = cl.array.arange( + queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -295,33 +319,34 @@ def combine_srcntgt_arrays(ary1, ary2=None): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = (cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError("refine_weights must have dtype '%s'" - % refine_weight_dtype) + raise TypeError( + "refine_weights must have dtype '%s'" % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight") + "entries of refine_weights cannot exceed max_leaf_refine_weight" + ) if 0 > cl.array.min(refine_weights).get(): - raise ValueError("all entries of refine_weights must be nonnegative") + raise ValueError( + "all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -331,22 +356,35 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} @@ -356,7 +394,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty( + nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -376,8 +415,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1) - // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1 + ) // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -398,8 +437,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros(nboxes_guess, - dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros( + nboxes_guess, dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -411,18 +450,19 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip( - *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) + box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) + for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip( - *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) + box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) + for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 + center_ax = bbox["min_" + + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -431,12 +471,12 @@ def combine_srcntgt_arrays(ary1, ary2=None): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros( + nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill( - nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -444,14 +484,14 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros(nboxes_guess - if knl_info.level_restrict - else 0, dtype=np.dtype(np.int32)) + force_split_box, evt = zeros( + nboxes_guess if knl_info.level_restrict else 0, + dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy( - queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy(queue, box_parent_ids.data, + np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -543,55 +583,50 @@ def fin_debug(s): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ((morton_bin_counts, morton_nrs, - box_start_flags, - srcntgt_box_ids, split_box_ids, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_srcntgt_starts, box_srcntgt_counts_cumul, - box_parent_ids, box_levels, - level, bbox, - user_srcntgt_ids) - + tuple(srcntgts) - + ((srcntgt_radii,) if srcntgts_have_extent else ()) - ) + common_args = ( + (morton_bin_counts, morton_nrs, box_start_flags, + srcntgt_box_ids, split_box_ids, box_morton_bin_counts, + refine_weights, max_leaf_refine_weight, box_srcntgt_starts, + box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, + bbox, user_srcntgt_ids) + tuple(srcntgts) + + ((srcntgt_radii, ) if srcntgts_have_extent else ())) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor,) + morton_count_args += (stick_out_factor, ) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, queue=queue, size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, + queue=queue, + size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -603,21 +638,20 @@ def fin_debug(s): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) - + level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = (level_leaf_counts - + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2 ** dimensions) - new_level_leaf_counts = np.append( - new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts - + np.append(level_used_box_counts, [0])) + new_level_leaf_counts = ( + level_leaf_counts + level_used_box_counts_diff[:-1] - + level_used_box_counts_diff[1:] // 2**dimensions) + new_level_leaf_counts = np.append(new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -638,7 +672,8 @@ def fin_debug(s): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], + axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -652,7 +687,7 @@ def fin_debug(s): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l*dimensions) * new_level_leaf_counts[level - l] + 2**(l * dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -674,22 +709,25 @@ def fin_debug(s): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] - for l in range( - 1, 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] + for l in range(1, + 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [upper_level_padding, - minimal_upper_level_lengths - new_level_used_box_counts[:-1]], + [ + upper_level_padding, minimal_upper_level_lengths - + new_level_used_box_counts[:-1] + ], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[1:] = np.cumsum( - new_level_used_box_counts[:-1] - + new_upper_level_unused_box_counts) + new_level_start_box_nrs[ + 1:] = np.cumsum(new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not (level_start_box_nrs == new_level_start_box_nrs).all() + assert not ( + level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -697,8 +735,8 @@ def fin_debug(s): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty(queue, - shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty( + queue, shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -711,12 +749,17 @@ def fin_debug(s): wait_for.extend(dst_box_id.events) - realloc_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, map_values=dst_box_id, - range=slice(old_box_count), debug=debug) + realloc_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + map_values=dst_box_id, + range=slice(old_box_count), + debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -753,22 +796,40 @@ def fin_debug(s): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + return cl.array.empty( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + result = cl.array.zeros( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) return result, result.events[0] - my_realloc = partial(realloc_array, - queue, allocator, nboxes_guess, wait_for=wait_for) - my_realloc_zeros = partial(realloc_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) + my_realloc = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + wait_for=wait_for) + my_realloc_zeros = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial( + realloc_and_renumber_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -780,7 +841,7 @@ def my_realloc_zeros_nocopy(ary): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -798,16 +859,16 @@ def my_realloc_zeros_nocopy(ary): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip( - *(my_realloc(ary) for ary in box_centers)) + box_centers, evts = zip(*(my_realloc(ary) + for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip( - *(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber( + box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -816,8 +877,8 @@ def my_realloc_zeros_nocopy(ary): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate(zip( - level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate( + zip(level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -845,10 +906,8 @@ def my_realloc_zeros_nocopy(ary): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert ( - level_start_box_nrs[-1] != nboxes_new or - srcntgts_have_extent or - final_level_restrict_iteration) + assert (level_start_box_nrs[-1] != nboxes_new + or srcntgts_have_extent or final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -875,15 +934,14 @@ def my_realloc_zeros_nocopy(ary): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, - level_used_box_counts, + level_start_box_nrs, level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[ - level_start:level_start + level_nboxes]).get()) + cl.array.sum(box_has_children[level_start:level_start + + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -896,15 +954,14 @@ def my_realloc_zeros_nocopy(ary): # {{{ split boxes - box_splitter_args = ( - common_args - + (box_has_children, force_split_box, root_extent) - + box_child_ids - + box_centers) + box_splitter_args = (common_args + + (box_has_children, force_split_box, + root_extent) + box_child_ids + box_centers) - evt = knl_info.box_splitter_kernel(*box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel( + *box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -919,8 +976,8 @@ def my_realloc_zeros_nocopy(ary): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[ - level_start_box_nrs[-2]:level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: + level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -935,12 +992,13 @@ def my_realloc_zeros_nocopy(ary): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args - + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, - range=slice(nsrcntgts), wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel( + *particle_renumberer_args, + range=slice(nsrcntgts), + wait_for=wait_for) wait_for = [evt] @@ -978,7 +1036,7 @@ def my_realloc_zeros_nocopy(ary): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1003,7 +1061,8 @@ def my_realloc_zeros_nocopy(ary): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, upper_level_start + upper_level_box_count) + upper_level_start, + upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1023,8 +1082,10 @@ def my_realloc_zeros_nocopy(ary): if debug: force_split_box.finish() - boxes_split.append(int(cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append( + int( + cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1033,16 +1094,20 @@ def my_realloc_zeros_nocopy(ary): if debug: total_boxes_split = sum(boxes_split) - logger.debug("level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug( + "level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format(level=level_, nboxes_split=nboxes_split)) + .format( + level=level_, + nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get() + ) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1080,16 +1145,12 @@ def my_realloc_zeros_nocopy(ary): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum( - h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids - if bci[ibox] != 0) + kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids if bci[ibox] != 0) - if ( - h_box_srcntgt_counts_cumul[ibox] - != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] - + kid_sum): + if (h_box_srcntgt_counts_cumul[ibox] != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1105,10 +1166,10 @@ def my_realloc_zeros_nocopy(ary): # }}} end_time = time() - elapsed = end_time-start_time - npasses = level+1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % ( - elapsed, elapsed/(npasses*nsrcntgts))) + elapsed = end_time - start_time + npasses = level + 1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % + (elapsed, elapsed / (npasses * nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1125,25 +1186,26 @@ def my_realloc_zeros_nocopy(ary): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - - range=slice(nboxes), wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + range=slice(nboxes), + wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( + ) h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild - <= h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild <= + h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1174,14 +1236,17 @@ def my_realloc_zeros_nocopy(ary): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, dst_box_id, nboxes_post_prune_dev, - size=nboxes, wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, + dst_box_id, + nboxes_post_prune_dev, + size=nboxes, + wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)".format( + nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1194,21 +1259,27 @@ def my_realloc_zeros_nocopy(ary): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): + def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange(queue, start, - start + level_used_box_count, dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange(level_start) - dst_box_id[make_slice(level_start)] = make_arange(new_level_start) + return cl.array.arange( + queue, + start, + start + level_used_box_count, + dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange( + level_start) + dst_box_id[make_slice(level_start)] = make_arange( + new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1216,25 +1287,31 @@ def make_arange(start): if should_prune: prune_events = [] - prune_empty = partial(self.gappy_copy_and_map, - queue, allocator, nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), debug=debug) + prune_empty = partial( + self.gappy_copy_and_map, + queue, + allocator, + nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), + debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty( + box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty( + box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1242,19 +1319,17 @@ def make_arange(start): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip( - *(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip( - *(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1302,9 +1377,13 @@ def make_arange(start): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter(user_srcntgt_ids, nsources, - source_numbers, queue=queue, allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter( + user_srcntgt_ids, + nsources, + source_numbers, + queue=queue, + allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1315,59 +1394,61 @@ def make_arange(start): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder(*( - # input: - ( - user_srcntgt_ids, nsources, srcntgt_box_ids, - box_parent_ids, - - box_srcntgt_starts, box_srcntgt_counts_cumul, - source_numbers, - ) - + ((box_srcntgt_counts_nonchild,) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder( + *( + # input: + ( + user_srcntgt_ids, + nsources, + srcntgt_box_ids, + box_parent_ids, + box_srcntgt_starts, + box_srcntgt_counts_cumul, + source_numbers, + ) + ((box_srcntgt_counts_nonchild, ) + if srcntgts_have_extent else ()) - # output: - + ( - user_source_ids, srcntgt_target_ids, sorted_target_ids, - box_source_starts, box_source_counts_cumul, - box_target_starts, box_target_counts_cumul, - ) - + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ()) - ), - queue=queue, range=slice(nsrcntgts), + # output: + + ( + user_source_ids, + srcntgt_target_ids, + sorted_target_ids, + box_source_starts, + box_source_counts_cumul, + box_target_starts, + box_target_counts_cumul, + ) + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ())), + queue=queue, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert ( - box_srcntgt_counts_nonchild.get() - == - (box_source_counts_nonchild - + box_target_counts_nonchild).get()).all() + assert (box_srcntgt_counts_nonchild.get() == ( + box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1376,13 +1457,13 @@ def make_arange(start): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources+ntargets).all() + assert (sti_host < nsources + ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() - + box_target_counts_cumul.get() - == box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() == + box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1395,49 +1476,55 @@ def make_arange(start): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array([ - cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array( + [cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array([ - empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array( + [empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, + range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array([ - empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array( + [empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, + range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, user_source_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + user_source_ids, + queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, srcntgt_target_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + srcntgt_target_ids, + queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1452,7 +1539,7 @@ def make_arange(start): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level+1, nlevels) + assert level + 1 == nlevels, (level + 1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1462,9 +1549,10 @@ def make_arange(start): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), + box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1474,7 +1562,8 @@ def make_arange(start): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, : + nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1518,33 +1607,38 @@ def make_arange(start): # }}} - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, box_srcntgt_counts_cumul, - box_source_counts_cumul, box_target_counts_cumul, - box_has_children, box_levels, nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, box_target_counts_nonchild, + *( + # input: + box_parent_ids, + box_srcntgt_counts_cumul, + box_source_counts_cumul, + box_target_counts_cumul, + box_has_children, + box_levels, + nlevels, + + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, + box_target_counts_nonchild, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1562,52 +1656,42 @@ def make_arange(start): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - - sources=sources, - targets=targets, - - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - - _is_pruned=prune_empty_leaves, - - **extra_tree_attrs - ).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + sources=sources, + targets=targets, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + _is_pruned=prune_empty_leaves, + **extra_tree_attrs).with_queue(None), evt # }}} # }}} + # vim: foldmethod=marker:filetype=pyopencl From f8f2123d2e978d80b5d301204b894fd1bb107fbc Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 11 Nov 2017 09:58:13 -0600 Subject: [PATCH 02/25] Expose bounding box from tree builder --- boxtree/tree_build.py | 826 +++++++++++++++++++++++------------------- 1 file changed, 455 insertions(+), 371 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc44..afa9c318 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,7 +22,6 @@ THE SOFTWARE. """ - from six.moves import range, zip import numpy as np @@ -59,27 +58,42 @@ def __init__(self, context): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, + box_id_dtype, sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - self.morton_nr_dtype, self.box_level_dtype, + return get_tree_build_kernel_info( + self.context, + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + self.morton_nr_dtype, + self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, queue, particles, kind="adaptive", - max_particles_in_box=None, allocator=None, debug=False, - targets=None, source_radii=None, target_radii=None, - stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, - **kwargs): + def __call__(self, + queue, + particles, + kind="adaptive", + max_particles_in_box=None, + allocator=None, + debug=False, + targets=None, + source_radii=None, + target_radii=None, + stick_out_factor=None, + refine_weights=None, + max_leaf_refine_weight=None, + wait_for=None, + extent_norm=None, + bbox=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -118,6 +132,8 @@ def __call__(self, queue, particles, kind="adaptive", execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -127,7 +143,9 @@ def __call__(self, queue, particles, kind="adaptive", # {{{ input processing - if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: + if kind not in [ + "adaptive", "adaptive-level-restricted", "non-adaptive" + ]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -149,8 +167,8 @@ def __call__(self, queue, particles, kind="adaptive", extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError("unexpected value of 'extent_norm': %s" - % extent_norm) + raise ValueError( + "unexpected value of 'extent_norm': %s" % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -161,7 +179,7 @@ def __call__(self, queue, particles, kind="adaptive", if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -176,25 +194,26 @@ def __call__(self, queue, particles, kind="adaptive", nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources,): + if source_radii.shape != (nsources, ): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets,): + if target_radii.shape != (ntargets, ): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError("if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError( + "if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -207,10 +226,14 @@ def zeros(shape, dtype): event, = result.events return result, event - knl_info = self.get_kernel_info(dimensions, coord_dtype, - particle_id_dtype, box_id_dtype, - sources_are_targets, srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info( + dimensions, + coord_dtype, + particle_id_dtype, + box_id_dtype, + sources_are_targets, + srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -240,7 +263,7 @@ def zeros(shape, dtype): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -264,10 +287,11 @@ def combine_srcntgt_arrays(ary1, ary2=None): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, + target_radii) else: srcntgt_radii = None @@ -276,8 +300,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): del particles - user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, - allocator=allocator) + user_srcntgt_ids = cl.array.arange( + queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -295,33 +319,34 @@ def combine_srcntgt_arrays(ary1, ary2=None): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = ( - cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = (cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError("refine_weights must have dtype '%s'" - % refine_weight_dtype) + raise TypeError( + "refine_weights must have dtype '%s'" % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight") + "entries of refine_weights cannot exceed max_leaf_refine_weight" + ) if 0 > cl.array.min(refine_weights).get(): - raise ValueError("all entries of refine_weights must be nonnegative") + raise ValueError( + "all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -331,22 +356,35 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_"+ax] - bbox["min_"+ax] - for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} @@ -356,7 +394,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty( + nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -376,8 +415,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1) - // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1 + ) // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -398,8 +437,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros(nboxes_guess, - dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros( + nboxes_guess, dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -411,18 +450,19 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip( - *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) + box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) + for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip( - *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) + box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) + for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 + center_ax = bbox["min_" + + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -431,12 +471,12 @@ def combine_srcntgt_arrays(ary1, ary2=None): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros( + nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill( - nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -444,14 +484,14 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros(nboxes_guess - if knl_info.level_restrict - else 0, dtype=np.dtype(np.int32)) + force_split_box, evt = zeros( + nboxes_guess if knl_info.level_restrict else 0, + dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy( - queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy(queue, box_parent_ids.data, + np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -543,55 +583,50 @@ def fin_debug(s): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ((morton_bin_counts, morton_nrs, - box_start_flags, - srcntgt_box_ids, split_box_ids, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_srcntgt_starts, box_srcntgt_counts_cumul, - box_parent_ids, box_levels, - level, bbox, - user_srcntgt_ids) - + tuple(srcntgts) - + ((srcntgt_radii,) if srcntgts_have_extent else ()) - ) + common_args = ( + (morton_bin_counts, morton_nrs, box_start_flags, + srcntgt_box_ids, split_box_ids, box_morton_bin_counts, + refine_weights, max_leaf_refine_weight, box_srcntgt_starts, + box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, + bbox, user_srcntgt_ids) + tuple(srcntgts) + + ((srcntgt_radii, ) if srcntgts_have_extent else ())) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor,) + morton_count_args += (stick_out_factor, ) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, queue=queue, size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, + queue=queue, + size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -603,21 +638,20 @@ def fin_debug(s): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) - + level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = (level_leaf_counts - + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2 ** dimensions) - new_level_leaf_counts = np.append( - new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts - + np.append(level_used_box_counts, [0])) + new_level_leaf_counts = ( + level_leaf_counts + level_used_box_counts_diff[:-1] - + level_used_box_counts_diff[1:] // 2**dimensions) + new_level_leaf_counts = np.append(new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -638,7 +672,8 @@ def fin_debug(s): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], + axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -652,7 +687,7 @@ def fin_debug(s): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l*dimensions) * new_level_leaf_counts[level - l] + 2**(l * dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -674,22 +709,25 @@ def fin_debug(s): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] - for l in range( - 1, 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] + for l in range(1, + 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [upper_level_padding, - minimal_upper_level_lengths - new_level_used_box_counts[:-1]], + [ + upper_level_padding, minimal_upper_level_lengths - + new_level_used_box_counts[:-1] + ], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[1:] = np.cumsum( - new_level_used_box_counts[:-1] - + new_upper_level_unused_box_counts) + new_level_start_box_nrs[ + 1:] = np.cumsum(new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not (level_start_box_nrs == new_level_start_box_nrs).all() + assert not ( + level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -697,8 +735,8 @@ def fin_debug(s): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty(queue, - shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty( + queue, shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -711,12 +749,17 @@ def fin_debug(s): wait_for.extend(dst_box_id.events) - realloc_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial(self.gappy_copy_and_map, - dst_indices=dst_box_id, map_values=dst_box_id, - range=slice(old_box_count), debug=debug) + realloc_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial( + self.gappy_copy_and_map, + dst_indices=dst_box_id, + map_values=dst_box_id, + range=slice(old_box_count), + debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -753,22 +796,40 @@ def fin_debug(s): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + return cl.array.empty( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros(queue, allocator=allocator, - shape=nboxes_guess, dtype=ary.dtype) + result = cl.array.zeros( + queue, + allocator=allocator, + shape=nboxes_guess, + dtype=ary.dtype) return result, result.events[0] - my_realloc = partial(realloc_array, - queue, allocator, nboxes_guess, wait_for=wait_for) - my_realloc_zeros = partial(realloc_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, - queue, allocator, nboxes_guess, zero_fill=True, - wait_for=wait_for) + my_realloc = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + wait_for=wait_for) + my_realloc_zeros = partial( + realloc_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial( + realloc_and_renumber_array, + queue, + allocator, + nboxes_guess, + zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -780,7 +841,7 @@ def my_realloc_zeros_nocopy(ary): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -798,16 +859,16 @@ def my_realloc_zeros_nocopy(ary): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip( - *(my_realloc(ary) for ary in box_centers)) + box_centers, evts = zip(*(my_realloc(ary) + for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip( - *(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber( + box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -816,8 +877,8 @@ def my_realloc_zeros_nocopy(ary): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate(zip( - level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate( + zip(level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -845,10 +906,8 @@ def my_realloc_zeros_nocopy(ary): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert ( - level_start_box_nrs[-1] != nboxes_new or - srcntgts_have_extent or - final_level_restrict_iteration) + assert (level_start_box_nrs[-1] != nboxes_new + or srcntgts_have_extent or final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -875,15 +934,14 @@ def my_realloc_zeros_nocopy(ary): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, - level_used_box_counts, + level_start_box_nrs, level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[ - level_start:level_start + level_nboxes]).get()) + cl.array.sum(box_has_children[level_start:level_start + + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -896,15 +954,14 @@ def my_realloc_zeros_nocopy(ary): # {{{ split boxes - box_splitter_args = ( - common_args - + (box_has_children, force_split_box, root_extent) - + box_child_ids - + box_centers) + box_splitter_args = (common_args + + (box_has_children, force_split_box, + root_extent) + box_child_ids + box_centers) - evt = knl_info.box_splitter_kernel(*box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel( + *box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -919,8 +976,8 @@ def my_realloc_zeros_nocopy(ary): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[ - level_start_box_nrs[-2]:level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: + level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -935,12 +992,13 @@ def my_realloc_zeros_nocopy(ary): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args - + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, - range=slice(nsrcntgts), wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel( + *particle_renumberer_args, + range=slice(nsrcntgts), + wait_for=wait_for) wait_for = [evt] @@ -978,7 +1036,7 @@ def my_realloc_zeros_nocopy(ary): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1003,7 +1061,8 @@ def my_realloc_zeros_nocopy(ary): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, upper_level_start + upper_level_box_count) + upper_level_start, + upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1023,8 +1082,10 @@ def my_realloc_zeros_nocopy(ary): if debug: force_split_box.finish() - boxes_split.append(int(cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append( + int( + cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1033,16 +1094,20 @@ def my_realloc_zeros_nocopy(ary): if debug: total_boxes_split = sum(boxes_split) - logger.debug("level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug( + "level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format(level=level_, nboxes_split=nboxes_split)) + .format( + level=level_, + nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get() + ) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1080,16 +1145,12 @@ def my_realloc_zeros_nocopy(ary): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum( - h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids - if bci[ibox] != 0) + kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids if bci[ibox] != 0) - if ( - h_box_srcntgt_counts_cumul[ibox] - != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] - + kid_sum): + if (h_box_srcntgt_counts_cumul[ibox] != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1105,10 +1166,10 @@ def my_realloc_zeros_nocopy(ary): # }}} end_time = time() - elapsed = end_time-start_time - npasses = level+1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % ( - elapsed, elapsed/(npasses*nsrcntgts))) + elapsed = end_time - start_time + npasses = level + 1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % + (elapsed, elapsed / (npasses * nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1125,25 +1186,26 @@ def my_realloc_zeros_nocopy(ary): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - - range=slice(nboxes), wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + range=slice(nboxes), + wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( + ) h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild - <= h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild <= + h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1174,14 +1236,17 @@ def my_realloc_zeros_nocopy(ary): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, dst_box_id, nboxes_post_prune_dev, - size=nboxes, wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, + dst_box_id, + nboxes_post_prune_dev, + size=nboxes, + wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)".format( + nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1194,21 +1259,27 @@ def my_realloc_zeros_nocopy(ary): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): + def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange(queue, start, - start + level_used_box_count, dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange(level_start) - dst_box_id[make_slice(level_start)] = make_arange(new_level_start) + return cl.array.arange( + queue, + start, + start + level_used_box_count, + dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange( + level_start) + dst_box_id[make_slice(level_start)] = make_arange( + new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1216,25 +1287,31 @@ def make_arange(start): if should_prune: prune_events = [] - prune_empty = partial(self.gappy_copy_and_map, - queue, allocator, nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), debug=debug) + prune_empty = partial( + self.gappy_copy_and_map, + queue, + allocator, + nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), + debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty( + box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty( + box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1242,19 +1319,17 @@ def make_arange(start): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip( - *(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip( - *(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1302,9 +1377,13 @@ def make_arange(start): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter(user_srcntgt_ids, nsources, - source_numbers, queue=queue, allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter( + user_srcntgt_ids, + nsources, + source_numbers, + queue=queue, + allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1315,59 +1394,61 @@ def make_arange(start): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_cumul, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder(*( - # input: - ( - user_srcntgt_ids, nsources, srcntgt_box_ids, - box_parent_ids, - - box_srcntgt_starts, box_srcntgt_counts_cumul, - source_numbers, - ) - + ((box_srcntgt_counts_nonchild,) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder( + *( + # input: + ( + user_srcntgt_ids, + nsources, + srcntgt_box_ids, + box_parent_ids, + box_srcntgt_starts, + box_srcntgt_counts_cumul, + source_numbers, + ) + ((box_srcntgt_counts_nonchild, ) + if srcntgts_have_extent else ()) - # output: - + ( - user_source_ids, srcntgt_target_ids, sorted_target_ids, - box_source_starts, box_source_counts_cumul, - box_target_starts, box_target_counts_cumul, - ) - + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ()) - ), - queue=queue, range=slice(nsrcntgts), + # output: + + ( + user_source_ids, + srcntgt_target_ids, + sorted_target_ids, + box_source_starts, + box_source_counts_cumul, + box_target_starts, + box_target_counts_cumul, + ) + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ())), + queue=queue, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert ( - box_srcntgt_counts_nonchild.get() - == - (box_source_counts_nonchild - + box_target_counts_nonchild).get()).all() + assert (box_srcntgt_counts_nonchild.get() == ( + box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1376,13 +1457,13 @@ def make_arange(start): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources+ntargets).all() + assert (sti_host < nsources + ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() - + box_target_counts_cumul.get() - == box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() == + box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1395,49 +1476,55 @@ def make_arange(start): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array([ - cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array( + [cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array([ - empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array( + [empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, + range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array([ - empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array( + [empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, + range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, user_source_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + user_source_ids, + queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, srcntgt_target_ids, queue=queue, - wait_for=wait_for) + srcntgt_radii, + srcntgt_target_ids, + queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1452,7 +1539,7 @@ def make_arange(start): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level+1, nlevels) + assert level + 1 == nlevels, (level + 1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1462,9 +1549,10 @@ def make_arange(start): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), + box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1474,7 +1562,8 @@ def make_arange(start): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, : + nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1518,33 +1607,38 @@ def make_arange(start): # }}} - box_source_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_source_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros( - nboxes_post_prune, particle_id_dtype) + box_target_counts_nonchild, evt = zeros(nboxes_post_prune, + particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, box_srcntgt_counts_cumul, - box_source_counts_cumul, box_target_counts_cumul, - box_has_children, box_levels, nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, box_target_counts_nonchild, + *( + # input: + box_parent_ids, + box_srcntgt_counts_cumul, + box_source_counts_cumul, + box_target_counts_cumul, + box_has_children, + box_levels, + nlevels, + + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, + box_target_counts_nonchild, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1562,52 +1656,42 @@ def make_arange(start): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - - sources=sources, - targets=targets, - - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - - _is_pruned=prune_empty_leaves, - - **extra_tree_attrs - ).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + sources=sources, + targets=targets, + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + _is_pruned=prune_empty_leaves, + **extra_tree_attrs).with_queue(None), evt # }}} # }}} + # vim: foldmethod=marker:filetype=pyopencl From 8012d886324185dd06fefcf136979cb59a37e28f Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:32:00 -0600 Subject: [PATCH 03/25] Run through yapf --- boxtree/tree_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index afa9c318..06beb85e 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -461,8 +461,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_" - + ax] + (bbox["max_" + ax] - bbox["min_" + ax]) / 2 + center_ax = bbox["min_" + ax] + ( + bbox["max_" + ax] - bbox["min_" + ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map From b8030db3f2b7dcf9225ce257340e8b23db638ff4 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:33:00 -0600 Subject: [PATCH 04/25] Swap in yapf formatted tree builder from upstream --- boxtree/tree_build.py | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 06beb85e..bf2e949c 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -92,7 +92,6 @@ def __call__(self, max_leaf_refine_weight=None, wait_for=None, extent_norm=None, - bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -132,8 +131,6 @@ def __call__(self, execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. - :arg bbox: When specified, the bounding box is used for tree - building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -356,35 +353,22 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - if bbox is None: - bbox, _ = self.bbox_finder( - srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( - 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] + for ax in axis_names) * (1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] - else: - # all coordinates must be at the interior (boundary excluded) - # of the bbox - # Currently only cubic domain has ensured mesh reconstruction. - bbox_min = np.empty(dimensions, coord_dtype) - bbox_max = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] - bbox_max[i] = bbox["max_" + ax] - assert (bbox_min[i] < bbox_max[i]) - root_extent = max(bbox_max - bbox_min) + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] # }}} From 04855371f5309b6aec67718004bcdfc9d0d60a79 Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 7 Feb 2018 20:34:47 -0600 Subject: [PATCH 05/25] Reapply changes --- boxtree/tree_build.py | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index bf2e949c..06beb85e 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -92,6 +92,7 @@ def __call__(self, max_leaf_refine_weight=None, wait_for=None, extent_norm=None, + bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -131,6 +132,8 @@ def __call__(self, execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: When specified, the bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -353,22 +356,35 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] - for ax in axis_names) * (1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( + 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_" + ax] = bbox_max[i] + else: + # all coordinates must be at the interior (boundary excluded) + # of the bbox + # Currently only cubic domain has ensured mesh reconstruction. + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert (bbox_min[i] < bbox_max[i]) + root_extent = max(bbox_max - bbox_min) # }}} From 114025162d2dd88e11d38f2ab91d490ce44add71 Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 8 Feb 2018 09:24:26 -0600 Subject: [PATCH 06/25] Restart from current master --- boxtree/tree_build.py | 826 +++++++++++++++++++----------------------- 1 file changed, 371 insertions(+), 455 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 06beb85e..2566dc44 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -22,6 +22,7 @@ THE SOFTWARE. """ + from six.moves import range, zip import numpy as np @@ -58,42 +59,27 @@ def __init__(self, context): ROOT_EXTENT_STRETCH_FACTOR = 1e-4 @memoize_method - def get_kernel_info(self, dimensions, coord_dtype, particle_id_dtype, - box_id_dtype, sources_are_targets, srcntgts_extent_norm, - kind): + def get_kernel_info(self, dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + kind): from boxtree.tree_build_kernels import get_tree_build_kernel_info - return get_tree_build_kernel_info( - self.context, - dimensions, - coord_dtype, - particle_id_dtype, - box_id_dtype, - sources_are_targets, - srcntgts_extent_norm, - self.morton_nr_dtype, - self.box_level_dtype, + return get_tree_build_kernel_info(self.context, dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + self.morton_nr_dtype, self.box_level_dtype, kind=kind) # {{{ run control - def __call__(self, - queue, - particles, - kind="adaptive", - max_particles_in_box=None, - allocator=None, - debug=False, - targets=None, - source_radii=None, - target_radii=None, - stick_out_factor=None, - refine_weights=None, - max_leaf_refine_weight=None, - wait_for=None, - extent_norm=None, - bbox=None, - **kwargs): + def __call__(self, queue, particles, kind="adaptive", + max_particles_in_box=None, allocator=None, debug=False, + targets=None, source_radii=None, target_radii=None, + stick_out_factor=None, refine_weights=None, + max_leaf_refine_weight=None, wait_for=None, + extent_norm=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -132,8 +118,6 @@ def __call__(self, execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. - :arg bbox: When specified, the bounding box is used for tree - building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -143,9 +127,7 @@ def __call__(self, # {{{ input processing - if kind not in [ - "adaptive", "adaptive-level-restricted", "non-adaptive" - ]: + if kind not in ["adaptive", "adaptive-level-restricted", "non-adaptive"]: raise ValueError("unknown tree kind \"{0}\"".format(kind)) # we'll modify this below, so copy it @@ -167,8 +149,8 @@ def __call__(self, extent_norm = "linf" if extent_norm not in ["linf", "l2"]: - raise ValueError( - "unexpected value of 'extent_norm': %s" % extent_norm) + raise ValueError("unexpected value of 'extent_norm': %s" + % extent_norm) srcntgts_extent_norm = extent_norm srcntgts_have_extent = sources_have_extent or targets_have_extent @@ -179,7 +161,7 @@ def __call__(self, if srcntgts_extent_norm and targets is None: raise ValueError("must specify targets when specifying " - "any kind of radii") + "any kind of radii") from pytools import single_valued particle_id_dtype = np.int32 @@ -194,26 +176,25 @@ def __call__(self, nsrcntgts = nsources + ntargets if source_radii is not None: - if source_radii.shape != (nsources, ): + if source_radii.shape != (nsources,): raise ValueError("source_radii has an invalid shape") if source_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "source_radii must agree") + "source_radii must agree") if target_radii is not None: - if target_radii.shape != (ntargets, ): + if target_radii.shape != (ntargets,): raise ValueError("target_radii has an invalid shape") if target_radii.dtype != coord_dtype: raise TypeError("dtypes of coordinate arrays and " - "target_radii must agree") + "target_radii must agree") if sources_have_extent or targets_have_extent: if stick_out_factor is None: - raise ValueError( - "if sources or targets have extent, " - "stick_out_factor must be explicitly specified") + raise ValueError("if sources or targets have extent, " + "stick_out_factor must be explicitly specified") else: stick_out_factor = 0 @@ -226,14 +207,10 @@ def zeros(shape, dtype): event, = result.events return result, event - knl_info = self.get_kernel_info( - dimensions, - coord_dtype, - particle_id_dtype, - box_id_dtype, - sources_are_targets, - srcntgts_extent_norm, - kind=kind) + knl_info = self.get_kernel_info(dimensions, coord_dtype, + particle_id_dtype, box_id_dtype, + sources_are_targets, srcntgts_extent_norm, + kind=kind) logger.info("tree build: start") @@ -263,7 +240,7 @@ def zeros(shape, dtype): if target_coord_dtype != coord_dtype: raise TypeError("sources and targets must have same coordinate " - "dtype") + "dtype") def combine_srcntgt_arrays(ary1, ary2=None): if ary2 is None: @@ -287,11 +264,10 @@ def combine_srcntgt_arrays(ary1, ary2=None): srcntgts = make_obj_array([ combine_srcntgt_arrays(src_i, tgt_i) for src_i, tgt_i in zip(particles, targets) - ]) + ]) if srcntgts_have_extent: - srcntgt_radii = combine_srcntgt_arrays(source_radii, - target_radii) + srcntgt_radii = combine_srcntgt_arrays(source_radii, target_radii) else: srcntgt_radii = None @@ -300,8 +276,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): del particles - user_srcntgt_ids = cl.array.arange( - queue, nsrcntgts, dtype=particle_id_dtype, allocator=allocator) + user_srcntgt_ids = cl.array.arange(queue, nsrcntgts, dtype=particle_id_dtype, + allocator=allocator) evt, = user_srcntgt_ids.events wait_for.append(evt) @@ -319,34 +295,33 @@ def combine_srcntgt_arrays(ary1, ary2=None): if specified_max_particles_in_box and specified_refine_weights: raise ValueError("may only specify one of max_particles_in_box and " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif not specified_max_particles_in_box and not specified_refine_weights: raise ValueError("must specify either max_particles_in_box or " - "refine_weights/max_leaf_refine_weight") + "refine_weights/max_leaf_refine_weight") elif specified_max_particles_in_box: - refine_weights = (cl.array.empty( - queue, nsrcntgts, refine_weight_dtype, allocator=allocator) - .fill(1)) + refine_weights = ( + cl.array.empty( + queue, nsrcntgts, refine_weight_dtype, allocator=allocator) + .fill(1)) event, = refine_weights.events prep_events.append(event) max_leaf_refine_weight = max_particles_in_box elif specified_refine_weights: if refine_weights.dtype != refine_weight_dtype: - raise TypeError( - "refine_weights must have dtype '%s'" % refine_weight_dtype) + raise TypeError("refine_weights must have dtype '%s'" + % refine_weight_dtype) if max_leaf_refine_weight < cl.array.max(refine_weights).get(): raise ValueError( - "entries of refine_weights cannot exceed max_leaf_refine_weight" - ) + "entries of refine_weights cannot exceed max_leaf_refine_weight") if 0 > cl.array.min(refine_weights).get(): - raise ValueError( - "all entries of refine_weights must be nonnegative") + raise ValueError("all entries of refine_weights must be nonnegative") if max_leaf_refine_weight <= 0: raise ValueError("max_leaf_refine_weight must be positive") total_refine_weight = cl.array.sum( - refine_weights, dtype=np.dtype(np.int64)).get() + refine_weights, dtype=np.dtype(np.int64)).get() del max_particles_in_box del specified_max_particles_in_box @@ -356,35 +331,22 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - if bbox is None: - bbox, _ = self.bbox_finder( - srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( - bbox["max_" + ax] - bbox["min_" + ax] for ax in axis_names) * ( - 1 + TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) + root_extent = max( + bbox["max_"+ax] - bbox["min_"+ax] + for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_"+ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_" + ax] = bbox_max[i] - else: - # all coordinates must be at the interior (boundary excluded) - # of the bbox - # Currently only cubic domain has ensured mesh reconstruction. - bbox_min = np.empty(dimensions, coord_dtype) - bbox_max = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_" + ax] - bbox_max[i] = bbox["max_" + ax] - assert (bbox_min[i] < bbox_max[i]) - root_extent = max(bbox_max - bbox_min) + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_"+ax] = bbox_max[i] # }}} @@ -394,8 +356,7 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box-local morton bin counts for each particle at the current level # only valid from scan -> split'n'sort - morton_bin_counts = empty( - nsrcntgts, dtype=knl_info.morton_bin_count_dtype) + morton_bin_counts = empty(nsrcntgts, dtype=knl_info.morton_bin_count_dtype) # (local) morton nrs for each particle at the current level # only valid from scan -> split'n'sort @@ -415,8 +376,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): nboxes_guess = kwargs.get("nboxes_guess") if nboxes_guess is None: nboxes_guess = 2**dimensions * ( - (max_leaf_refine_weight + total_refine_weight - 1 - ) // max_leaf_refine_weight) + (max_leaf_refine_weight + total_refine_weight - 1) + // max_leaf_refine_weight) assert nboxes_guess > 0 @@ -437,8 +398,8 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # per-box morton bin counts - box_morton_bin_counts, evt = zeros( - nboxes_guess, dtype=knl_info.morton_bin_count_dtype) + box_morton_bin_counts, evt = zeros(nboxes_guess, + dtype=knl_info.morton_bin_count_dtype) prep_events.append(evt) # particle# at which each box starts @@ -450,19 +411,18 @@ def combine_srcntgt_arrays(ary1, ary2=None): prep_events.append(evt) # pointer to child box, by morton number - box_child_ids, evts = zip(*(zeros(nboxes_guess, dtype=box_id_dtype) - for d in range(2**dimensions))) + box_child_ids, evts = zip( + *(zeros(nboxes_guess, dtype=box_id_dtype) for d in range(2**dimensions))) prep_events.extend(evts) # box centers, by dimension - box_centers, evts = zip(*(zeros(nboxes_guess, dtype=coord_dtype) - for d in range(dimensions))) + box_centers, evts = zip( + *(zeros(nboxes_guess, dtype=coord_dtype) for d in range(dimensions))) prep_events.extend(evts) # Initialize box_centers[0] to contain the root box's center for d, (ax, evt) in enumerate(zip(axis_names, evts)): - center_ax = bbox["min_" + ax] + ( - bbox["max_" + ax] - bbox["min_" + ax]) / 2 + center_ax = bbox["min_"+ax] + (bbox["max_"+ax] - bbox["min_"+ax]) / 2 box_centers[d][0].fill(center_ax, wait_for=[evt]) # box -> level map @@ -471,12 +431,12 @@ def combine_srcntgt_arrays(ary1, ary2=None): # number of particles in each box # needs to be globally initialized because empty boxes never get touched - box_srcntgt_counts_cumul, evt = zeros( - nboxes_guess, dtype=particle_id_dtype) + box_srcntgt_counts_cumul, evt = zeros(nboxes_guess, dtype=particle_id_dtype) prep_events.append(evt) # Initalize box 0 to contain all particles - box_srcntgt_counts_cumul[0].fill(nsrcntgts, queue=queue, wait_for=[evt]) + box_srcntgt_counts_cumul[0].fill( + nsrcntgts, queue=queue, wait_for=[evt]) # box -> whether the box has a child. FIXME: use smaller integer type box_has_children, evt = zeros(nboxes_guess, dtype=np.dtype(np.int32)) @@ -484,14 +444,14 @@ def combine_srcntgt_arrays(ary1, ary2=None): # box -> whether the box needs a splitting to enforce level restriction. # FIXME: use smaller integer type - force_split_box, evt = zeros( - nboxes_guess if knl_info.level_restrict else 0, - dtype=np.dtype(np.int32)) + force_split_box, evt = zeros(nboxes_guess + if knl_info.level_restrict + else 0, dtype=np.dtype(np.int32)) prep_events.append(evt) # set parent of root box to itself - evt = cl.enqueue_copy(queue, box_parent_ids.data, - np.zeros((), dtype=box_parent_ids.dtype)) + evt = cl.enqueue_copy( + queue, box_parent_ids.data, np.zeros((), dtype=box_parent_ids.dtype)) prep_events.append(evt) nlevels_max = np.iinfo(self.box_level_dtype).max @@ -583,50 +543,55 @@ def fin_debug(s): if level > np.iinfo(self.box_level_dtype).max: raise RuntimeError("level count exceeded maximum") - common_args = ( - (morton_bin_counts, morton_nrs, box_start_flags, - srcntgt_box_ids, split_box_ids, box_morton_bin_counts, - refine_weights, max_leaf_refine_weight, box_srcntgt_starts, - box_srcntgt_counts_cumul, box_parent_ids, box_levels, level, - bbox, user_srcntgt_ids) + tuple(srcntgts) + - ((srcntgt_radii, ) if srcntgts_have_extent else ())) + common_args = ((morton_bin_counts, morton_nrs, + box_start_flags, + srcntgt_box_ids, split_box_ids, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_srcntgt_starts, box_srcntgt_counts_cumul, + box_parent_ids, box_levels, + level, bbox, + user_srcntgt_ids) + + tuple(srcntgts) + + ((srcntgt_radii,) if srcntgts_have_extent else ()) + ) fin_debug("morton count scan") morton_count_args = common_args if srcntgts_have_extent: - morton_count_args += (stick_out_factor, ) + morton_count_args += (stick_out_factor,) # writes: box_morton_bin_counts evt = knl_info.morton_count_scan( - *morton_count_args, - queue=queue, - size=nsrcntgts, - wait_for=wait_for) + *morton_count_args, queue=queue, size=nsrcntgts, + wait_for=wait_for) wait_for = [evt] fin_debug("split box id scan") # writes: box_has_children, split_box_ids evt = knl_info.split_box_id_scan( - srcntgt_box_ids, - box_srcntgt_counts_cumul, - box_morton_bin_counts, - refine_weights, - max_leaf_refine_weight, - box_levels, - level_start_box_nrs_dev, - level_used_box_counts_dev, - force_split_box, - level, + srcntgt_box_ids, + box_srcntgt_counts_cumul, + box_morton_bin_counts, + refine_weights, + max_leaf_refine_weight, + box_levels, + level_start_box_nrs_dev, + level_used_box_counts_dev, + force_split_box, + level, - # output: - box_has_children, - split_box_ids, - have_oversize_split_box, - queue=queue, - size=level_start_box_nrs[level], - wait_for=wait_for) + # output: + box_has_children, + split_box_ids, + have_oversize_split_box, + + queue=queue, + size=level_start_box_nrs[level], + wait_for=wait_for) wait_for = [evt] # {{{ compute new level_used_box_counts, level_leaf_counts @@ -638,20 +603,21 @@ def fin_debug(s): last_box_on_prev_level = level_start_box_id - 1 new_level_used_box_counts.append( # FIXME: Get this all at once. - int(split_box_ids[last_box_on_prev_level].get()) - - level_start_box_id) + int(split_box_ids[last_box_on_prev_level].get()) + - level_start_box_id) # New leaf count = # old leaf count # + nr. new boxes from splitting parent's leaves # - nr. new boxes from splitting current level's leaves / 2**d - level_used_box_counts_diff = (new_level_used_box_counts - - np.append(level_used_box_counts, [0])) - new_level_leaf_counts = ( - level_leaf_counts + level_used_box_counts_diff[:-1] - - level_used_box_counts_diff[1:] // 2**dimensions) - new_level_leaf_counts = np.append(new_level_leaf_counts, - [level_used_box_counts_diff[-1]]) + level_used_box_counts_diff = (new_level_used_box_counts + - np.append(level_used_box_counts, [0])) + new_level_leaf_counts = (level_leaf_counts + + level_used_box_counts_diff[:-1] + - level_used_box_counts_diff[1:] // 2 ** dimensions) + new_level_leaf_counts = np.append( + new_level_leaf_counts, + [level_used_box_counts_diff[-1]]) del level_used_box_counts_diff # }}} @@ -672,8 +638,7 @@ def fin_debug(s): curr_upper_level_lengths = np.diff(level_start_box_nrs) minimal_upper_level_lengths = np.max( - [new_level_used_box_counts[:-1], curr_upper_level_lengths], - axis=0) + [new_level_used_box_counts[:-1], curr_upper_level_lengths], axis=0) minimal_new_level_length = new_level_used_box_counts[-1] # Allocate extra space at the end of the current level for higher @@ -687,7 +652,7 @@ def fin_debug(s): # Currently undocumented. lr_lookbehind_levels = kwargs.get("lr_lookbehind", 1) minimal_new_level_length += sum( - 2**(l * dimensions) * new_level_leaf_counts[level - l] + 2**(l*dimensions) * new_level_leaf_counts[level - l] for l in range(1, 1 + min(level, lr_lookbehind_levels))) nboxes_minimal = \ @@ -709,25 +674,22 @@ def fin_debug(s): # Recompute the level padding. for ulevel in range(level): upper_level_padding[ulevel] = sum( - 2**(l * dimensions) * new_level_leaf_counts[ulevel - l] - for l in range(1, - 1 + min(ulevel, lr_lookbehind_levels))) + 2**(l*dimensions) * new_level_leaf_counts[ulevel - l] + for l in range( + 1, 1 + min(ulevel, lr_lookbehind_levels))) new_upper_level_unused_box_counts = np.max( - [ - upper_level_padding, minimal_upper_level_lengths - - new_level_used_box_counts[:-1] - ], + [upper_level_padding, + minimal_upper_level_lengths - new_level_used_box_counts[:-1]], axis=0) new_level_start_box_nrs = np.empty(level + 1, dtype=int) new_level_start_box_nrs[0] = 0 - new_level_start_box_nrs[ - 1:] = np.cumsum(new_level_used_box_counts[:-1] + - new_upper_level_unused_box_counts) + new_level_start_box_nrs[1:] = np.cumsum( + new_level_used_box_counts[:-1] + + new_upper_level_unused_box_counts) - assert not ( - level_start_box_nrs == new_level_start_box_nrs).all() + assert not (level_start_box_nrs == new_level_start_box_nrs).all() # }}} @@ -735,8 +697,8 @@ def fin_debug(s): old_box_count = level_start_box_nrs[-1] # Where should I put this box? - dst_box_id = cl.array.empty( - queue, shape=old_box_count, dtype=box_id_dtype) + dst_box_id = cl.array.empty(queue, + shape=old_box_count, dtype=box_id_dtype) for level_start, new_level_start, level_len in zip( level_start_box_nrs, new_level_start_box_nrs, @@ -749,17 +711,12 @@ def fin_debug(s): wait_for.extend(dst_box_id.events) - realloc_array = partial( - self.gappy_copy_and_map, - dst_indices=dst_box_id, - range=slice(old_box_count), - debug=debug) - realloc_and_renumber_array = partial( - self.gappy_copy_and_map, - dst_indices=dst_box_id, - map_values=dst_box_id, - range=slice(old_box_count), - debug=debug) + realloc_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, range=slice(old_box_count), + debug=debug) + realloc_and_renumber_array = partial(self.gappy_copy_and_map, + dst_indices=dst_box_id, map_values=dst_box_id, + range=slice(old_box_count), debug=debug) renumber_array = partial(self.map_values_kernel, dst_box_id) # }}} @@ -796,40 +753,22 @@ def fin_debug(s): nboxes_guess *= 2 def my_realloc_nocopy(ary): - return cl.array.empty( - queue, - allocator=allocator, - shape=nboxes_guess, - dtype=ary.dtype) + return cl.array.empty(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) def my_realloc_zeros_nocopy(ary): - result = cl.array.zeros( - queue, - allocator=allocator, - shape=nboxes_guess, - dtype=ary.dtype) + result = cl.array.zeros(queue, allocator=allocator, + shape=nboxes_guess, dtype=ary.dtype) return result, result.events[0] - my_realloc = partial( - realloc_array, - queue, - allocator, - nboxes_guess, - wait_for=wait_for) - my_realloc_zeros = partial( - realloc_array, - queue, - allocator, - nboxes_guess, - zero_fill=True, - wait_for=wait_for) - my_realloc_zeros_and_renumber = partial( - realloc_and_renumber_array, - queue, - allocator, - nboxes_guess, - zero_fill=True, - wait_for=wait_for) + my_realloc = partial(realloc_array, + queue, allocator, nboxes_guess, wait_for=wait_for) + my_realloc_zeros = partial(realloc_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) + my_realloc_zeros_and_renumber = partial(realloc_and_renumber_array, + queue, allocator, nboxes_guess, zero_fill=True, + wait_for=wait_for) resize_events = [] @@ -841,7 +780,7 @@ def my_realloc_zeros_nocopy(ary): # currently being processed are written-but we need to # retain the box morton bin counts from the higher levels. box_morton_bin_counts, evt = my_realloc_zeros( - box_morton_bin_counts) + box_morton_bin_counts) resize_events.append(evt) # force_split_box is unused unless level restriction is enabled. @@ -859,16 +798,16 @@ def my_realloc_zeros_nocopy(ary): box_has_children, evt = my_realloc_zeros(box_has_children) resize_events.append(evt) - box_centers, evts = zip(*(my_realloc(ary) - for ary in box_centers)) + box_centers, evts = zip( + *(my_realloc(ary) for ary in box_centers)) resize_events.extend(evts) - box_child_ids, evts = zip(*(my_realloc_zeros_and_renumber(ary) - for ary in box_child_ids)) + box_child_ids, evts = zip( + *(my_realloc_zeros_and_renumber(ary) + for ary in box_child_ids)) resize_events.extend(evts) - box_parent_ids, evt = my_realloc_zeros_and_renumber( - box_parent_ids) + box_parent_ids, evt = my_realloc_zeros_and_renumber(box_parent_ids) resize_events.append(evt) if not level_start_box_nrs_updated: @@ -877,8 +816,8 @@ def my_realloc_zeros_nocopy(ary): else: box_levels, evt = my_realloc_zeros_nocopy(box_levels) cl.wait_for_events([evt]) - for box_level, (level_start, level_end) in enumerate( - zip(level_start_box_nrs, level_start_box_nrs[1:])): + for box_level, (level_start, level_end) in enumerate(zip( + level_start_box_nrs, level_start_box_nrs[1:])): box_levels[level_start:level_end].fill(box_level) resize_events.extend(box_levels.events) @@ -906,8 +845,10 @@ def my_realloc_zeros_nocopy(ary): logger.info("LEVEL %d -> %d boxes" % (level, nboxes_new)) - assert (level_start_box_nrs[-1] != nboxes_new - or srcntgts_have_extent or final_level_restrict_iteration) + assert ( + level_start_box_nrs[-1] != nboxes_new or + srcntgts_have_extent or + final_level_restrict_iteration) if level_start_box_nrs[-1] == nboxes_new: # We haven't created new boxes in this level loop trip. @@ -934,14 +875,15 @@ def my_realloc_zeros_nocopy(ary): level_leaf_counts = new_level_leaf_counts if debug: for level_start, level_nboxes, leaf_count in zip( - level_start_box_nrs, level_used_box_counts, + level_start_box_nrs, + level_used_box_counts, level_leaf_counts): if level_nboxes == 0: assert leaf_count == 0 continue nleaves_actual = level_nboxes - int( - cl.array.sum(box_has_children[level_start:level_start + - level_nboxes]).get()) + cl.array.sum(box_has_children[ + level_start:level_start + level_nboxes]).get()) assert leaf_count == nleaves_actual # Can't del in Py2.7 - see note below @@ -954,14 +896,15 @@ def my_realloc_zeros_nocopy(ary): # {{{ split boxes - box_splitter_args = (common_args + - (box_has_children, force_split_box, - root_extent) + box_child_ids + box_centers) + box_splitter_args = ( + common_args + + (box_has_children, force_split_box, root_extent) + + box_child_ids + + box_centers) - evt = knl_info.box_splitter_kernel( - *box_splitter_args, - range=slice(level_start_box_nrs[-1]), - wait_for=wait_for) + evt = knl_info.box_splitter_kernel(*box_splitter_args, + range=slice(level_start_box_nrs[-1]), + wait_for=wait_for) wait_for = [evt] @@ -976,8 +919,8 @@ def my_realloc_zeros_nocopy(ary): if debug: box_levels.finish() - level_bl_chunk = box_levels.get()[level_start_box_nrs[-2]: - level_start_box_nrs[-1]] + level_bl_chunk = box_levels.get()[ + level_start_box_nrs[-2]:level_start_box_nrs[-1]] assert (level_bl_chunk == level).all() del level_bl_chunk @@ -992,13 +935,12 @@ def my_realloc_zeros_nocopy(ary): new_srcntgt_box_ids = cl.array.empty_like(srcntgt_box_ids) particle_renumberer_args = ( - common_args + (box_has_children, force_split_box, - new_user_srcntgt_ids, new_srcntgt_box_ids)) + common_args + + (box_has_children, force_split_box, + new_user_srcntgt_ids, new_srcntgt_box_ids)) - evt = knl_info.particle_renumberer_kernel( - *particle_renumberer_args, - range=slice(nsrcntgts), - wait_for=wait_for) + evt = knl_info.particle_renumberer_kernel(*particle_renumberer_args, + range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] @@ -1036,7 +978,7 @@ def my_realloc_zeros_nocopy(ary): LEVEL_STEP = 10 # noqa if level % LEVEL_STEP == 1: level_restrict_kernel = knl_info.level_restrict_kernel_builder( - LEVEL_STEP * div_ceil(level, LEVEL_STEP)) + LEVEL_STEP * div_ceil(level, LEVEL_STEP)) # Upward pass - check if leaf boxes at higher levels need # further splitting. @@ -1061,8 +1003,7 @@ def my_realloc_zeros_nocopy(ary): level_used_box_counts[-3::-1]): upper_level_slice = slice( - upper_level_start, - upper_level_start + upper_level_box_count) + upper_level_start, upper_level_start + upper_level_box_count) have_upper_level_split_box.fill(0) wait_for.extend(have_upper_level_split_box.events) @@ -1082,10 +1023,8 @@ def my_realloc_zeros_nocopy(ary): if debug: force_split_box.finish() - boxes_split.append( - int( - cl.array.sum( - force_split_box[upper_level_slice]).get())) + boxes_split.append(int(cl.array.sum( + force_split_box[upper_level_slice]).get())) if int(have_upper_level_split_box.get()) == 0: break @@ -1094,20 +1033,16 @@ def my_realloc_zeros_nocopy(ary): if debug: total_boxes_split = sum(boxes_split) - logger.debug( - "level restriction: {total_boxes_split} boxes split" - .format(total_boxes_split=total_boxes_split)) + logger.debug("level restriction: {total_boxes_split} boxes split" + .format(total_boxes_split=total_boxes_split)) from itertools import count for level_, nboxes_split in zip( count(level - 2, step=-1), boxes_split[:-1]): logger.debug("level {level}: {nboxes_split} boxes split" - .format( - level=level_, - nboxes_split=nboxes_split)) + .format(level=level_, nboxes_split=nboxes_split)) del boxes_split - if int(have_oversize_split_box.get() - ) == 0 and did_upper_level_split: + if int(have_oversize_split_box.get()) == 0 and did_upper_level_split: # We are in the situation where there are boxes left to # split on upper levels, and the level loop is done creating # lower levels. @@ -1145,12 +1080,16 @@ def my_realloc_zeros_nocopy(ary): # empty boxes don't have box_morton_bin_counts written continue - kid_sum = sum(h_box_srcntgt_counts_cumul[bci[ibox]] - for bci in h_box_child_ids if bci[ibox] != 0) + kid_sum = sum( + h_box_srcntgt_counts_cumul[bci[ibox]] + for bci in h_box_child_ids + if bci[ibox] != 0) - if (h_box_srcntgt_counts_cumul[ibox] != - h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + - kid_sum): + if ( + h_box_srcntgt_counts_cumul[ibox] + != + h_box_morton_bin_counts[ibox]["nonchild_srcntgts"] + + kid_sum): print("MISMATCH", level, ibox) has_mismatch = True @@ -1166,10 +1105,10 @@ def my_realloc_zeros_nocopy(ary): # }}} end_time = time() - elapsed = end_time - start_time - npasses = level + 1 - logger.info("elapsed time: %g s (%g s/particle/pass)" % - (elapsed, elapsed / (npasses * nsrcntgts))) + elapsed = end_time-start_time + npasses = level+1 + logger.info("elapsed time: %g s (%g s/particle/pass)" % ( + elapsed, elapsed/(npasses*nsrcntgts))) del npasses nboxes = level_start_box_nrs[-1] @@ -1186,26 +1125,25 @@ def my_realloc_zeros_nocopy(ary): highest_possibly_split_box_nr = level_start_box_nrs[-2] evt = knl_info.extract_nonchild_srcntgt_count_kernel( - # input - box_morton_bin_counts, - box_srcntgt_counts_cumul, - highest_possibly_split_box_nr, - - # output - box_srcntgt_counts_nonchild, - range=slice(nboxes), - wait_for=wait_for) + # input + box_morton_bin_counts, + box_srcntgt_counts_cumul, + highest_possibly_split_box_nr, + + # output + box_srcntgt_counts_nonchild, + + range=slice(nboxes), wait_for=wait_for) wait_for = [evt] del highest_possibly_split_box_nr if debug: - h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get( - ) + h_box_srcntgt_counts_nonchild = box_srcntgt_counts_nonchild.get() h_box_srcntgt_counts_cumul = box_srcntgt_counts_cumul.get() - assert (h_box_srcntgt_counts_nonchild <= - h_box_srcntgt_counts_cumul[:nboxes]).all() + assert (h_box_srcntgt_counts_nonchild + <= h_box_srcntgt_counts_cumul[:nboxes]).all() del h_box_srcntgt_counts_nonchild @@ -1236,17 +1174,14 @@ def my_realloc_zeros_nocopy(ary): nboxes_post_prune_dev = empty((), dtype=box_id_dtype) evt = knl_info.find_prune_indices_kernel( - box_srcntgt_counts_cumul, - src_box_id, - dst_box_id, - nboxes_post_prune_dev, - size=nboxes, - wait_for=wait_for) + box_srcntgt_counts_cumul, + src_box_id, dst_box_id, nboxes_post_prune_dev, + size=nboxes, wait_for=wait_for) wait_for = [evt] nboxes_post_prune = int(nboxes_post_prune_dev.get()) logger.info("{} boxes after pruning " - "({} empty leaves and/or unused boxes removed)".format( - nboxes_post_prune, nboxes - nboxes_post_prune)) + "({} empty leaves and/or unused boxes removed)" + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True elif knl_info.level_restrict: # Remove unused boxes from the tree. @@ -1259,27 +1194,21 @@ def my_realloc_zeros_nocopy(ary): for level_start, new_level_start, level_used_box_count in zip( level_start_box_nrs, new_level_start_box_nrs, level_used_box_counts): - def make_slice(start): return slice(start, start + level_used_box_count) def make_arange(start): - return cl.array.arange( - queue, - start, - start + level_used_box_count, - dtype=box_id_dtype) - - src_box_id[make_slice(new_level_start)] = make_arange( - level_start) - dst_box_id[make_slice(level_start)] = make_arange( - new_level_start) + return cl.array.arange(queue, start, + start + level_used_box_count, dtype=box_id_dtype) + + src_box_id[make_slice(new_level_start)] = make_arange(level_start) + dst_box_id[make_slice(level_start)] = make_arange(new_level_start) wait_for.extend(src_box_id.events + dst_box_id.events) nboxes_post_prune = new_level_start_box_nrs[-1] logger.info("{} boxes after pruning ({} unused boxes removed)" - .format(nboxes_post_prune, nboxes - nboxes_post_prune)) + .format(nboxes_post_prune, nboxes - nboxes_post_prune)) should_prune = True else: should_prune = False @@ -1287,31 +1216,25 @@ def make_arange(start): if should_prune: prune_events = [] - prune_empty = partial( - self.gappy_copy_and_map, - queue, - allocator, - nboxes_post_prune, - src_indices=src_box_id, - range=slice(nboxes_post_prune), - debug=debug) + prune_empty = partial(self.gappy_copy_and_map, + queue, allocator, nboxes_post_prune, + src_indices=src_box_id, + range=slice(nboxes_post_prune), debug=debug) box_srcntgt_starts, evt = prune_empty(box_srcntgt_starts) prune_events.append(evt) - box_srcntgt_counts_cumul, evt = prune_empty( - box_srcntgt_counts_cumul) + box_srcntgt_counts_cumul, evt = prune_empty(box_srcntgt_counts_cumul) prune_events.append(evt) if debug and prune_empty_leaves: assert (box_srcntgt_counts_cumul.get() > 0).all() srcntgt_box_ids, evt = self.map_values_kernel( - dst_box_id, srcntgt_box_ids) + dst_box_id, srcntgt_box_ids) prune_events.append(evt) - box_parent_ids, evt = prune_empty( - box_parent_ids, map_values=dst_box_id) + box_parent_ids, evt = prune_empty(box_parent_ids, map_values=dst_box_id) prune_events.append(evt) box_levels, evt = prune_empty(box_levels) @@ -1319,17 +1242,19 @@ def make_arange(start): if srcntgts_have_extent: box_srcntgt_counts_nonchild, evt = prune_empty( - box_srcntgt_counts_nonchild) + box_srcntgt_counts_nonchild) prune_events.append(evt) box_has_children, evt = prune_empty(box_has_children) prune_events.append(evt) - box_child_ids, evts = zip(*(prune_empty(ary, map_values=dst_box_id) - for ary in box_child_ids)) + box_child_ids, evts = zip( + *(prune_empty(ary, map_values=dst_box_id) + for ary in box_child_ids)) prune_events.extend(evts) - box_centers, evts = zip(*(prune_empty(ary) for ary in box_centers)) + box_centers, evts = zip( + *(prune_empty(ary) for ary in box_centers)) prune_events.extend(evts) # Update box counts and level start box indices. @@ -1377,13 +1302,9 @@ def make_arange(start): source_numbers = empty(nsrcntgts, particle_id_dtype) fin_debug("source counter") - evt = knl_info.source_counter( - user_srcntgt_ids, - nsources, - source_numbers, - queue=queue, - allocator=allocator, - wait_for=wait_for) + evt = knl_info.source_counter(user_srcntgt_ids, nsources, + source_numbers, queue=queue, allocator=allocator, + wait_for=wait_for) wait_for = [evt] user_source_ids = empty(nsources, particle_id_dtype) @@ -1394,61 +1315,59 @@ def make_arange(start): # need to use zeros because parent boxes won't be initialized box_source_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_source_counts_cumul, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_cumul, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_starts, evt = zeros(nboxes_post_prune, particle_id_dtype) + box_target_starts, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_cumul, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_cumul, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) if srcntgts_have_extent: - box_source_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) - box_target_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) fin_debug("source and target index finder") - evt = knl_info.source_and_target_index_finder( - *( - # input: - ( - user_srcntgt_ids, - nsources, - srcntgt_box_ids, - box_parent_ids, - box_srcntgt_starts, - box_srcntgt_counts_cumul, - source_numbers, - ) + ((box_srcntgt_counts_nonchild, ) - if srcntgts_have_extent else ()) + evt = knl_info.source_and_target_index_finder(*( + # input: + ( + user_srcntgt_ids, nsources, srcntgt_box_ids, + box_parent_ids, - # output: - + ( - user_source_ids, - srcntgt_target_ids, - sorted_target_ids, - box_source_starts, - box_source_counts_cumul, - box_target_starts, - box_target_counts_cumul, - ) + (( - box_source_counts_nonchild, - box_target_counts_nonchild, - ) if srcntgts_have_extent else ())), - queue=queue, - range=slice(nsrcntgts), + box_srcntgt_starts, box_srcntgt_counts_cumul, + source_numbers, + ) + + ((box_srcntgt_counts_nonchild,) + if srcntgts_have_extent else ()) + + # output: + + ( + user_source_ids, srcntgt_target_ids, sorted_target_ids, + box_source_starts, box_source_counts_cumul, + box_target_starts, box_target_counts_cumul, + ) + + (( + box_source_counts_nonchild, + box_target_counts_nonchild, + ) if srcntgts_have_extent else ()) + ), + queue=queue, range=slice(nsrcntgts), wait_for=wait_for) wait_for = [evt] if srcntgts_have_extent: if debug: - assert (box_srcntgt_counts_nonchild.get() == ( - box_source_counts_nonchild + - box_target_counts_nonchild).get()).all() + assert ( + box_srcntgt_counts_nonchild.get() + == + (box_source_counts_nonchild + + box_target_counts_nonchild).get()).all() if debug: usi_host = user_source_ids.get() @@ -1457,13 +1376,13 @@ def make_arange(start): del usi_host sti_host = srcntgt_target_ids.get() - assert (sti_host < nsources + ntargets).all() + assert (sti_host < nsources+ntargets).all() assert (nsources <= sti_host).all() del sti_host - assert (box_source_counts_cumul.get() + - box_target_counts_cumul.get() == - box_srcntgt_counts_cumul.get()).all() + assert (box_source_counts_cumul.get() + + box_target_counts_cumul.get() + == box_srcntgt_counts_cumul.get()).all() del source_numbers @@ -1476,55 +1395,49 @@ def make_arange(start): # {{{ permute and source/target-split (if necessary) particle array if targets is None: - sources = targets = make_obj_array( - [cl.array.empty_like(pt) for pt in srcntgts]) + sources = targets = make_obj_array([ + cl.array.empty_like(pt) for pt in srcntgts]) fin_debug("srcntgt permuter (particles)") evt = knl_info.srcntgt_permuter( - user_srcntgt_ids, - *(tuple(srcntgts) + tuple(sources)), - wait_for=wait_for) + user_srcntgt_ids, + *(tuple(srcntgts) + tuple(sources)), + wait_for=wait_for) wait_for = [evt] assert srcntgt_radii is None else: - sources = make_obj_array( - [empty(nsources, coord_dtype) for i in range(dimensions)]) + sources = make_obj_array([ + empty(nsources, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (sources)") evt = knl_info.srcntgt_permuter( - user_source_ids, - *(tuple(srcntgts) + tuple(sources)), - queue=queue, - range=slice(nsources), - wait_for=wait_for) + user_source_ids, + *(tuple(srcntgts) + tuple(sources)), + queue=queue, range=slice(nsources), + wait_for=wait_for) wait_for = [evt] - targets = make_obj_array( - [empty(ntargets, coord_dtype) for i in range(dimensions)]) + targets = make_obj_array([ + empty(ntargets, coord_dtype) for i in range(dimensions)]) fin_debug("srcntgt permuter (targets)") evt = knl_info.srcntgt_permuter( - srcntgt_target_ids, - *(tuple(srcntgts) + tuple(targets)), - queue=queue, - range=slice(ntargets), - wait_for=wait_for) + srcntgt_target_ids, + *(tuple(srcntgts) + tuple(targets)), + queue=queue, range=slice(ntargets), + wait_for=wait_for) wait_for = [evt] if srcntgt_radii is not None: fin_debug("srcntgt permuter (source radii)") source_radii = cl.array.take( - srcntgt_radii, - user_source_ids, - queue=queue, - wait_for=wait_for) + srcntgt_radii, user_source_ids, queue=queue, + wait_for=wait_for) fin_debug("srcntgt permuter (target radii)") target_radii = cl.array.take( - srcntgt_radii, - srcntgt_target_ids, - queue=queue, - wait_for=wait_for) + srcntgt_radii, srcntgt_target_ids, queue=queue, + wait_for=wait_for) wait_for = source_radii.events + target_radii.events @@ -1539,7 +1452,7 @@ def make_arange(start): nlevels = len(level_start_box_nrs) - 1 assert nlevels == len(level_used_box_counts) - assert level + 1 == nlevels, (level + 1, nlevels) + assert level + 1 == nlevels, (level+1, nlevels) if debug: max_level = np.max(box_levels.get()) assert max_level + 1 == nlevels @@ -1549,10 +1462,9 @@ def make_arange(start): # A number of arrays below are nominally 2-dimensional and stored with # the box index as the fastest-moving index. To make sure that accesses # remain aligned, we round up the number of boxes used for indexing. - aligned_nboxes = div_ceil(nboxes_post_prune, 32) * 32 + aligned_nboxes = div_ceil(nboxes_post_prune, 32)*32 - box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), - box_id_dtype) + box_child_ids_new, evt = zeros((2**dimensions, aligned_nboxes), box_id_dtype) wait_for.append(evt) box_centers_new = empty((dimensions, aligned_nboxes), coord_dtype) @@ -1562,8 +1474,7 @@ def make_arange(start): wait_for.extend(box_child_ids_new.events) for dim, center_row in enumerate(box_centers): - box_centers_new[dim, : - nboxes_post_prune] = center_row[:nboxes_post_prune] + box_centers_new[dim, :nboxes_post_prune] = center_row[:nboxes_post_prune] wait_for.extend(box_centers_new.events) cl.wait_for_events(wait_for) @@ -1607,38 +1518,33 @@ def make_arange(start): # }}} - box_source_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_source_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) if sources_are_targets: box_target_counts_nonchild = box_source_counts_nonchild else: - box_target_counts_nonchild, evt = zeros(nboxes_post_prune, - particle_id_dtype) + box_target_counts_nonchild, evt = zeros( + nboxes_post_prune, particle_id_dtype) wait_for.append(evt) fin_debug("compute box info") evt = knl_info.box_info_kernel( - *( - # input: - box_parent_ids, - box_srcntgt_counts_cumul, - box_source_counts_cumul, - box_target_counts_cumul, - box_has_children, - box_levels, - nlevels, - - # output if srcntgts_have_extent, input+output otherwise - box_source_counts_nonchild, - box_target_counts_nonchild, + *( + # input: + box_parent_ids, box_srcntgt_counts_cumul, + box_source_counts_cumul, box_target_counts_cumul, + box_has_children, box_levels, nlevels, - # output: - box_flags, - ), - range=slice(nboxes_post_prune), - wait_for=wait_for) + # output if srcntgts_have_extent, input+output otherwise + box_source_counts_nonchild, box_target_counts_nonchild, + + # output: + box_flags, + ), + range=slice(nboxes_post_prune), + wait_for=wait_for) # }}} @@ -1656,42 +1562,52 @@ def make_arange(start): logger.info("tree build complete") return Tree( - # If you change this, also change the documentation - # of what's in the tree, above. - sources_are_targets=sources_are_targets, - sources_have_extent=sources_have_extent, - targets_have_extent=targets_have_extent, - particle_id_dtype=knl_info.particle_id_dtype, - box_id_dtype=knl_info.box_id_dtype, - coord_dtype=coord_dtype, - box_level_dtype=self.box_level_dtype, - root_extent=root_extent, - stick_out_factor=stick_out_factor, - extent_norm=srcntgts_extent_norm, - bounding_box=(bbox_min, bbox_max), - level_start_box_nrs=level_start_box_nrs, - level_start_box_nrs_dev=level_start_box_nrs_dev, - sources=sources, - targets=targets, - box_source_starts=box_source_starts, - box_source_counts_nonchild=box_source_counts_nonchild, - box_source_counts_cumul=box_source_counts_cumul, - box_target_starts=box_target_starts, - box_target_counts_nonchild=box_target_counts_nonchild, - box_target_counts_cumul=box_target_counts_cumul, - box_parent_ids=box_parent_ids, - box_child_ids=box_child_ids, - box_centers=box_centers, - box_levels=box_levels, - box_flags=box_flags, - user_source_ids=user_source_ids, - sorted_target_ids=sorted_target_ids, - _is_pruned=prune_empty_leaves, - **extra_tree_attrs).with_queue(None), evt + # If you change this, also change the documentation + # of what's in the tree, above. + + sources_are_targets=sources_are_targets, + sources_have_extent=sources_have_extent, + targets_have_extent=targets_have_extent, + + particle_id_dtype=knl_info.particle_id_dtype, + box_id_dtype=knl_info.box_id_dtype, + coord_dtype=coord_dtype, + box_level_dtype=self.box_level_dtype, + + root_extent=root_extent, + stick_out_factor=stick_out_factor, + extent_norm=srcntgts_extent_norm, + + bounding_box=(bbox_min, bbox_max), + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + + sources=sources, + targets=targets, + + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_target_starts, + box_target_counts_nonchild=box_target_counts_nonchild, + box_target_counts_cumul=box_target_counts_cumul, + + box_parent_ids=box_parent_ids, + box_child_ids=box_child_ids, + box_centers=box_centers, + box_levels=box_levels, + box_flags=box_flags, + + user_source_ids=user_source_ids, + sorted_target_ids=sorted_target_ids, + + _is_pruned=prune_empty_leaves, + + **extra_tree_attrs + ).with_queue(None), evt # }}} # }}} - # vim: foldmethod=marker:filetype=pyopencl From d1bce1dcc1206ded0880d861d28387a925a51313 Mon Sep 17 00:00:00 2001 From: xywei Date: Thu, 8 Feb 2018 10:03:49 -0600 Subject: [PATCH 07/25] Re-apply changes, with minimal diff --- boxtree/tree_build.py | 51 +++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 2566dc44..b3438efe 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -78,7 +78,7 @@ def __call__(self, queue, particles, kind="adaptive", targets=None, source_radii=None, target_radii=None, stick_out_factor=None, refine_weights=None, max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, + extent_norm=None, bbox=None, **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance @@ -118,6 +118,10 @@ def __call__(self, queue, particles, kind="adaptive", execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. + :arg bbox: Bounding box of the same type as returned by + *boxtree.bounding_box.make_bounding_box_dtype*. + When given, this bounding box is used for tree + building. Otherwise, the bounding box is determined from particles. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of @@ -331,22 +335,45 @@ def combine_srcntgt_arrays(ary1, ary2=None): # {{{ find and process bounding box - bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) - bbox = bbox.get() + if bbox is None: + bbox, _ = self.bbox_finder(srcntgts, srcntgt_radii, wait_for=wait_for) + bbox = bbox.get() - root_extent = max( + root_extent = max( bbox["max_"+ax] - bbox["min_"+ax] for ax in axis_names) * (1+TreeBuilder.ROOT_EXTENT_STRETCH_FACTOR) - # make bbox square and slightly larger at the top, to ensure scaled - # coordinates are always < 1 - bbox_min = np.empty(dimensions, coord_dtype) - for i, ax in enumerate(axis_names): - bbox_min[i] = bbox["min_"+ax] + # make bbox square and slightly larger at the top, to ensure scaled + # coordinates are always < 1 + bbox_min = np.empty(dimensions, coord_dtype) + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_"+ax] - bbox_max = bbox_min + root_extent - for i, ax in enumerate(axis_names): - bbox["max_"+ax] = bbox_max[i] + bbox_max = bbox_min + root_extent + for i, ax in enumerate(axis_names): + bbox["max_"+ax] = bbox_max[i] + else: + # Validate that bbox is a superset of particle-derived bbox + bbox_auto, _ = self.bbox_finder( + srcntgts, srcntgt_radii, wait_for=wait_for) + bbox_auto = bbox_auto.get() + + bbox_min = np.empty(dimensions, coord_dtype) + bbox_max = np.empty(dimensions, coord_dtype) + + for i, ax in enumerate(axis_names): + bbox_min[i] = bbox["min_" + ax] + bbox_max[i] = bbox["max_" + ax] + assert bbox_min[i] < bbox_max[i] + assert bbox_min[i] <= bbox_auto["min_" + ax] + assert bbox_max[i] >= bbox_auto["max_" + ax] + + # bbox must be a square + bbox_exts = bbox_max - bbox_min + for ext in bbox_exts: + assert abs(ext - bbox_exts[0]) < 1e-15 + + root_extent = bbox_exts[0] # }}} From b777717dce3e52e878558bae3fb560b592f8a39b Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 17 Jun 2018 21:26:39 +0800 Subject: [PATCH 08/25] Add uniform boxtree --- boxtree/tree_interactive_build.py | 258 +++++++++++++++++++++++++++ boxtree/visualization.py | 94 ++++++++++ examples/interactive_boxtree_demo.py | 26 +++ 3 files changed, 378 insertions(+) create mode 100644 boxtree/tree_interactive_build.py create mode 100644 examples/interactive_boxtree_demo.py diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py new file mode 100644 index 00000000..0f3ab502 --- /dev/null +++ b/boxtree/tree_interactive_build.py @@ -0,0 +1,258 @@ +from __future__ import division, absolute_import + +__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" +import numpy as np +from itertools import product + +import pyopencl as cl +import pyopencl.array # noqa +from pytools.obj_array import make_obj_array +from boxtree.tools import DeviceDataRecord + +# {{{ Data structure for a tree of boxes + +class BoxTree(DeviceDataRecord): + r"""A quad/oct-tree tree consisting of a hierarchy of boxes. + The intended use for this data structure is to generate particles + as quadrature points in each leaf box. Those particles can then + be used to build a Tree object and then generate the Traversal for + FMM. + + This class is designed such that it is easy to adaptively modify + the tree structure (refine/coarsen). Unlike flattened Tree class, + it handles itself and does not rely on external tree builders. + + .. ------------------------------------------------------------------------ + .. rubric:: Data types + .. ------------------------------------------------------------------------ + + .. attribute:: box_id_dtype + .. attribute:: coord_dtype + .. attribute:: box_level_dtype + + .. ------------------------------------------------------------------------ + .. rubric:: Counts and sizes + .. ------------------------------------------------------------------------ + + .. attribute:: root_extent + + the root box size, a scalar + + .. attribute:: nlevels + + .. attribute:: nboxes + + Can be larger than the actual number of boxes, since the box ids of + purged boxes during coarsening are not reused. + + .. attribute:: nboxes_level + + ``size_t [nlevels]`` + + Can be larger than the actual number of boxes at each level due to + the same reason for nboxes. + + .. attribute:: n_active_boxes + + .. ------------------------------------------------------------------------ + .. rubric:: Box properties + .. ------------------------------------------------------------------------ + + .. attribute:: box_centers + + ``coord_t [dimensions, nboxes]`` + + (C order, 'structure of arrays') + + .. attribute:: box_levels + + :attr:`box_level_dtype` ``box_level_t [nboxes]`` + + .. attribute:: box_is_active + + :attr:`bool` ``bool [nboxes]`` + + FIXME: pyopencl cannot map 'bool'. Resort to use an int for now. + + .. ------------------------------------------------------------------------ + .. rubric:: Structural properties + .. ------------------------------------------------------------------------ + + .. attribute:: level_boxes + + ``numpy.ndarray [nlevels]`` + ``box_id_t [nlevels][nboxes_level[level]]`` + + A :class:`numpy.ndarray` of box ids at each level. It acts as an + inverse lookup table of box_levels. The outer layer is an object + array to account for variable lengths at each level. + + .. attribute:: box_parent_ids + + ``box_id_t [nboxes]`` + + Box 0 (the root) has 0 as its parent. + + .. attribute:: box_child_ids + + ``box_id_t [2**dimensions, nboxes]`` + + (C order, 'structure of arrays') + + "0" is used as a 'no child' marker, as the root box can never + occur as any box's child. Boxes with no child are called active + boxes. + + .. attribute:: active_boxes + + ``box_id_t [n_active_boxes]`` + """ + + def __init__(self, queue, + root_vertex=np.zeros(2), + root_extent=1, + nlevels=1, + box_id_dtype=np.int32, + box_level_dtype=np.int32, + coord_dtype=np.float64): + """A plain boxtree with uniform levels (a complete tree). + The root box is given its vertex with the smallest coordinates + and its extent. + """ + self.size_t = np.int32 + self.box_id_dtype = box_id_dtype + self.box_level_dtype = box_level_dtype + self.coord_dtype = coord_dtype + + dim = len(root_vertex) + self.root_extent = root_extent + + self.nboxes_level = cl.array.to_device(queue, + np.array( + [1 << (dim * l) for l in range(nlevels)], + dtype=self.size_t)) + nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) + + self.box_levels = cl.array.zeros(queue, nboxes, box_level_dtype) + level_start = 0 + for l in range(nlevels): + offset = self.size_t(self.nboxes_level[l].get()) + self.box_levels[level_start:level_start + offset] = l + level_start += offset + + self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) + ibox = 0 + for l in range(nlevels): + dx = self.root_extent / (1 << l) + for cid in product(range(1 << l), repeat=dim): + for d in range(dim): + self.box_centers[d, ibox] = cid[d] * dx + ( + dx / 2 + root_vertex[d]) + ibox += 1 + + n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) + self.active_boxes = cl.array.to_device(queue, + np.array( + range(nboxes - n_active_boxes, nboxes), + dtype=self.box_id_dtype)) + + # FIXME: map bool in pyopencl + # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype + # raise ValueError("unable to map dtype '%s'" % dtype) + # ValueError: unable to map dtype 'bool' + self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) + self.box_is_active[nboxes - n_active_boxes:] = 1 + + self.level_boxes = make_obj_array([ + cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) + for l in range(nlevels)]) + ibox = 0 + for l in range(nlevels): + for b in range(len(self.level_boxes[l])): + self.level_boxes[l][b] = ibox + ibox += 1 + + self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) + self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), + self.box_id_dtype) + for l in range(nlevels): + if l == 0: + self.box_parent_ids[0] = 0 + else: + multi_index_bases = tuple( + 1 << ((dim - 1 - d) * (l-1)) for d in range(dim)) + for ilb, multi_ind in zip(range(len(self.level_boxes[l])), + product(range(1 << l), repeat=dim)): + ibox = self.box_id_dtype(self.level_boxes[l][ilb].get()) + parent_multi_ind = tuple(ind // 2 for ind in multi_ind) + parent_level_id = np.sum([ind * base for ind, base + in zip(parent_multi_ind, multi_index_bases)]) + self.box_parent_ids[ibox] = self.level_boxes[ + l-1][parent_level_id] + + child_multi_index_bases = tuple( + 1 << (dim - 1 - d) for d in range(dim)) + child_multi_ind = tuple(ind - pind * 2 for ind, pind + in zip(multi_ind, parent_multi_ind)) + child_id = np.sum([ind * base for ind, base + in zip(child_multi_ind, child_multi_index_bases)]) + self.box_child_ids[child_id][self.box_id_dtype( + self.level_boxes[l-1][parent_level_id].get()) + ] = ibox + + @property + def dimensions(self): + return len(self.box_centers) + + @property + def nboxes(self): + return len(self.box_levels) + + @property + def nlevels(self): + return len(self.level_boxes) + + @property + def n_active_boxes(self): + return len(self.active_boxes) + + fields = ["box_centers"] + + def plot(self, **kwargs): + from boxtree.visualization import BoxTreePlotter + plotter = BoxTreePlotter(self) + plotter.draw_tree(**kwargs) + plotter.set_bounding_box() + + def get_box_extent(self, ibox): + if isinstance(ibox, cl.array.Array): + ibox = self.box_id_dtype(ibox.get()) + lev = self.box_level_dtype(self.box_levels[ibox].get()) + box_size = self.root_extent / (1 << lev) + extent_low = np.zeros(self.dimensions, self.coord_dtype) + for d in range(self.dimensions): + extent_low[d] = self.box_centers[d, ibox].get() - 0.5 * box_size + extent_high = extent_low + box_size + return extent_low, extent_high + +# }}} End Data structure for a tree of boxes diff --git a/boxtree/visualization.py b/boxtree/visualization.py index c487a965..f9dbbb01 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -172,6 +172,100 @@ def get_tikz_for_tree(self): # }}} +# {{{ box tree plotting + +class BoxTreePlotter(TreePlotter): + """Assumes that the tree has data living on the device. + Only plots the 2D trees. + """ + + def __init__(self, tree): + self.tree = tree + + def draw_tree(self, **kwargs): + if self.tree.dimensions != 2: + raise NotImplementedError("can only plot 2D trees for now") + + fill = kwargs.pop("fill", False) + edgecolor = kwargs.pop("edgecolor", "black") + kwargs["fill"] = fill + kwargs["edgecolor"] = edgecolor + + for iabox in range(self.tree.n_active_boxes): + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + self.draw_box(ibox, **kwargs) + + def set_bounding_box(self): + import matplotlib.pyplot as pt + root_center = np.array([self.tree.box_centers[d, 0].get() for d + in range(self.tree.dimensions)]) + root_extent = self.tree.root_extent + pt.xlim(root_center[0] - root_extent / 2, + root_center[0] + root_extent / 2) + pt.ylim(root_center[1] - root_extent / 2, + root_center[1] + root_extent / 2) + + pt.gca().set_aspect("equal") + + def draw_box_numbers(self): + import matplotlib.pyplot as pt + + tree = self.tree + + for iabox in range(tree.n_active_boxes): + ibox = tree.box_id_dtype(tree.active_boxes[iabox].get()) + x, y = tree.box_centers[:, ibox] + lev = int(tree.box_levels[ibox]) + pt.text(x, y, str(ibox), fontsize=20*1.15**(-lev), + ha="center", va="center", + bbox=dict(facecolor='white', alpha=0.5, lw=0)) + + def get_tikz_for_tree(self): + if self.tree.dimensions != 2: + raise NotImplementedError("can only plot 2D trees for now") + + lines = [] + + lines.append(r"\def\nboxes{%d}" % self.tree.nboxes) + lines.append(r"\def\lastboxnr{%d}" % (self.tree.nboxes-1)) + + for iabox in range(self.tree.n_active_boxes): + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + el, eh = self.tree.get_box_extent(ibox) + + c = self.tree.box_centers[:, ibox] + + lines.append( + r"\coordinate (boxl%d) at (%r, %r);" + % (ibox, float(el[0]), float(el[1]))) + lines.append( + r"\coordinate (boxh%d) at (%r, %r);" + % (ibox, float(eh[0]), float(eh[1]))) + lines.append( + r"\coordinate (boxc%d) at (%r, %r);" + % (ibox, float(c[0]), float(c[1]))) + lines.append( + r"\def\boxsize%s{%r}" + % (int_to_roman(ibox), float(eh[0]-el[0]))) + lines.append( + r"\def\boxlevel%s{%r}" + % (int_to_roman(ibox), self.tree.box_levels[ibox])) + + lines.append( + r"\def\boxpath#1{(boxl#1) rectangle (boxh#1)}") + lines.append( + r"\def\drawboxes{" + r"\foreach \ibox in {0,...,\lastboxnr}{" + r"\draw \boxpath{\ibox};" + r"}}") + lines.append( + r"\def\drawboxnrs{" + r"\foreach \ibox in {0,...,\lastboxnr}{" + r"\node [font=\tiny] at (boxc\ibox) {\ibox};" + r"}}") + return "\n".join(lines) + +# }}} # {{{ traversal plotting diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py new file mode 100644 index 00000000..576315a1 --- /dev/null +++ b/examples/interactive_boxtree_demo.py @@ -0,0 +1,26 @@ +import pyopencl as cl +import boxtree.tree_interactive_build + +import matplotlib.pyplot as pt + +ctx = cl.create_some_context() +queue = cl.CommandQueue(ctx) + +tree = boxtree.tree_interactive_build.BoxTree(queue, nlevels=4) + +tree.plot() + +pt.tick_params( + axis='x', # changes apply to the x-axis + which='both', # both major and minor ticks are affected + bottom='off', # ticks along the bottom edge are off + top='off', # ticks along the top edge are off + labelbottom='off') +pt.tick_params( + axis='y', + which='both', + left='off', + top='off', + labelleft='off') + +pt.show() From 6abfae2cf5e5bec699fc55ffd4539fd58bcdc7ae Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 18 Jun 2018 11:38:36 +0800 Subject: [PATCH 09/25] Improve boxtree plotting --- boxtree/tree_interactive_build.py | 48 ++++++++++++++++++++++++---- boxtree/visualization.py | 12 +++---- examples/interactive_boxtree_demo.py | 10 ++++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 0f3ab502..d6a6e246 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -127,8 +127,27 @@ class BoxTree(DeviceDataRecord): ``box_id_t [n_active_boxes]`` """ - - def __init__(self, queue, + def get_copy_kwargs(self, **kwargs): + # cl arrays + for f in self.__class__.fields: + if f not in kwargs: + try: + kwargs[f] = getattr(self, f) + except AttributeError: + pass + + # others + kwargs.update({ + "size_t":self.size_t, + "box_id_dtype":self.box_id_dtype, + "box_level_dtype":self.box_level_dtype, + "coord_dtype":self.coord_dtype, + "root_extent":self.root_extent, + }) + + return kwargs + + def generate_uniform_boxtree(self, queue, root_vertex=np.zeros(2), root_extent=1, nlevels=1, @@ -150,7 +169,9 @@ def __init__(self, queue, self.nboxes_level = cl.array.to_device(queue, np.array( [1 << (dim * l) for l in range(nlevels)], - dtype=self.size_t)) + dtype=self.size_t)) + self.register_fields({"nboxes_level":self.nboxes_level}) + nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) self.box_levels = cl.array.zeros(queue, nboxes, box_level_dtype) @@ -159,6 +180,7 @@ def __init__(self, queue, offset = self.size_t(self.nboxes_level[l].get()) self.box_levels[level_start:level_start + offset] = l level_start += offset + self.register_fields({"box_levels":self.box_levels}) self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) ibox = 0 @@ -169,12 +191,14 @@ def __init__(self, queue, self.box_centers[d, ibox] = cid[d] * dx + ( dx / 2 + root_vertex[d]) ibox += 1 + self.register_fields({"box_centers":self.box_centers}) n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) self.active_boxes = cl.array.to_device(queue, np.array( range(nboxes - n_active_boxes, nboxes), dtype=self.box_id_dtype)) + self.register_fields({"active_boxes":self.active_boxes}) # FIXME: map bool in pyopencl # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype @@ -182,6 +206,7 @@ def __init__(self, queue, # ValueError: unable to map dtype 'bool' self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) self.box_is_active[nboxes - n_active_boxes:] = 1 + self.register_fields({"box_is_active":self.box_is_active}) self.level_boxes = make_obj_array([ cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) @@ -191,6 +216,7 @@ def __init__(self, queue, for b in range(len(self.level_boxes[l])): self.level_boxes[l][b] = ibox ibox += 1 + self.register_fields({"level_boxes":self.level_boxes}) self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), @@ -219,6 +245,9 @@ def __init__(self, queue, self.box_child_ids[child_id][self.box_id_dtype( self.level_boxes[l-1][parent_level_id].get()) ] = ibox + self.register_fields({ + "box_parent_ids":self.box_parent_ids, + "box_child_ids":self.box_child_ids}) @property def dimensions(self): @@ -236,8 +265,6 @@ def nlevels(self): def n_active_boxes(self): return len(self.active_boxes) - fields = ["box_centers"] - def plot(self, **kwargs): from boxtree.visualization import BoxTreePlotter plotter = BoxTreePlotter(self) @@ -247,11 +274,18 @@ def plot(self, **kwargs): def get_box_extent(self, ibox): if isinstance(ibox, cl.array.Array): ibox = self.box_id_dtype(ibox.get()) - lev = self.box_level_dtype(self.box_levels[ibox].get()) + lev = self.box_level_dtype(self.box_levels[ibox].get()) + else: + lev = self.box_level_dtype(self.box_levels[ibox]) box_size = self.root_extent / (1 << lev) extent_low = np.zeros(self.dimensions, self.coord_dtype) for d in range(self.dimensions): - extent_low[d] = self.box_centers[d, ibox].get() - 0.5 * box_size + if isinstance(self.box_centers[0], cl.array.Array): + extent_low[d] = self.box_centers[ + d, ibox].get() - 0.5 * box_size + else: + extent_low[d] = self.box_centers[d, ibox] - 0.5 * box_size + extent_high = extent_low + box_size return extent_low, extent_high diff --git a/boxtree/visualization.py b/boxtree/visualization.py index f9dbbb01..a891265b 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -175,8 +175,8 @@ def get_tikz_for_tree(self): # {{{ box tree plotting class BoxTreePlotter(TreePlotter): - """Assumes that the tree has data living on the device. - Only plots the 2D trees. + """Assumes that the tree has data living on the host. + See :meth:`boxtree.BoxTree.get`. """ def __init__(self, tree): @@ -192,12 +192,12 @@ def draw_tree(self, **kwargs): kwargs["edgecolor"] = edgecolor for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) self.draw_box(ibox, **kwargs) def set_bounding_box(self): import matplotlib.pyplot as pt - root_center = np.array([self.tree.box_centers[d, 0].get() for d + root_center = np.array([self.tree.box_centers[d, 0] for d in range(self.tree.dimensions)]) root_extent = self.tree.root_extent pt.xlim(root_center[0] - root_extent / 2, @@ -213,7 +213,7 @@ def draw_box_numbers(self): tree = self.tree for iabox in range(tree.n_active_boxes): - ibox = tree.box_id_dtype(tree.active_boxes[iabox].get()) + ibox = tree.box_id_dtype(tree.active_boxes[iabox]) x, y = tree.box_centers[:, ibox] lev = int(tree.box_levels[ibox]) pt.text(x, y, str(ibox), fontsize=20*1.15**(-lev), @@ -230,7 +230,7 @@ def get_tikz_for_tree(self): lines.append(r"\def\lastboxnr{%d}" % (self.tree.nboxes-1)) for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox].get()) + ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) el, eh = self.tree.get_box_extent(ibox) c = self.tree.box_centers[:, ibox] diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 576315a1..69be5830 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -6,9 +6,15 @@ ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) -tree = boxtree.tree_interactive_build.BoxTree(queue, nlevels=4) +tree = boxtree.tree_interactive_build.BoxTree() +tree.generate_uniform_boxtree(queue, nlevels=4) -tree.plot() +# call get() before plotting +from boxtree.visualization import BoxTreePlotter +plt = BoxTreePlotter(tree.get(queue)) +plt.draw_tree() +plt.set_bounding_box() +plt.draw_box_numbers() pt.tick_params( axis='x', # changes apply to the x-axis From 4e7aa05cb008c644db323b973333216c00313bc7 Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 19 Jun 2018 09:59:46 +0800 Subject: [PATCH 10/25] Implement getters for centers and measures --- boxtree/tree_interactive_build.py | 139 ++++++++++++++++++++++++++- examples/interactive_boxtree_demo.py | 10 +- 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index d6a6e246..6af7339a 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -24,6 +24,7 @@ import numpy as np from itertools import product +import loopy as lp import pyopencl as cl import pyopencl.array # noqa from pytools.obj_array import make_obj_array @@ -204,7 +205,7 @@ def generate_uniform_boxtree(self, queue, # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype # raise ValueError("unable to map dtype '%s'" % dtype) # ValueError: unable to map dtype 'bool' - self.box_is_active = cl.array.zeros(queue, nboxes, np.int32) + self.box_is_active = cl.array.zeros(queue, nboxes, np.int8) self.box_is_active[nboxes - n_active_boxes:] = 1 self.register_fields({"box_is_active":self.box_is_active}) @@ -290,3 +291,139 @@ def get_box_extent(self, ibox): return extent_low, extent_high # }}} End Data structure for a tree of boxes + +# {{{ Discretize a BoxTree using quadrature + +class QuadratureOnBoxTree(object): + """Tensor-product quadrature rules applied to each active box. + + This class only holds data on template quadrature rules, while it + responds to queries for: + - quadrature nodes + - quadrature weights + - active box centers + - active box measures (areas/volumes) + + The information is generated on the fly and is responsive to + changes made to the underlying BoxTree object. + + .. attribute:: quadrature_formula + + The 1D template quadrature formula defined on [-1, 1]. + + .. attribute:: boxtree + + The underlying BoxTree object, assumed to reside on the device, + i.e., its get() method was not called. + """ + + def __init__(self, boxtree, quadrature_formula=None): + """If no quadrature_formula is supplied, the trivial one is used + sum[ f(centers) * measures ] + """ + if not isinstance(boxtree, BoxTree): + raise RuntimeError("Invalid boxtree") + + self.boxtree = boxtree + + if quadrature_formula is None: + from modepy import GaussLegendreQuadrature + self.quadrature_formula = GaussLegendreQuadrature(0) + else: + self.quadrature_formula = quadrature_formula + + def get_q_points(self): + """Return quadrature points. + """ + pass + + def get_q_weights(self): + """Return quadrature weights. + """ + pass + + def get_cell_centers(self, queue): + """Return centers of active boxes. + """ + def get_knl(): + knl = lp.make_kernel( + "{[iabox, iaxis]: 0<=iabox box_id = active_boxes[iabox] + result[iaxis, iabox] = box_centers[iaxis, box_id] + """, + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_centers", self.boxtree.coord_dtype, + shape="(dims, nboxes)"), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("dims", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_centers") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl()(queue, + active_boxes=self.boxtree.active_boxes, + box_centers=self.boxtree.box_centers, + n_active_boxes=self.boxtree.n_active_boxes, + dims=self.boxtree.dimensions, + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + return result[0] + + def get_cell_measures(self, queue): + """Return measures of active boxes. + This method does not call BoxTree.get_box_extent() for performance + reasons. + """ + def get_knl(): + knl = lp.make_kernel( + "{[iabox]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + result[iabox] = root_measure / (2 ** (box_level * dims)) + """, + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.ValueArg("root_measure", self.boxtree.coord_dtype), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("dims", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_measures") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl()(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + root_measure=( + self.boxtree.root_extent ** self.boxtree.dimensions), + n_active_boxes=self.boxtree.n_active_boxes, + dims=self.boxtree.dimensions, + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + return result[0] + + +# }}} End Discretize a BoxTree using quadrature diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 69be5830..134e5701 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -2,12 +2,20 @@ import boxtree.tree_interactive_build import matplotlib.pyplot as pt +import numpy as np ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) tree = boxtree.tree_interactive_build.BoxTree() -tree.generate_uniform_boxtree(queue, nlevels=4) +tree.generate_uniform_boxtree(queue, nlevels=3) + +quad = boxtree.tree_interactive_build.QuadratureOnBoxTree(tree) +cell_centers = quad.get_cell_centers(queue) + +cell_measures = quad.get_cell_measures(queue) +print(cell_measures) +print(np.sum(cell_measures.get())) # call get() before plotting from boxtree.visualization import BoxTreePlotter From b2bbe01282d4ca7520a7efa9e66bc97c1a5e51a9 Mon Sep 17 00:00:00 2001 From: xywei Date: Tue, 19 Jun 2018 17:00:56 +0800 Subject: [PATCH 11/25] Adding quad info --- boxtree/tree_interactive_build.py | 104 +++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 6af7339a..8775124f 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -335,12 +335,114 @@ def __init__(self, boxtree, quadrature_formula=None): def get_q_points(self): """Return quadrature points. """ + q_nodes = cl.array.to_device(queue, self.quadrature_formula.nodes) + + def get_knl(dim): + quad_vars = ["iq%d" % i for i in range(dim)] + knl = lp.make_kernel( + "{[iabox, iaxis, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_extent = root_extent / 2 ** (box_level+1) + result[iaxis, QUAD_VARS, iabox] = box_centers[iaxis] + ( + if(iaxis==0, q_nodes[iq0], + if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) + ) * box_relative_extent) + """ + .replace("QUAD_VARS", ", ".join(quad_vars)), + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.GlobalArg("q_nodes", self.boxtree.coord_dtype, + shape=lp.auto), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("n_q_nodes", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_quad_points") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl(self.boxtree.dimensions)(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + q_weights=q_weights, + n_active_boxes=self.boxtree.n_active_boxes, + n_q_nodes=len(q_weights), + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + return result[0].ravel() + pass def get_q_weights(self): """Return quadrature weights. """ - pass + q_weights = cl.array.to_device(queue, self.quadrature_formula.weights) + + def get_knl(dim): + quad_vars = ["iq%d" % i for i in range(dim)] + knl = lp.make_kernel( + "{[iabox, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_measure = root_measure / ( + 2 ** ((box_level+1) * DIM)) + result[QUAD_VARS, iabox] = QUAD_WEIGHT * box_relative_measure + """.replace("DIM", str(dim)) + .replace("QUAD_VARS", ", ".join(quad_vars)) + .replace("QUAD_WEIGHT", " * ".join( + ["q_weights[" + qvar + "]" for qvar in quad_vars])) + , + [ + lp.GlobalArg("result", self.boxtree.coord_dtype, + shape=lp.auto), + lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, + shape=lp.auto), + lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, + shape="nboxes"), + lp.GlobalArg("q_weights", self.boxtree.coord_dtype, + shape=lp.auto), + lp.ValueArg("n_active_boxes", self.boxtree.size_t), + lp.ValueArg("n_q_nodes", self.boxtree.size_t), + lp.ValueArg("nboxes", self.boxtree.size_t), + ], + name="get_active_box_quad_weights") + + knl = lp.split_iname(knl, + "iabox", 128, outer_tag="g.0", inner_tag="l.0") + + return knl + + evt, result = get_knl(self.boxtree.dimensions)(queue, + active_boxes=self.boxtree.active_boxes, + box_levels=self.boxtree.box_levels, + q_weights=q_weights, + n_active_boxes=self.boxtree.n_active_boxes, + n_q_nodes=len(q_weights), + nboxes=self.boxtree.nboxes) + + assert len(result) == 1 + # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + return result[0].ravel() def get_cell_centers(self, queue): """Return centers of active boxes. From 93e3f85832c43318c8e6b2708cac07c95c8609be Mon Sep 17 00:00:00 2001 From: xywei Date: Sun, 24 Jun 2018 20:08:30 +0800 Subject: [PATCH 12/25] Sync --- boxtree/tree_interactive_build.py | 49 ++++++++++++++++++++-------- examples/interactive_boxtree_demo.py | 18 +++++++--- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 8775124f..1c45487e 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -332,16 +332,29 @@ def __init__(self, boxtree, quadrature_formula=None): else: self.quadrature_formula = quadrature_formula - def get_q_points(self): + def get_q_points(self, queue): """Return quadrature points. """ q_nodes = cl.array.to_device(queue, self.quadrature_formula.nodes) + def tensor_q_node_code(dim): + if dim == 1: + return "q_nodes[iq0]" + elif dim == 2: + return "if(iaxis==0, q_nodes[iq0], q_nodes[iq1])" + elif dim == 3: + return """if(iaxis==0, q_nodes[iq0], + if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) + ) + """ + def get_knl(dim): quad_vars = ["iq%d" % i for i in range(dim)] knl = lp.make_kernel( - "{[iabox, iaxis, QUAD_VARS]: 0<=iabox box_id = active_boxes[iabox] <> box_level = box_levels[box_id] <> box_relative_extent = root_extent / 2 ** (box_level+1) - result[iaxis, QUAD_VARS, iabox] = box_centers[iaxis] + ( - if(iaxis==0, q_nodes[iq0], - if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) - ) * box_relative_extent) + result[iaxis, iabox, QUAD_VARS] = box_centers[iaxis, box_id] + ( + GET_TENSOR_Q_NODE_IAXIS + ) * box_relative_extent """ + .replace("GET_TENSOR_Q_NODE_IAXIS", tensor_q_node_code(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)), [ lp.GlobalArg("result", self.boxtree.coord_dtype, @@ -364,6 +377,9 @@ def get_knl(dim): shape="nboxes"), lp.GlobalArg("q_nodes", self.boxtree.coord_dtype, shape=lp.auto), + lp.GlobalArg("box_centers", self.boxtree.coord_dtype, + shape="(DIM, nboxes)".replace("DIM", str(dim))), + lp.ValueArg("root_extent", self.boxtree.coord_dtype), lp.ValueArg("n_active_boxes", self.boxtree.size_t), lp.ValueArg("n_q_nodes", self.boxtree.size_t), lp.ValueArg("nboxes", self.boxtree.size_t), @@ -378,18 +394,20 @@ def get_knl(dim): evt, result = get_knl(self.boxtree.dimensions)(queue, active_boxes=self.boxtree.active_boxes, box_levels=self.boxtree.box_levels, - q_weights=q_weights, + q_nodes=q_nodes, + box_centers=self.boxtree.box_centers, + root_extent=self.boxtree.root_extent, n_active_boxes=self.boxtree.n_active_boxes, - n_q_nodes=len(q_weights), + n_q_nodes=len(q_nodes), nboxes=self.boxtree.nboxes) assert len(result) == 1 # TODO: If the ordering is not the same as dealii, reverse the order of quad vars - return result[0].ravel() + return make_obj_array([cpnt.ravel() for cpnt in result[0]]) pass - def get_q_weights(self): + def get_q_weights(self, queue): """Return quadrature weights. """ q_weights = cl.array.to_device(queue, self.quadrature_formula.weights) @@ -397,7 +415,9 @@ def get_q_weights(self): def get_knl(dim): quad_vars = ["iq%d" % i for i in range(dim)] knl = lp.make_kernel( - "{[iabox, QUAD_VARS]: 0<=iabox Date: Sun, 24 Jun 2018 20:20:48 +0800 Subject: [PATCH 13/25] Quad points in the dicionary order for each box --- examples/interactive_boxtree_demo.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index 6b9b859c..d2fbfd15 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -8,11 +8,14 @@ queue = cl.CommandQueue(ctx) tree = boxtree.tree_interactive_build.BoxTree() -tree.generate_uniform_boxtree(queue, nlevels=3) +tree.generate_uniform_boxtree(queue, nlevels=3, root_extent=2, + # root_vertex=(0,0,0)) + root_vertex=(0,0)) # the default quad formula uses cell centers and cell measures from modepy import GaussLegendreQuadrature -quadrature_formula = GaussLegendreQuadrature(1) +n_q_points = 5 +quadrature_formula = GaussLegendreQuadrature(n_q_points - 1) print(quadrature_formula.nodes, quadrature_formula.weights) quad = boxtree.tree_interactive_build.QuadratureOnBoxTree(tree, quadrature_formula) @@ -21,8 +24,8 @@ q_points = quad.get_q_points(queue) q_weights = quad.get_q_weights(queue) -print(q_points) -print(q_weights) +# print(q_points) +# print(q_weights) # print(q_points - cell_centers) # print(q_weights - cell_measures) @@ -34,6 +37,11 @@ plt.set_bounding_box() plt.draw_box_numbers() +qx = q_points[0].get(queue) +qy = q_points[1].get(queue) + +pt.plot(qx, qy, '*') + pt.tick_params( axis='x', # changes apply to the x-axis which='both', # both major and minor ticks are affected From 4f6e5455b6343ef3d10365cd29b9345588f466fd Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 25 Jun 2018 10:58:49 +0800 Subject: [PATCH 14/25] Bug fixes --- boxtree/tree_interactive_build.py | 46 ++++++++++++++++++---------- examples/interactive_boxtree_demo.py | 2 +- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 1c45487e..ff0cd7f0 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -25,6 +25,7 @@ from itertools import product import loopy as lp +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 import pyopencl as cl import pyopencl.array # noqa from pytools.obj_array import make_obj_array @@ -359,12 +360,16 @@ def get_knl(dim): ["0<=" + qvar + " box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_extent = root_extent / 2 ** (box_level+1) - result[iaxis, iabox, QUAD_VARS] = box_centers[iaxis, box_id] + ( - GET_TENSOR_Q_NODE_IAXIS - ) * box_relative_extent + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_extent = root_extent / 2 ** (box_level+1) + for iaxis, QUAD_VARS + result[iaxis, iabox, QUAD_VARS] = box_centers[ + iaxis, box_id] + ( + GET_TENSOR_Q_NODE_IAXIS * box_relative_extent) + end + end """ .replace("GET_TENSOR_Q_NODE_IAXIS", tensor_q_node_code(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)), @@ -422,11 +427,16 @@ def get_knl(dim): ["0<=" + qvar + " box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_measure = root_measure / ( - 2 ** ((box_level+1) * DIM)) - result[QUAD_VARS, iabox] = QUAD_WEIGHT * box_relative_measure + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + <> box_relative_measure = root_measure / ( + 2 ** ((box_level+1) * DIM)) + for QUAD_VARS + result[QUAD_VARS, iabox] = (QUAD_WEIGHT + * box_relative_measure) + end + end """.replace("DIM", str(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)) .replace("QUAD_WEIGHT", " * ".join( @@ -475,8 +485,10 @@ def get_knl(): "{[iabox, iaxis]: 0<=iabox box_id = active_boxes[iabox] - result[iaxis, iabox] = box_centers[iaxis, box_id] + for iabox + <> box_id = active_boxes[iabox] + result[iaxis, iabox] = box_centers[iaxis, box_id] + end """, [ lp.GlobalArg("result", self.boxtree.coord_dtype, @@ -515,9 +527,11 @@ def get_knl(): knl = lp.make_kernel( "{[iabox]: 0<=iabox box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - result[iabox] = root_measure / (2 ** (box_level * dims)) + for iabox + <> box_id = active_boxes[iabox] + <> box_level = box_levels[box_id] + result[iabox] = root_measure / (2 ** (box_level * dims)) + end """, [ lp.GlobalArg("result", self.boxtree.coord_dtype, diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py index d2fbfd15..1a299058 100644 --- a/examples/interactive_boxtree_demo.py +++ b/examples/interactive_boxtree_demo.py @@ -25,7 +25,7 @@ q_weights = quad.get_q_weights(queue) # print(q_points) -# print(q_weights) +# print(q_weights, np.sum(q_weights.get())) # print(q_points - cell_centers) # print(q_weights - cell_measures) From aaaeb687cfc58c8bcf0853967336f51069117bc1 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 25 Jun 2018 16:03:26 +0800 Subject: [PATCH 15/25] Bug fix --- boxtree/tree_interactive_build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index ff0cd7f0..7eca7573 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -433,7 +433,7 @@ def get_knl(dim): <> box_relative_measure = root_measure / ( 2 ** ((box_level+1) * DIM)) for QUAD_VARS - result[QUAD_VARS, iabox] = (QUAD_WEIGHT + result[iabox, QUAD_VARS] = (QUAD_WEIGHT * box_relative_measure) end end From 1650321ee55c649a4df4df81099cdfda45860d6a Mon Sep 17 00:00:00 2001 From: xywei Date: Wed, 27 Jun 2018 12:25:31 +0800 Subject: [PATCH 16/25] Add neighborhood info to BoxTree --- boxtree/tree_interactive_build.py | 92 +++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 7eca7573..30a10fbd 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -22,6 +22,7 @@ THE SOFTWARE. """ import numpy as np +from cgen import Enum from itertools import product import loopy as lp @@ -31,6 +32,20 @@ from pytools.obj_array import make_obj_array from boxtree.tools import DeviceDataRecord +# {{{ adaptivity_flags + +class adaptivity_flags_enum(Enum): # noqa + """Constants for box adaptivity flags bit field.""" + + c_name = "adaptivity_flags_t" + dtype = np.dtype(np.uint8) + c_value_prefix = "ADAPTIVITY_FLAG_" + + REFINE = 1 << 0 + COARSEN = 1 << 1 + +# }}} + # {{{ Data structure for a tree of boxes class BoxTree(DeviceDataRecord): @@ -128,6 +143,31 @@ class BoxTree(DeviceDataRecord): .. attribute:: active_boxes ``box_id_t [n_active_boxes]`` + + .. ------------------------------------------------------------------------ + .. rubric:: Adaptivity properties + .. ------------------------------------------------------------------------ + + .. attribute:: adaptivity_flags + + :attr:`adaptivity_flags_enum.dtype` ``[n_active_boxes]`` + + A bitwise combination of :class:`adaptivity_flags_enum` constants. + + .. attribute:: n_active_neighbors + + ``size_t [n_active_boxes]`` + + Number of adjacent active boxes for each active box. + + .. attribute:: neighboring_active_boxes + + ``box_id_t [n_active_boxes, n_active_neighbors[n_active_boxes]]`` + + List of adjacent active boxes for each active box. The root box + cannot be in this list of any box, and 0 is used as a placeholder + in the case that the actual number of neighbors is less than + the value held in n_active_neighbors[]. """ def get_copy_kwargs(self, **kwargs): # cl arrays @@ -200,6 +240,7 @@ def generate_uniform_boxtree(self, queue, np.array( range(nboxes - n_active_boxes, nboxes), dtype=self.box_id_dtype)) + assert len(self.active_boxes) == 1 << ((nlevels - 1) * dim) self.register_fields({"active_boxes":self.active_boxes}) # FIXME: map bool in pyopencl @@ -251,6 +292,57 @@ def generate_uniform_boxtree(self, queue, "box_parent_ids":self.box_parent_ids, "box_child_ids":self.box_child_ids}) + self.adaptivity_flags = cl.array.zeros(queue, + self.n_active_boxes, + adaptivity_flags_enum.dtype) + self.register_fields({"adaptivity_flags": self.adaptivity_flags}) + + n_active_neighbors_host = np.zeros(n_active_boxes, dtype=self.size_t) + nbpatch = [np.array(patch) + for patch in product(range(-1, 2), repeat=dim) + if sum(np.abs(patch)) > 0] + assert len(nbpatch) == 3 ** dim - 1 + maxid = (1 << (nlevels - 1)) - 1 + iabox = 0 + for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): + if ( + min(multiabox) > 0 and + max(multiabox) < maxid + ): + n_active_neighbors_host[iabox] = 3 ** dim - 1 + else: + n_active_neighbors_host[iabox] = sum(1 for patch in nbpatch + if ( + np.min(np.array(multiabox) + patch) >= 0 and + np.max(np.array(multiabox) + patch) <= maxid + )) + iabox += 1 + self.n_active_neighbors = cl.array.to_device(queue, + n_active_neighbors_host) + self.register_fields({"n_active_neighbors": self.n_active_neighbors}) + + ind_bases = np.array([1 << (nlevels - 1) * (dim - d - 1) + for d in range(dim)]) + anb_pre = [] + iabox = 0 + for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): + nbid = 0 + anb_pre.append(np.zeros( + n_active_neighbors_host[iabox], dtype=self.box_id_dtype)) + for patch in nbpatch: + tmp = np.array(multiabox) + patch + if np.min(tmp) >= 0 and np.max(tmp) <= maxid: + print(patch, tmp) + anb_pre[-1][nbid] = self.box_id_dtype( + nboxes - n_active_boxes + + np.sum(tmp * ind_bases)) + nbid += 1 + iabox += 1 + self.active_neighboring_boxes = make_obj_array([ + cl.array.to_device(queue, anb) for anb in anb_pre]) + self.register_fields({ + "active_neighboring_boxes": self.active_neighboring_boxes}) + @property def dimensions(self): return len(self.box_centers) From 9a8bdd8d0add922b38e846ad5489f3ff82344fbb Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 10 Sep 2018 12:09:30 -0500 Subject: [PATCH 17/25] Remove redundant print --- boxtree/tree_interactive_build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 30a10fbd..b2c3d4a4 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -332,7 +332,6 @@ def generate_uniform_boxtree(self, queue, for patch in nbpatch: tmp = np.array(multiabox) + patch if np.min(tmp) >= 0 and np.max(tmp) <= maxid: - print(patch, tmp) anb_pre[-1][nbid] = self.box_id_dtype( nboxes - n_active_boxes + np.sum(tmp * ind_bases)) From ea1c87a41271dad6e95dac8ef712c8b16d95dc88 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 10 Sep 2018 14:19:10 -0500 Subject: [PATCH 18/25] Bug fix --- boxtree/tree_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 85ef75df..8bb5bfef 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -377,11 +377,11 @@ def combine_srcntgt_arrays(ary1, ary2=None): bbox_auto = bbox_auto.get() # Convert unstructured numpy array to bbox_type - if issinstance(bbox, np.ndarrayJ): + if isinstance(bbox, np.ndarrayJ): if len(bbox) == dimensions: bbox_bak = bbox.copy() bbox = np.empty(1, bbox_auto.dtype) - for i, ax in enumeriate(axis_names): + for i, ax in enumerate(axis_names): bbox['min_'+ax] = bbox_bak[i][0] bbox['max_'+ax] = bbox_bak[i][1] else: From b02765033465d793c0322b4d89c387523d821cf8 Mon Sep 17 00:00:00 2001 From: xywei Date: Sat, 26 Jan 2019 22:02:31 -0600 Subject: [PATCH 19/25] Syntax fixes --- boxtree/tree_interactive_build.py | 59 ++++++++++++++++--------------- boxtree/visualization.py | 2 ++ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index b2c3d4a4..1db12a58 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -26,7 +26,7 @@ from itertools import product import loopy as lp -from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa import pyopencl as cl import pyopencl.array # noqa from pytools.obj_array import make_obj_array @@ -48,6 +48,7 @@ class adaptivity_flags_enum(Enum): # noqa # {{{ Data structure for a tree of boxes + class BoxTree(DeviceDataRecord): r"""A quad/oct-tree tree consisting of a hierarchy of boxes. The intended use for this data structure is to generate particles @@ -180,11 +181,11 @@ def get_copy_kwargs(self, **kwargs): # others kwargs.update({ - "size_t":self.size_t, - "box_id_dtype":self.box_id_dtype, - "box_level_dtype":self.box_level_dtype, - "coord_dtype":self.coord_dtype, - "root_extent":self.root_extent, + "size_t": self.size_t, + "box_id_dtype": self.box_id_dtype, + "box_level_dtype": self.box_level_dtype, + "coord_dtype": self.coord_dtype, + "root_extent": self.root_extent, }) return kwargs @@ -212,7 +213,7 @@ def generate_uniform_boxtree(self, queue, np.array( [1 << (dim * l) for l in range(nlevels)], dtype=self.size_t)) - self.register_fields({"nboxes_level":self.nboxes_level}) + self.register_fields({"nboxes_level": self.nboxes_level}) nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) @@ -220,9 +221,9 @@ def generate_uniform_boxtree(self, queue, level_start = 0 for l in range(nlevels): offset = self.size_t(self.nboxes_level[l].get()) - self.box_levels[level_start:level_start + offset] = l + self.box_levels[level_start: level_start + offset] = l level_start += offset - self.register_fields({"box_levels":self.box_levels}) + self.register_fields({"box_levels": self.box_levels}) self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) ibox = 0 @@ -233,7 +234,7 @@ def generate_uniform_boxtree(self, queue, self.box_centers[d, ibox] = cid[d] * dx + ( dx / 2 + root_vertex[d]) ibox += 1 - self.register_fields({"box_centers":self.box_centers}) + self.register_fields({"box_centers": self.box_centers}) n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) self.active_boxes = cl.array.to_device(queue, @@ -241,7 +242,7 @@ def generate_uniform_boxtree(self, queue, range(nboxes - n_active_boxes, nboxes), dtype=self.box_id_dtype)) assert len(self.active_boxes) == 1 << ((nlevels - 1) * dim) - self.register_fields({"active_boxes":self.active_boxes}) + self.register_fields({"active_boxes": self.active_boxes}) # FIXME: map bool in pyopencl # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype @@ -249,7 +250,7 @@ def generate_uniform_boxtree(self, queue, # ValueError: unable to map dtype 'bool' self.box_is_active = cl.array.zeros(queue, nboxes, np.int8) self.box_is_active[nboxes - n_active_boxes:] = 1 - self.register_fields({"box_is_active":self.box_is_active}) + self.register_fields({"box_is_active": self.box_is_active}) self.level_boxes = make_obj_array([ cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) @@ -259,13 +260,13 @@ def generate_uniform_boxtree(self, queue, for b in range(len(self.level_boxes[l])): self.level_boxes[l][b] = ibox ibox += 1 - self.register_fields({"level_boxes":self.level_boxes}) + self.register_fields({"level_boxes": self.level_boxes}) self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), self.box_id_dtype) for l in range(nlevels): - if l == 0: + if l == 0: # noqa: E741 self.box_parent_ids[0] = 0 else: multi_index_bases = tuple( @@ -289,8 +290,8 @@ def generate_uniform_boxtree(self, queue, self.level_boxes[l-1][parent_level_id].get()) ] = ibox self.register_fields({ - "box_parent_ids":self.box_parent_ids, - "box_child_ids":self.box_child_ids}) + "box_parent_ids": self.box_parent_ids, + "box_child_ids": self.box_child_ids}) self.adaptivity_flags = cl.array.zeros(queue, self.n_active_boxes, @@ -306,15 +307,15 @@ def generate_uniform_boxtree(self, queue, iabox = 0 for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): if ( - min(multiabox) > 0 and - max(multiabox) < maxid + min(multiabox) > 0 + and max(multiabox) < maxid ): n_active_neighbors_host[iabox] = 3 ** dim - 1 else: n_active_neighbors_host[iabox] = sum(1 for patch in nbpatch if ( - np.min(np.array(multiabox) + patch) >= 0 and - np.max(np.array(multiabox) + patch) <= maxid + np.min(np.array(multiabox) + patch) >= 0 + and np.max(np.array(multiabox) + patch) <= maxid )) iabox += 1 self.n_active_neighbors = cl.array.to_device(queue, @@ -333,8 +334,8 @@ def generate_uniform_boxtree(self, queue, tmp = np.array(multiabox) + patch if np.min(tmp) >= 0 and np.max(tmp) <= maxid: anb_pre[-1][nbid] = self.box_id_dtype( - nboxes - n_active_boxes + - np.sum(tmp * ind_bases)) + nboxes - n_active_boxes + + np.sum(tmp * ind_bases)) nbid += 1 iabox += 1 self.active_neighboring_boxes = make_obj_array([ @@ -386,6 +387,7 @@ def get_box_extent(self, ibox): # {{{ Discretize a BoxTree using quadrature + class QuadratureOnBoxTree(object): """Tensor-product quadrature rules applied to each active box. @@ -498,11 +500,10 @@ def get_knl(dim): nboxes=self.boxtree.nboxes) assert len(result) == 1 - # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + # TODO: If the ordering is not the same as dealii, + # reverse the order of quad vars return make_obj_array([cpnt.ravel() for cpnt in result[0]]) - pass - def get_q_weights(self, queue): """Return quadrature weights. """ @@ -531,8 +532,7 @@ def get_knl(dim): """.replace("DIM", str(dim)) .replace("QUAD_VARS", ", ".join(quad_vars)) .replace("QUAD_WEIGHT", " * ".join( - ["q_weights[" + qvar + "]" for qvar in quad_vars])) - , + ["q_weights[" + qvar + "]" for qvar in quad_vars])), [ lp.GlobalArg("result", self.boxtree.coord_dtype, shape=lp.auto), @@ -565,14 +565,15 @@ def get_knl(dim): nboxes=self.boxtree.nboxes) assert len(result) == 1 - # TODO: If the ordering is not the same as dealii, reverse the order of quad vars + # TODO: If the ordering is not the same as dealii, + # reverse the order of quad vars return result[0].ravel() def get_cell_centers(self, queue): """Return centers of active boxes. """ def get_knl(): - knl = lp.make_kernel( + knl = lp.make_kernel( # noqa: E131 "{[iabox, iaxis]: 0<=iabox Date: Fri, 6 Sep 2019 11:15:26 -0500 Subject: [PATCH 20/25] Add dependency on modepy --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f7132cea..af8ac414 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ def main(): "pyopencl>=2018.2.2", "Mako>=0.7.3", "pytest>=2.3", + "modepy>=2016.1.2", "cgen>=2013.1.2", "six", ]) From a331a13cf778cea62f9184a873e8d07de9d0f805 Mon Sep 17 00:00:00 2001 From: xywei Date: Fri, 6 Sep 2019 12:05:42 -0500 Subject: [PATCH 21/25] Fix quad naming --- boxtree/tree_interactive_build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py index 1db12a58..63e0bc6c 100644 --- a/boxtree/tree_interactive_build.py +++ b/boxtree/tree_interactive_build.py @@ -421,8 +421,8 @@ def __init__(self, boxtree, quadrature_formula=None): self.boxtree = boxtree if quadrature_formula is None: - from modepy import GaussLegendreQuadrature - self.quadrature_formula = GaussLegendreQuadrature(0) + from modepy import LegendreGaussQuadrature + self.quadrature_formula = LegendreGaussQuadrature(0) else: self.quadrature_formula = quadrature_formula From 2b5148c993c4113992729284907459b84ed0ae5a Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Jan 2021 10:35:39 -0600 Subject: [PATCH 22/25] Very drafty work towards new tree build --- boxtree/tree_build.py | 95 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 10c89793..dd35b214 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -41,6 +41,42 @@ class MaxLevelsExceeded(RuntimeError): pass +# {{{ box-based tree + +class _TreeOfBoxes: + def __init__( + self, box_centers, root_box_extent, + box_parents, box_children, box_levels): + self.box_centers = box_centers + self.root_box_extent = root_box_extent + self.box_parents = box_parents + self.box_children = box_children + self.box_levels = box_levels + + @property + def dim(self): + return self.box_centers.shape[0] + + @property + def nboxes(self): + return self.box_centers.shape[1] + + def is_leaf(self): + return np.all(self.box_children == 0, axis=0) + + +def _make_tob_root(dim, bounding_box): + return _TreeOfBoxes( + box_centers=np.array(dim*[0.5]).reshape(dim, 1), + root_box_extent=1, + box_parents=np.array([0], np.intp), + box_children=np.array([0] * 2**dim, np.intp).reshape(2**dim, 1), + box_levels=np.array([0]), + ) + +# }}} + + class TreeBuilder(object): def __init__(self, context): """ @@ -76,6 +112,53 @@ def get_kernel_info(self, dimensions, coord_dtype, self.morton_nr_dtype, self.box_level_dtype, kind=kind) + def split_boxes(self, tob, refine_flags): + nchildren = 2**tob.dim + refine_parents, = np.where(refine_flags) + n_new_boxes = len(refine_parents) * nchildren + nboxes_new = tob.nboxes + n_new_boxes + + if refine_flags[~tob.is_leaf()].any(): + raise ValueError("attempting to split non-leaf") + + child_box_starts = ( + tob.nboxes + + nchildren * np.arange(len(refine_parents))) + + refine_parents_per_child = np.empty( + (nchildren, len(refine_parents)), + np.intp) + refine_parents_per_child[:] = refine_parents.reshape(-1) + refine_parents_per_child = refine_parents_per_child.reshape(-1) + + box_parents = resized_array(tob.box_parents, nboxes_new) + box_centers = resized_array(tob.box_centers, nboxes_new) + box_children = resized_array(tob.box_children, nboxes_new) + box_levels = resized_array(tob.box_levels, nboxes_new) + + box_parents[tob.nboxes:] = refine_parents_per_child + box_levels[tob.nboxes:] = tob.box_levels[box_parents[tob.nboxes:]] + 1 + box_children[:, refine_parents] = ( + child_box_starts + + np.arange(nchildren).reshape(-1, 1)) + + for i in range(2**tob.dim): + children_i = box_children[i, refine_parents] + offsets = ( + tob.root_box_extent + * vec_of_signs(tob.dim, i).reshape(-1, 1) + * (1/2**(1+box_levels[children_i]))) + box_centers[:, children_i] = ( + box_centers[:, refine_parents] + + offsets) + + return _TreeOfBoxes( + box_centers=box_centers, + root_box_extent=tob.root_box_extent, + box_parents=box_parents, + box_children=box_children, + box_levels=box_levels) + # {{{ run control def __call__(self, queue, particles, kind="adaptive", @@ -85,6 +168,18 @@ def __call__(self, queue, particles, kind="adaptive", max_leaf_refine_weight=None, wait_for=None, extent_norm=None, bbox=None, **kwargs): + tob = _make_tob_root(dim, bounding_box) + while True: + refine_flags = ... + tob = self._split + + def old__call__(self, queue, particles, kind="adaptive", + max_particles_in_box=None, allocator=None, debug=False, + targets=None, source_radii=None, target_radii=None, + stick_out_factor=None, refine_weights=None, + max_leaf_refine_weight=None, wait_for=None, + extent_norm=None, bbox=None, + **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. From d7874dfc7523f671a05ae87a11edb33beff29d43 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 1 Mar 2021 19:43:05 -0600 Subject: [PATCH 23/25] Level-restriction, refine/coarsening, and some IOs --- boxtree/tree_build.py | 433 ++++++++++++++++++++++++++++++++++++------ examples/demo.py | 11 +- 2 files changed, 379 insertions(+), 65 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index dd35b214..d93dfd0a 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -43,6 +43,30 @@ class MaxLevelsExceeded(RuntimeError): # {{{ box-based tree +def resized_array(arr, new_size): + """Return a resized copy of the array. The new_size is a scalar which is + applied to the last dimension. + """ + old_size = arr.shape[-1] + prefix = (slice(None), ) * (arr.ndim - 1) + if old_size >= new_size: + return arr[prefix + (slice(new_size), )].copy() + else: + new_shape = list(arr.shape) + new_shape[-1] = new_size + new_arr = np.zeros(new_shape, arr.dtype) + new_arr[prefix + (slice(old_size), )] = arr + return new_arr + + +def vec_of_signs(dim, i): + """The sign vector is obtained by converting i to a dim-bit binary. + """ + # e.g. bin(10) = '0b1010' + binary_digits = [int(bd) for bd in bin(i)[2:]] + return np.array(binary_digits) * 2 - 1 + + class _TreeOfBoxes: def __init__( self, box_centers, root_box_extent, @@ -53,22 +77,364 @@ def __init__( self.box_children = box_children self.box_levels = box_levels + def copy(self): + return _TreeOfBoxes( + box_centers=self.box_centers.copy(), + root_box_extent=self.root_box_extent.copy(), + box_parents=self.box_parents.copy(), + box_children=self.box_children.copy(), + box_levels=self.box_levels.copy()) + @property def dim(self): return self.box_centers.shape[0] + # {{{ dummy interface for TreePlotter + + @property + def dimensions(self): + return self.dim + + @property + def bounding_box(self): + lows = self.box_centers[:, 0] - 0.5*self.root_box_extent + highs = lows + self.root_box_extent + return [lows, highs] + + def get_box_extent(self, ibox): + lev = int(self.box_levels[ibox]) + box_size = self.root_box_extent / (1 << lev) + extent_low = self.box_centers[:, ibox] - 0.5*box_size + extent_high = extent_low + box_size + return extent_low, extent_high + + # }}} End dummy interface for TreePlotter + @property def nboxes(self): return self.box_centers.shape[1] + def nlevels(self): + return max(self.box_levels) + 1 # level starts from 0 + def is_leaf(self): + # box_id -> whether the box is leaf return np.all(self.box_children == 0, axis=0) + def leaf_boxes(self): + boxes = np.arange(self.nboxes) + return boxes[self.is_leaf()] + + def is_active(self): + # box_id -> whether the box is active + # inactive boxes may arise from coarsening by resetting leaf pointers + # this function assumes that the parents of inactive boxes are set to -1 + return self.box_parents >= 0 + + def is_active_by_definition(self): + # box_id -> whether the box is active (reachable from root) + # checks the actual reachability + res = np.zeros(self.nboxes, bool) + res[0] = True + nlevels = self.nlevels() + for lv in range(nlevels): + front, = np.where(np.logical_and(self.box_levels == lv, res)) + next_reach = self.box_children[:, front].reshape(-1) + res[next_reach] = True + return res + + def active_leaf_boxes(self): + boxes = np.arange(self.nboxes) + return boxes[np.logical_and(self.is_leaf(), self.is_active())] + + def get_leaf_box_flags(self, queue): + """Make a flag vector that indicates that every leaf box has source. + """ + from boxtree.tree import box_flags_enum + box_flags = np.zeros(self.nboxes, box_flags_enum.dtype) + box_flags[~self.is_leaf()] = 0b0011 + box_flags = cl.array.to_device(queue, box_flags) + return box_flags + + # {{{ from tree with particles + + @classmethod + def from_tree_with_particles(cls, queue, tree): + """Note: this function is support-preserving, i.e., if the tree is + built without setting skip_prune, the resulting tree of boxes does not + cover the root box either. + """ + tob_root = _make_tob_root(tree.dimensions, tree.bounding_box) + + box_centers = tree.box_centers.get(queue) + box_parent_ids = tree.box_parent_ids.get(queue) + box_child_ids = tree.box_child_ids.get(queue) + box_levels = tree.box_levels.get(queue) + + tob_nboxes = min( # some arrays are padded + box_centers.shape[-1], box_parent_ids.shape[-1], + box_child_ids.shape[-1], box_levels.shape[-1]) + + tob = _TreeOfBoxes( + box_centers=box_centers[:, :tob_nboxes], + root_box_extent=tob_root.root_box_extent, + box_parents=box_parent_ids[:tob_nboxes], + box_children=box_child_ids[:, :tob_nboxes], + box_levels=box_levels[:tob_nboxes]) + return tob + + # }}} End from tree with particles + + # {{{ instantiate with particles + + def normalized(self): + """Make a normalized copy, such that boxes are sorted by levels. + """ + raise NotImplementedError + + def sort_particles(self, particles): + """Make a :class:`boxtree.Tree` from given particles. + """ + raise NotImplementedError + + def make_quadrature_tree(self, queue, quad): + """Generate :class:`boxtree.Tree` tree with a given quadrature rule + where particles are quadrature nodes in each leaf box. + """ + if not np.all(sorted(self.box_levels) == self.box_levels): + raise ValueError("calling the method requires a normalized tree") + levs = np.append(self.box_levels, self.nlevels) + lev_diffs = levs[1:] - levs[:-1] + level_starts, = np.where(lev_diffs == 1) + level_starts = np.insert(level_starts, 0, 0) + + particle_id_dtype = np.int32 + level_start_box_nrs_dev = np.array(level_starts) + level_start_box_nrs = cl.array.to_device( + queue, self.level_start_box_nrs) + + sources, box_source_starts, box_source_counts_nonchild, box_source_counts_cumul \ + = (None, None, None, None) # FIXME + user_src_ids = cl.array.arange(queue, n_quad_points, dtype=particle_id_dtype) + + return Tree( + sources_are_targets=True, + sources_have_extent=False, + targets_have_extent=False, + + particle_id_dtype=particle_id_dtype, + box_id_dtype=self.box_parents.dtype, + coord_dtype=self.box_centers.dtype, + box_level_dtype=self.box_level_dtype, + + root_extent=self.root_box_extent, + stick_out_factor=0, + extent_norm=None, + + bounding_box=self.bounding_box, + level_start_box_nrs=level_start_box_nrs, + level_start_box_nrs_dev=level_start_box_nrs_dev, + + sources=sources, + targets=sources, + + box_source_starts=box_source_starts, + box_source_counts_nonchild=box_source_counts_nonchild, + box_source_counts_cumul=box_source_counts_cumul, + box_target_starts=box_source_starts, + box_target_counts_nonchild=box_source_counts_nonchild, + box_target_counts_cumul=box_source_counts_cumul, + + box_parent_ids=cl.array.to_device(queue, self.box_parents), + box_child_ids=cl.array.to_device(queue, self.box_children), + box_centers=cl.array.to_device(queue, self.box_centers), + box_levels=cl.array.to_device(queue, self.box_levels), + + box_flags=self.get_leaf_box_flags(queue), + + user_source_ids=user_src_ids, + sorted_target_ids=user_src_ids, + + _is_pruned=True, + ).with_queue(None) + + # }}} End instantiate with particles + + # {{{ refine/coarsen + + def refined(self, refine_flags): + return self.refine_and_coarsened(refine_flags, None) + + def coarsened(self, coarsen_flags): + return self.refine_and_coarsened(None, coarsen_flags) + + def refine_and_coarsened(self, refine_flags=None, coarsen_flags=None): + """Make a refined/coarsened copy. When children of the same parent box + are marked differently, the refinement flag takes priority. + + Both refinement and coarsening flags can only be set of leafs. + To prevent drastic mesh change, coarsening is only executed when a leaf + box is marked for coarsening, and its parent's children are all leaf + boxes. + """ + nchildren = 2**self.dim + + if refine_flags is None: + refine_flags = np.zeros(self.nboxes, bool) + if refine_flags[~self.is_leaf()].any(): + raise ValueError("attempting to split non-leaf") + + if coarsen_flags is None: + coarsen_flags = np.zeros(self.nboxes, bool) + if coarsen_flags[~self.is_leaf()].any(): + raise ValueError("attempting to coarsen non-leaf") + + # ------------------- refinement always respects the flags + + refine_parents, = np.where(refine_flags) + n_new_boxes = len(refine_parents) * nchildren + nboxes_new = self.nboxes + n_new_boxes + + child_box_starts = ( + self.nboxes + + nchildren * np.arange(len(refine_parents))) + + refine_parents_per_child = np.empty( + (nchildren, len(refine_parents)), + np.intp) + refine_parents_per_child[:] = refine_parents.reshape(-1) + refine_parents_per_child = refine_parents_per_child.reshape(-1) + + box_parents = resized_array(self.box_parents, nboxes_new) + box_centers = resized_array(self.box_centers, nboxes_new) + box_children = resized_array(self.box_children, nboxes_new) + box_levels = resized_array(self.box_levels, nboxes_new) + + box_parents[self.nboxes:] = refine_parents_per_child + box_levels[self.nboxes:] = self.box_levels[box_parents[self.nboxes:]] + 1 + box_children[:, refine_parents] = ( + child_box_starts + + np.arange(nchildren).reshape(-1, 1)) + + for i in range(2**self.dim): + children_i = box_children[i, refine_parents] + offsets = ( + self.root_box_extent + * vec_of_signs(self.dim, i).reshape(-1, 1) + * (1/2**(1+box_levels[children_i]))) + box_centers[:, children_i] = ( + box_centers[:, refine_parents] + + offsets) + + # ------------------- coarsen only if all peers are leafs after refinement + + coarsen_flags = resized_array(coarsen_flags, nboxes_new) + coarsen_sources, = np.where(coarsen_flags) + coarsen_parents = self.box_parents[coarsen_sources] + coarsen_peers = self.box_children[:, coarsen_parents] + coarsen_peer_is_leaf = self.is_leaf()[coarsen_peers] + coarsen_exec_flags = np.all(coarsen_peer_is_leaf, axis=0) -def _make_tob_root(dim, bounding_box): + coarsen_parents = coarsen_parents[coarsen_exec_flags] + coarsen_peers = coarsen_peers[:, coarsen_exec_flags] + box_parents[coarsen_peers] = -1 + box_children[coarsen_parents] = 0 + + return _TreeOfBoxes( + box_centers=box_centers, + root_box_extent=self.root_box_extent, + box_parents=box_parents, + box_children=box_children, + box_levels=box_levels) + + # }}} End refine/coarsen + + # {{{ balance + + def balanced(self, context, queue=None): + """Make a level-restricted copy by adding necessary refinements. It + builds list 1 and apply refinement where needed, and iterate until the + level restriction is satisfied. + """ + tree = self.copy() + niter = 0 + while True: + niter += 1 + if niter > self.nlevels() + 1: + raise RuntimeError # this should not happen + nghb_starts, nghb_lists = tree.get_neighbor_lists(context, queue) + nghb_level_lists = tree.box_levels[nghb_lists] + leaf_levels = tree.box_levels[tree.is_leaf()] + max_levels = np.maximum.reduceat(nghb_level_lists, nghb_starts[:-1]) + + to_refine = (max_levels > leaf_levels + 1) + balance_refine_flags = np.zeros(tree.nboxes, bool) + balance_refine_flags[tree.is_leaf()] = to_refine + + if not np.any(balance_refine_flags): + print(niter) + break # converged + tree = tree.refined(balance_refine_flags) + + incre = tree.nboxes - self.nboxes + logger.debug(f"tree build: added {incre} boxes to maintain level restriction") + return tree + + def get_neighbor_lists(self, context, queue=None): + """Get list of neighboring boxes for leafs by calling the list 1 builder. + """ + from boxtree.traversal import FMMTraversalBuilder + trav_builder = FMMTraversalBuilder(context) + if not queue: + queue = cl.CommandQueue(context) + dimensions = self.dim + coord_dtype = self.box_centers.dtype + particle_id_dtype = self.box_children.dtype + box_id_dtype = self.box_children.dtype + box_level_dtype = np.dtype(np.uint8) + + from pytools import div_ceil + max_levels = div_ceil(self.nlevels(), 5) * 5 + target_boxes = self.active_leaf_boxes() + aligned_nboxes = self.nboxes + + box_flags = self.get_leaf_box_flags(queue) + + knl_info = trav_builder.get_kernel_info( + dimensions, particle_id_dtype, box_id_dtype, + coord_dtype, box_level_dtype, max_levels, + sources_are_targets=True, + sources_have_extent=False, targets_have_extent=False, + extent_norm=None) + + result, evt = knl_info.neighbor_source_boxes_builder( + queue, len(target_boxes), + cl.array.to_device(queue, self.box_centers.copy()).data, + self.root_box_extent, + cl.array.to_device(queue, self.box_levels), + aligned_nboxes, + cl.array.to_device(queue, self.box_children.copy()).data, + box_flags, + cl.array.to_device(queue, target_boxes), + wait_for=[]) + cl.wait_for_events([evt]) + neighbor_source_boxes = result["neighbor_source_boxes"] + starts = neighbor_source_boxes.starts.get(queue) + lists = neighbor_source_boxes.lists.get(queue) + return starts, lists + + # }}} End balance + + +def _make_tob_root(dim, bbox): + center = np.array( + [(bbox[0][iaxis] + bbox[1][iaxis]) * 0.5 for iaxis in range(dim)]) + extents = np.array( + [(bbox[1][iaxis] - bbox[0][iaxis]) for iaxis in range(dim)]) + from pytools import single_valued return _TreeOfBoxes( - box_centers=np.array(dim*[0.5]).reshape(dim, 1), - root_box_extent=1, + box_centers=center.reshape(dim, 1), + root_box_extent=single_valued(extents, np.allclose), box_parents=np.array([0], np.intp), box_children=np.array([0] * 2**dim, np.intp).reshape(2**dim, 1), box_levels=np.array([0]), @@ -112,53 +478,6 @@ def get_kernel_info(self, dimensions, coord_dtype, self.morton_nr_dtype, self.box_level_dtype, kind=kind) - def split_boxes(self, tob, refine_flags): - nchildren = 2**tob.dim - refine_parents, = np.where(refine_flags) - n_new_boxes = len(refine_parents) * nchildren - nboxes_new = tob.nboxes + n_new_boxes - - if refine_flags[~tob.is_leaf()].any(): - raise ValueError("attempting to split non-leaf") - - child_box_starts = ( - tob.nboxes - + nchildren * np.arange(len(refine_parents))) - - refine_parents_per_child = np.empty( - (nchildren, len(refine_parents)), - np.intp) - refine_parents_per_child[:] = refine_parents.reshape(-1) - refine_parents_per_child = refine_parents_per_child.reshape(-1) - - box_parents = resized_array(tob.box_parents, nboxes_new) - box_centers = resized_array(tob.box_centers, nboxes_new) - box_children = resized_array(tob.box_children, nboxes_new) - box_levels = resized_array(tob.box_levels, nboxes_new) - - box_parents[tob.nboxes:] = refine_parents_per_child - box_levels[tob.nboxes:] = tob.box_levels[box_parents[tob.nboxes:]] + 1 - box_children[:, refine_parents] = ( - child_box_starts - + np.arange(nchildren).reshape(-1, 1)) - - for i in range(2**tob.dim): - children_i = box_children[i, refine_parents] - offsets = ( - tob.root_box_extent - * vec_of_signs(tob.dim, i).reshape(-1, 1) - * (1/2**(1+box_levels[children_i]))) - box_centers[:, children_i] = ( - box_centers[:, refine_parents] - + offsets) - - return _TreeOfBoxes( - box_centers=box_centers, - root_box_extent=tob.root_box_extent, - box_parents=box_parents, - box_children=box_children, - box_levels=box_levels) - # {{{ run control def __call__(self, queue, particles, kind="adaptive", @@ -168,18 +487,6 @@ def __call__(self, queue, particles, kind="adaptive", max_leaf_refine_weight=None, wait_for=None, extent_norm=None, bbox=None, **kwargs): - tob = _make_tob_root(dim, bounding_box) - while True: - refine_flags = ... - tob = self._split - - def old__call__(self, queue, particles, kind="adaptive", - max_particles_in_box=None, allocator=None, debug=False, - targets=None, source_radii=None, target_radii=None, - stick_out_factor=None, refine_weights=None, - max_leaf_refine_weight=None, wait_for=None, - extent_norm=None, bbox=None, - **kwargs): """ :arg queue: a :class:`pyopencl.CommandQueue` instance :arg particles: an object array of (XYZ) point coordinate arrays. @@ -1713,6 +2020,7 @@ def make_arange(start): # }}} + del box_has_children # {{{ build output @@ -1733,7 +2041,6 @@ def make_arange(start): return Tree( # If you change this, also change the documentation # of what's in the tree, above. - sources_are_targets=sources_are_targets, sources_have_extent=sources_have_extent, targets_have_extent=targets_have_extent, diff --git a/examples/demo.py b/examples/demo.py index a798412a..7e4234b8 100644 --- a/examples/demo.py +++ b/examples/demo.py @@ -4,6 +4,9 @@ import numpy as np from six.moves import range +import logging +logging.basicConfig(level="DEBUG") + ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) @@ -28,6 +31,9 @@ tb = TreeBuilder(ctx) tree, _ = tb(queue, particles, max_particles_in_box=5) +from boxtree.tree_build import _TreeOfBoxes as TBox +tob = TBox.from_tree_with_particles(queue, tree) +tob = tob.balanced(ctx) from boxtree.traversal import FMMTraversalBuilder tg = FMMTraversalBuilder(ctx) trav, _ = tg(queue, tree) @@ -43,9 +49,10 @@ pt.plot(particles[0].get(), particles[1].get(), "+") from boxtree.visualization import TreePlotter -plotter = TreePlotter(tree.get(queue=queue)) +# tt = tree.get(queue=queue) +plotter = TreePlotter(tob) plotter.draw_tree(fill=False, edgecolor="black") -#plotter.draw_box_numbers() +plotter.draw_box_numbers() plotter.set_bounding_box() pt.gca().set_aspect("equal") pt.tight_layout() From ea21bc94106fdd5463e95370d607901bed934cd3 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 1 Mar 2021 19:49:18 -0600 Subject: [PATCH 24/25] Revert "Merge branch 'master' of gitlab.tiker.net:xywei/boxtree into tree-build-refactor" This reverts commit d81a516883b3965f37cda4d66d24fd6fa1a2d057, reversing changes made to d7874dfc7523f671a05ae87a11edb33beff29d43. --- boxtree/tree_build.py | 6 +- boxtree/tree_interactive_build.py | 660 --------------------------- boxtree/visualization.py | 96 ---- examples/interactive_boxtree_demo.py | 58 --- setup.py | 1 - 5 files changed, 2 insertions(+), 819 deletions(-) delete mode 100644 boxtree/tree_interactive_build.py delete mode 100644 examples/interactive_boxtree_demo.py diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index 77d2caee..d93dfd0a 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -525,8 +525,6 @@ def __call__(self, queue, particles, kind="adaptive", execution. :arg extent_norm: ``"l2"`` or ``"linf"``. Indicates the norm with respect to which particle stick-out is measured. See :attr:`Tree.extent_norm`. - :arg bbox: Bounding box of the same type as returned by - *boxtree.bounding_box.make_bounding_box_dtype*. :arg bbox: Bounding box of either type: 1. A dim-by-2 array, with each row to be [min, max] coordinates in its corresponding axis direction. @@ -536,8 +534,8 @@ def __call__(self, queue, particles, kind="adaptive", building. Otherwise, the bounding box is determined from particles in such a way that it is square and is slightly larger at the top (so that scaled coordinates are always < 1). - When given, this bounding box is used for tree - building. Otherwise, the bounding box is determined from particles. + When supplied, the bounding box must be square and have all the + particles in its closure. :arg kwargs: Used internally for debugging. :returns: a tuple ``(tree, event)``, where *tree* is an instance of diff --git a/boxtree/tree_interactive_build.py b/boxtree/tree_interactive_build.py deleted file mode 100644 index 63e0bc6c..00000000 --- a/boxtree/tree_interactive_build.py +++ /dev/null @@ -1,660 +0,0 @@ -from __future__ import division, absolute_import - -__copyright__ = "Copyright (C) 2018 Xiaoyu Wei" - -__license__ = """ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -""" -import numpy as np -from cgen import Enum -from itertools import product - -import loopy as lp -from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa -import pyopencl as cl -import pyopencl.array # noqa -from pytools.obj_array import make_obj_array -from boxtree.tools import DeviceDataRecord - -# {{{ adaptivity_flags - -class adaptivity_flags_enum(Enum): # noqa - """Constants for box adaptivity flags bit field.""" - - c_name = "adaptivity_flags_t" - dtype = np.dtype(np.uint8) - c_value_prefix = "ADAPTIVITY_FLAG_" - - REFINE = 1 << 0 - COARSEN = 1 << 1 - -# }}} - -# {{{ Data structure for a tree of boxes - - -class BoxTree(DeviceDataRecord): - r"""A quad/oct-tree tree consisting of a hierarchy of boxes. - The intended use for this data structure is to generate particles - as quadrature points in each leaf box. Those particles can then - be used to build a Tree object and then generate the Traversal for - FMM. - - This class is designed such that it is easy to adaptively modify - the tree structure (refine/coarsen). Unlike flattened Tree class, - it handles itself and does not rely on external tree builders. - - .. ------------------------------------------------------------------------ - .. rubric:: Data types - .. ------------------------------------------------------------------------ - - .. attribute:: box_id_dtype - .. attribute:: coord_dtype - .. attribute:: box_level_dtype - - .. ------------------------------------------------------------------------ - .. rubric:: Counts and sizes - .. ------------------------------------------------------------------------ - - .. attribute:: root_extent - - the root box size, a scalar - - .. attribute:: nlevels - - .. attribute:: nboxes - - Can be larger than the actual number of boxes, since the box ids of - purged boxes during coarsening are not reused. - - .. attribute:: nboxes_level - - ``size_t [nlevels]`` - - Can be larger than the actual number of boxes at each level due to - the same reason for nboxes. - - .. attribute:: n_active_boxes - - .. ------------------------------------------------------------------------ - .. rubric:: Box properties - .. ------------------------------------------------------------------------ - - .. attribute:: box_centers - - ``coord_t [dimensions, nboxes]`` - - (C order, 'structure of arrays') - - .. attribute:: box_levels - - :attr:`box_level_dtype` ``box_level_t [nboxes]`` - - .. attribute:: box_is_active - - :attr:`bool` ``bool [nboxes]`` - - FIXME: pyopencl cannot map 'bool'. Resort to use an int for now. - - .. ------------------------------------------------------------------------ - .. rubric:: Structural properties - .. ------------------------------------------------------------------------ - - .. attribute:: level_boxes - - ``numpy.ndarray [nlevels]`` - ``box_id_t [nlevels][nboxes_level[level]]`` - - A :class:`numpy.ndarray` of box ids at each level. It acts as an - inverse lookup table of box_levels. The outer layer is an object - array to account for variable lengths at each level. - - .. attribute:: box_parent_ids - - ``box_id_t [nboxes]`` - - Box 0 (the root) has 0 as its parent. - - .. attribute:: box_child_ids - - ``box_id_t [2**dimensions, nboxes]`` - - (C order, 'structure of arrays') - - "0" is used as a 'no child' marker, as the root box can never - occur as any box's child. Boxes with no child are called active - boxes. - - .. attribute:: active_boxes - - ``box_id_t [n_active_boxes]`` - - .. ------------------------------------------------------------------------ - .. rubric:: Adaptivity properties - .. ------------------------------------------------------------------------ - - .. attribute:: adaptivity_flags - - :attr:`adaptivity_flags_enum.dtype` ``[n_active_boxes]`` - - A bitwise combination of :class:`adaptivity_flags_enum` constants. - - .. attribute:: n_active_neighbors - - ``size_t [n_active_boxes]`` - - Number of adjacent active boxes for each active box. - - .. attribute:: neighboring_active_boxes - - ``box_id_t [n_active_boxes, n_active_neighbors[n_active_boxes]]`` - - List of adjacent active boxes for each active box. The root box - cannot be in this list of any box, and 0 is used as a placeholder - in the case that the actual number of neighbors is less than - the value held in n_active_neighbors[]. - """ - def get_copy_kwargs(self, **kwargs): - # cl arrays - for f in self.__class__.fields: - if f not in kwargs: - try: - kwargs[f] = getattr(self, f) - except AttributeError: - pass - - # others - kwargs.update({ - "size_t": self.size_t, - "box_id_dtype": self.box_id_dtype, - "box_level_dtype": self.box_level_dtype, - "coord_dtype": self.coord_dtype, - "root_extent": self.root_extent, - }) - - return kwargs - - def generate_uniform_boxtree(self, queue, - root_vertex=np.zeros(2), - root_extent=1, - nlevels=1, - box_id_dtype=np.int32, - box_level_dtype=np.int32, - coord_dtype=np.float64): - """A plain boxtree with uniform levels (a complete tree). - The root box is given its vertex with the smallest coordinates - and its extent. - """ - self.size_t = np.int32 - self.box_id_dtype = box_id_dtype - self.box_level_dtype = box_level_dtype - self.coord_dtype = coord_dtype - - dim = len(root_vertex) - self.root_extent = root_extent - - self.nboxes_level = cl.array.to_device(queue, - np.array( - [1 << (dim * l) for l in range(nlevels)], - dtype=self.size_t)) - self.register_fields({"nboxes_level": self.nboxes_level}) - - nboxes = self.size_t(cl.array.sum(self.nboxes_level).get()) - - self.box_levels = cl.array.zeros(queue, nboxes, box_level_dtype) - level_start = 0 - for l in range(nlevels): - offset = self.size_t(self.nboxes_level[l].get()) - self.box_levels[level_start: level_start + offset] = l - level_start += offset - self.register_fields({"box_levels": self.box_levels}) - - self.box_centers = cl.array.zeros(queue, (dim, nboxes), coord_dtype) - ibox = 0 - for l in range(nlevels): - dx = self.root_extent / (1 << l) - for cid in product(range(1 << l), repeat=dim): - for d in range(dim): - self.box_centers[d, ibox] = cid[d] * dx + ( - dx / 2 + root_vertex[d]) - ibox += 1 - self.register_fields({"box_centers": self.box_centers}) - - n_active_boxes = self.size_t(self.nboxes_level[nlevels - 1].get()) - self.active_boxes = cl.array.to_device(queue, - np.array( - range(nboxes - n_active_boxes, nboxes), - dtype=self.box_id_dtype)) - assert len(self.active_boxes) == 1 << ((nlevels - 1) * dim) - self.register_fields({"active_boxes": self.active_boxes}) - - # FIXME: map bool in pyopencl - # pyopencl/compyte/dtypes.py", line 107, in dtype_to_ctype - # raise ValueError("unable to map dtype '%s'" % dtype) - # ValueError: unable to map dtype 'bool' - self.box_is_active = cl.array.zeros(queue, nboxes, np.int8) - self.box_is_active[nboxes - n_active_boxes:] = 1 - self.register_fields({"box_is_active": self.box_is_active}) - - self.level_boxes = make_obj_array([ - cl.array.zeros(queue, 1 << (dim * l), self.box_id_dtype) - for l in range(nlevels)]) - ibox = 0 - for l in range(nlevels): - for b in range(len(self.level_boxes[l])): - self.level_boxes[l][b] = ibox - ibox += 1 - self.register_fields({"level_boxes": self.level_boxes}) - - self.box_parent_ids = cl.array.zeros(queue, nboxes, self.box_id_dtype) - self.box_child_ids = cl.array.zeros(queue, (1 << dim, nboxes), - self.box_id_dtype) - for l in range(nlevels): - if l == 0: # noqa: E741 - self.box_parent_ids[0] = 0 - else: - multi_index_bases = tuple( - 1 << ((dim - 1 - d) * (l-1)) for d in range(dim)) - for ilb, multi_ind in zip(range(len(self.level_boxes[l])), - product(range(1 << l), repeat=dim)): - ibox = self.box_id_dtype(self.level_boxes[l][ilb].get()) - parent_multi_ind = tuple(ind // 2 for ind in multi_ind) - parent_level_id = np.sum([ind * base for ind, base - in zip(parent_multi_ind, multi_index_bases)]) - self.box_parent_ids[ibox] = self.level_boxes[ - l-1][parent_level_id] - - child_multi_index_bases = tuple( - 1 << (dim - 1 - d) for d in range(dim)) - child_multi_ind = tuple(ind - pind * 2 for ind, pind - in zip(multi_ind, parent_multi_ind)) - child_id = np.sum([ind * base for ind, base - in zip(child_multi_ind, child_multi_index_bases)]) - self.box_child_ids[child_id][self.box_id_dtype( - self.level_boxes[l-1][parent_level_id].get()) - ] = ibox - self.register_fields({ - "box_parent_ids": self.box_parent_ids, - "box_child_ids": self.box_child_ids}) - - self.adaptivity_flags = cl.array.zeros(queue, - self.n_active_boxes, - adaptivity_flags_enum.dtype) - self.register_fields({"adaptivity_flags": self.adaptivity_flags}) - - n_active_neighbors_host = np.zeros(n_active_boxes, dtype=self.size_t) - nbpatch = [np.array(patch) - for patch in product(range(-1, 2), repeat=dim) - if sum(np.abs(patch)) > 0] - assert len(nbpatch) == 3 ** dim - 1 - maxid = (1 << (nlevels - 1)) - 1 - iabox = 0 - for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): - if ( - min(multiabox) > 0 - and max(multiabox) < maxid - ): - n_active_neighbors_host[iabox] = 3 ** dim - 1 - else: - n_active_neighbors_host[iabox] = sum(1 for patch in nbpatch - if ( - np.min(np.array(multiabox) + patch) >= 0 - and np.max(np.array(multiabox) + patch) <= maxid - )) - iabox += 1 - self.n_active_neighbors = cl.array.to_device(queue, - n_active_neighbors_host) - self.register_fields({"n_active_neighbors": self.n_active_neighbors}) - - ind_bases = np.array([1 << (nlevels - 1) * (dim - d - 1) - for d in range(dim)]) - anb_pre = [] - iabox = 0 - for multiabox in product(range(1 << (nlevels - 1)), repeat=dim): - nbid = 0 - anb_pre.append(np.zeros( - n_active_neighbors_host[iabox], dtype=self.box_id_dtype)) - for patch in nbpatch: - tmp = np.array(multiabox) + patch - if np.min(tmp) >= 0 and np.max(tmp) <= maxid: - anb_pre[-1][nbid] = self.box_id_dtype( - nboxes - n_active_boxes - + np.sum(tmp * ind_bases)) - nbid += 1 - iabox += 1 - self.active_neighboring_boxes = make_obj_array([ - cl.array.to_device(queue, anb) for anb in anb_pre]) - self.register_fields({ - "active_neighboring_boxes": self.active_neighboring_boxes}) - - @property - def dimensions(self): - return len(self.box_centers) - - @property - def nboxes(self): - return len(self.box_levels) - - @property - def nlevels(self): - return len(self.level_boxes) - - @property - def n_active_boxes(self): - return len(self.active_boxes) - - def plot(self, **kwargs): - from boxtree.visualization import BoxTreePlotter - plotter = BoxTreePlotter(self) - plotter.draw_tree(**kwargs) - plotter.set_bounding_box() - - def get_box_extent(self, ibox): - if isinstance(ibox, cl.array.Array): - ibox = self.box_id_dtype(ibox.get()) - lev = self.box_level_dtype(self.box_levels[ibox].get()) - else: - lev = self.box_level_dtype(self.box_levels[ibox]) - box_size = self.root_extent / (1 << lev) - extent_low = np.zeros(self.dimensions, self.coord_dtype) - for d in range(self.dimensions): - if isinstance(self.box_centers[0], cl.array.Array): - extent_low[d] = self.box_centers[ - d, ibox].get() - 0.5 * box_size - else: - extent_low[d] = self.box_centers[d, ibox] - 0.5 * box_size - - extent_high = extent_low + box_size - return extent_low, extent_high - -# }}} End Data structure for a tree of boxes - -# {{{ Discretize a BoxTree using quadrature - - -class QuadratureOnBoxTree(object): - """Tensor-product quadrature rules applied to each active box. - - This class only holds data on template quadrature rules, while it - responds to queries for: - - quadrature nodes - - quadrature weights - - active box centers - - active box measures (areas/volumes) - - The information is generated on the fly and is responsive to - changes made to the underlying BoxTree object. - - .. attribute:: quadrature_formula - - The 1D template quadrature formula defined on [-1, 1]. - - .. attribute:: boxtree - - The underlying BoxTree object, assumed to reside on the device, - i.e., its get() method was not called. - """ - - def __init__(self, boxtree, quadrature_formula=None): - """If no quadrature_formula is supplied, the trivial one is used - sum[ f(centers) * measures ] - """ - if not isinstance(boxtree, BoxTree): - raise RuntimeError("Invalid boxtree") - - self.boxtree = boxtree - - if quadrature_formula is None: - from modepy import LegendreGaussQuadrature - self.quadrature_formula = LegendreGaussQuadrature(0) - else: - self.quadrature_formula = quadrature_formula - - def get_q_points(self, queue): - """Return quadrature points. - """ - q_nodes = cl.array.to_device(queue, self.quadrature_formula.nodes) - - def tensor_q_node_code(dim): - if dim == 1: - return "q_nodes[iq0]" - elif dim == 2: - return "if(iaxis==0, q_nodes[iq0], q_nodes[iq1])" - elif dim == 3: - return """if(iaxis==0, q_nodes[iq0], - if(iaxis==1, q_nodes[iq1], q_nodes[iq2]) - ) - """ - - def get_knl(dim): - quad_vars = ["iq%d" % i for i in range(dim)] - knl = lp.make_kernel( - "{[iabox, iaxis, QUAD_VARS]".replace( - "QUAD_VARS", ', '.join(quad_vars)) - + ": 0<=iabox box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_extent = root_extent / 2 ** (box_level+1) - for iaxis, QUAD_VARS - result[iaxis, iabox, QUAD_VARS] = box_centers[ - iaxis, box_id] + ( - GET_TENSOR_Q_NODE_IAXIS * box_relative_extent) - end - end - """ - .replace("GET_TENSOR_Q_NODE_IAXIS", tensor_q_node_code(dim)) - .replace("QUAD_VARS", ", ".join(quad_vars)), - [ - lp.GlobalArg("result", self.boxtree.coord_dtype, - shape=lp.auto), - lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, - shape=lp.auto), - lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, - shape="nboxes"), - lp.GlobalArg("q_nodes", self.boxtree.coord_dtype, - shape=lp.auto), - lp.GlobalArg("box_centers", self.boxtree.coord_dtype, - shape="(DIM, nboxes)".replace("DIM", str(dim))), - lp.ValueArg("root_extent", self.boxtree.coord_dtype), - lp.ValueArg("n_active_boxes", self.boxtree.size_t), - lp.ValueArg("n_q_nodes", self.boxtree.size_t), - lp.ValueArg("nboxes", self.boxtree.size_t), - ], - name="get_active_box_quad_points") - - knl = lp.split_iname(knl, - "iabox", 128, outer_tag="g.0", inner_tag="l.0") - - return knl - - evt, result = get_knl(self.boxtree.dimensions)(queue, - active_boxes=self.boxtree.active_boxes, - box_levels=self.boxtree.box_levels, - q_nodes=q_nodes, - box_centers=self.boxtree.box_centers, - root_extent=self.boxtree.root_extent, - n_active_boxes=self.boxtree.n_active_boxes, - n_q_nodes=len(q_nodes), - nboxes=self.boxtree.nboxes) - - assert len(result) == 1 - # TODO: If the ordering is not the same as dealii, - # reverse the order of quad vars - return make_obj_array([cpnt.ravel() for cpnt in result[0]]) - - def get_q_weights(self, queue): - """Return quadrature weights. - """ - q_weights = cl.array.to_device(queue, self.quadrature_formula.weights) - - def get_knl(dim): - quad_vars = ["iq%d" % i for i in range(dim)] - knl = lp.make_kernel( - "{[iabox, QUAD_VARS]:".replace( - "QUAD_VARS", ', '.join(quad_vars)) - + " 0<=iabox box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - <> box_relative_measure = root_measure / ( - 2 ** ((box_level+1) * DIM)) - for QUAD_VARS - result[iabox, QUAD_VARS] = (QUAD_WEIGHT - * box_relative_measure) - end - end - """.replace("DIM", str(dim)) - .replace("QUAD_VARS", ", ".join(quad_vars)) - .replace("QUAD_WEIGHT", " * ".join( - ["q_weights[" + qvar + "]" for qvar in quad_vars])), - [ - lp.GlobalArg("result", self.boxtree.coord_dtype, - shape=lp.auto), - lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, - shape=lp.auto), - lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, - shape="nboxes"), - lp.GlobalArg("q_weights", self.boxtree.coord_dtype, - shape=lp.auto), - lp.ValueArg("root_measure", self.boxtree.coord_dtype), - lp.ValueArg("n_active_boxes", self.boxtree.size_t), - lp.ValueArg("n_q_nodes", self.boxtree.size_t), - lp.ValueArg("nboxes", self.boxtree.size_t), - ], - name="get_active_box_quad_weights") - - knl = lp.split_iname(knl, - "iabox", 128, outer_tag="g.0", inner_tag="l.0") - - return knl - - evt, result = get_knl(self.boxtree.dimensions)(queue, - active_boxes=self.boxtree.active_boxes, - box_levels=self.boxtree.box_levels, - q_weights=q_weights, - root_measure=(self.boxtree.root_extent - ** self.boxtree.dimensions), - n_active_boxes=self.boxtree.n_active_boxes, - n_q_nodes=len(q_weights), - nboxes=self.boxtree.nboxes) - - assert len(result) == 1 - # TODO: If the ordering is not the same as dealii, - # reverse the order of quad vars - return result[0].ravel() - - def get_cell_centers(self, queue): - """Return centers of active boxes. - """ - def get_knl(): - knl = lp.make_kernel( # noqa: E131 - "{[iabox, iaxis]: 0<=iabox box_id = active_boxes[iabox] - result[iaxis, iabox] = box_centers[iaxis, box_id] - end - """, - [ - lp.GlobalArg("result", self.boxtree.coord_dtype, - shape=lp.auto), - lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, - shape=lp.auto), - lp.GlobalArg("box_centers", self.boxtree.coord_dtype, - shape="(dims, nboxes)"), - lp.ValueArg("n_active_boxes", self.boxtree.size_t), - lp.ValueArg("dims", self.boxtree.size_t), - lp.ValueArg("nboxes", self.boxtree.size_t), - ], - name="get_active_box_centers") - - knl = lp.split_iname(knl, - "iabox", 128, outer_tag="g.0", inner_tag="l.0") - - return knl - - evt, result = get_knl()(queue, - active_boxes=self.boxtree.active_boxes, - box_centers=self.boxtree.box_centers, - n_active_boxes=self.boxtree.n_active_boxes, - dims=self.boxtree.dimensions, - nboxes=self.boxtree.nboxes) - - assert len(result) == 1 - return make_obj_array([cpnt.ravel() for cpnt in result[0]]) - - def get_cell_measures(self, queue): - """Return measures of active boxes. - This method does not call BoxTree.get_box_extent() for performance - reasons. - """ - def get_knl(): - knl = lp.make_kernel( - "{[iabox]: 0<=iabox box_id = active_boxes[iabox] - <> box_level = box_levels[box_id] - result[iabox] = root_measure / (2 ** (box_level * dims)) - end - """, - [ - lp.GlobalArg("result", self.boxtree.coord_dtype, - shape=lp.auto), - lp.GlobalArg("active_boxes", self.boxtree.box_id_dtype, - shape=lp.auto), - lp.GlobalArg("box_levels", self.boxtree.box_level_dtype, - shape="nboxes"), - lp.ValueArg("root_measure", self.boxtree.coord_dtype), - lp.ValueArg("n_active_boxes", self.boxtree.size_t), - lp.ValueArg("dims", self.boxtree.size_t), - lp.ValueArg("nboxes", self.boxtree.size_t), - ], - name="get_active_box_measures") - - knl = lp.split_iname(knl, - "iabox", 128, outer_tag="g.0", inner_tag="l.0") - - return knl - - evt, result = get_knl()(queue, - active_boxes=self.boxtree.active_boxes, - box_levels=self.boxtree.box_levels, - root_measure=( - self.boxtree.root_extent ** self.boxtree.dimensions), - n_active_boxes=self.boxtree.n_active_boxes, - dims=self.boxtree.dimensions, - nboxes=self.boxtree.nboxes) - - assert len(result) == 1 - return result[0] - - -# }}} End Discretize a BoxTree using quadrature diff --git a/boxtree/visualization.py b/boxtree/visualization.py index d9353877..f31c9ad5 100644 --- a/boxtree/visualization.py +++ b/boxtree/visualization.py @@ -172,105 +172,9 @@ def get_tikz_for_tree(self): # }}} -# {{{ box tree plotting - - -class BoxTreePlotter(TreePlotter): - """Assumes that the tree has data living on the host. - See :meth:`boxtree.BoxTree.get`. - """ - - def __init__(self, tree): - self.tree = tree - - def draw_tree(self, **kwargs): - if self.tree.dimensions != 2: - raise NotImplementedError("can only plot 2D trees for now") - - fill = kwargs.pop("fill", False) - edgecolor = kwargs.pop("edgecolor", "black") - kwargs["fill"] = fill - kwargs["edgecolor"] = edgecolor - - for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) - self.draw_box(ibox, **kwargs) - - def set_bounding_box(self): - import matplotlib.pyplot as pt - root_center = np.array([self.tree.box_centers[d, 0] for d - in range(self.tree.dimensions)]) - root_extent = self.tree.root_extent - pt.xlim(root_center[0] - root_extent / 2, - root_center[0] + root_extent / 2) - pt.ylim(root_center[1] - root_extent / 2, - root_center[1] + root_extent / 2) - - pt.gca().set_aspect("equal") - - def draw_box_numbers(self): - import matplotlib.pyplot as pt - - tree = self.tree - - for iabox in range(tree.n_active_boxes): - ibox = tree.box_id_dtype(tree.active_boxes[iabox]) - x, y = tree.box_centers[:, ibox] - lev = int(tree.box_levels[ibox]) - pt.text(x, y, str(ibox), fontsize=20*1.15**(-lev), - ha="center", va="center", - bbox=dict(facecolor='white', alpha=0.5, lw=0)) - - def get_tikz_for_tree(self): - if self.tree.dimensions != 2: - raise NotImplementedError("can only plot 2D trees for now") - - lines = [] - - lines.append(r"\def\nboxes{%d}" % self.tree.nboxes) - lines.append(r"\def\lastboxnr{%d}" % (self.tree.nboxes-1)) - - for iabox in range(self.tree.n_active_boxes): - ibox = self.tree.box_id_dtype(self.tree.active_boxes[iabox]) - el, eh = self.tree.get_box_extent(ibox) - - c = self.tree.box_centers[:, ibox] - - lines.append( - r"\coordinate (boxl%d) at (%r, %r);" - % (ibox, float(el[0]), float(el[1]))) - lines.append( - r"\coordinate (boxh%d) at (%r, %r);" - % (ibox, float(eh[0]), float(eh[1]))) - lines.append( - r"\coordinate (boxc%d) at (%r, %r);" - % (ibox, float(c[0]), float(c[1]))) - lines.append( - r"\def\boxsize%s{%r}" - % (int_to_roman(ibox), float(eh[0]-el[0]))) - lines.append( - r"\def\boxlevel%s{%r}" - % (int_to_roman(ibox), self.tree.box_levels[ibox])) - - lines.append( - r"\def\boxpath#1{(boxl#1) rectangle (boxh#1)}") - lines.append( - r"\def\drawboxes{" - r"\foreach \ibox in {0,...,\lastboxnr}{" - r"\draw \boxpath{\ibox};" - r"}}") - lines.append( - r"\def\drawboxnrs{" - r"\foreach \ibox in {0,...,\lastboxnr}{" - r"\node [font=\tiny] at (boxc\ibox) {\ibox};" - r"}}") - return "\n".join(lines) - -# }}} # {{{ traversal plotting - def _draw_box_list(tree_plotter, ibox, starts, lists, key_to_box=None, **kwargs): default_facecolor = "blue" diff --git a/examples/interactive_boxtree_demo.py b/examples/interactive_boxtree_demo.py deleted file mode 100644 index 1a299058..00000000 --- a/examples/interactive_boxtree_demo.py +++ /dev/null @@ -1,58 +0,0 @@ -import pyopencl as cl -import boxtree.tree_interactive_build - -import matplotlib.pyplot as pt -import numpy as np - -ctx = cl.create_some_context() -queue = cl.CommandQueue(ctx) - -tree = boxtree.tree_interactive_build.BoxTree() -tree.generate_uniform_boxtree(queue, nlevels=3, root_extent=2, - # root_vertex=(0,0,0)) - root_vertex=(0,0)) - -# the default quad formula uses cell centers and cell measures -from modepy import GaussLegendreQuadrature -n_q_points = 5 -quadrature_formula = GaussLegendreQuadrature(n_q_points - 1) -print(quadrature_formula.nodes, quadrature_formula.weights) -quad = boxtree.tree_interactive_build.QuadratureOnBoxTree(tree, - quadrature_formula) -cell_centers = quad.get_cell_centers(queue) -cell_measures = quad.get_cell_measures(queue) -q_points = quad.get_q_points(queue) -q_weights = quad.get_q_weights(queue) - -# print(q_points) -# print(q_weights, np.sum(q_weights.get())) - -# print(q_points - cell_centers) -# print(q_weights - cell_measures) - -# call get() before plotting -from boxtree.visualization import BoxTreePlotter -plt = BoxTreePlotter(tree.get(queue)) -plt.draw_tree() -plt.set_bounding_box() -plt.draw_box_numbers() - -qx = q_points[0].get(queue) -qy = q_points[1].get(queue) - -pt.plot(qx, qy, '*') - -pt.tick_params( - axis='x', # changes apply to the x-axis - which='both', # both major and minor ticks are affected - bottom='off', # ticks along the bottom edge are off - top='off', # ticks along the top edge are off - labelbottom='off') -pt.tick_params( - axis='y', - which='both', - left='off', - top='off', - labelleft='off') - -pt.show() diff --git a/setup.py b/setup.py index d1cb8b5a..0882bd24 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,6 @@ def main(): "pyopencl>=2018.2.2", "Mako>=0.7.3", "pytest>=2.3", - "modepy>=2016.1.2", "cgen>=2013.1.2", "six", ]) From 3b2dbb7170f63ca23ee726020782359bf6925b02 Mon Sep 17 00:00:00 2001 From: xywei Date: Mon, 19 Apr 2021 21:23:46 -0500 Subject: [PATCH 25/25] Enable building uniform mesh for volumential --- boxtree/tree_build.py | 48 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/boxtree/tree_build.py b/boxtree/tree_build.py index d93dfd0a..0327b08a 100644 --- a/boxtree/tree_build.py +++ b/boxtree/tree_build.py @@ -64,7 +64,9 @@ def vec_of_signs(dim, i): """ # e.g. bin(10) = '0b1010' binary_digits = [int(bd) for bd in bin(i)[2:]] - return np.array(binary_digits) * 2 - 1 + n = len(binary_digits) + assert n <= dim + return np.array([0]*(dim-n) + binary_digits) * 2 - 1 class _TreeOfBoxes: @@ -101,9 +103,13 @@ def bounding_box(self): highs = lows + self.root_box_extent return [lows, highs] + def get_box_size(self, ibox): + lev = self.box_levels[ibox] + box_size = self.root_box_extent * 0.5**lev + return box_size + def get_box_extent(self, ibox): - lev = int(self.box_levels[ibox]) - box_size = self.root_box_extent / (1 << lev) + box_size = self.get_box_size(ibox) extent_low = self.box_centers[:, ibox] - 0.5*box_size extent_high = extent_low + box_size return extent_low, extent_high @@ -197,10 +203,41 @@ def sort_particles(self, particles): """ raise NotImplementedError + def distribute_quadrature_rule(self, quad_rule): + """Generate quadrature nodes (`dim x nnodes`) and weights. Within each + leaf box, nodes are in lexicographic order, where the last axis varies + fastest. For example, `[[0, 0, 1, 1], [0, 1, 0, 1]]`. + + :param quad_rule: a :class:`modepy.Quadrature` + """ + x, w = quad_rule.nodes, quad_rule.weights # nodes in [-1, 1] + n_box_nodes = len(x)**self.dim + box_nodes = np.array(np.meshgrid(*((x,) * self.dim), indexing='ij') + ).reshape([self.dim, -1]) + if self.dim == 2: + box_weights = w[:, None] @ w[None, :] + elif self.dim == 3: + box_weights = w[:, None, None] @ w[None, :, None] @ w[None, None, :] + box_weights = box_weights.reshape(-1) + lfboxes = self.leaf_boxes() + + nodes = np.tile(box_nodes, (1, len(lfboxes))) + box_shifts = np.repeat( + self.box_centers[:, lfboxes], n_box_nodes, axis=1) + box_scales = np.repeat( + self.get_box_size(lfboxes) / 2, n_box_nodes) + nodes = nodes * box_scales[None, :] + box_shifts + + weights = np.tile(box_weights, len(lfboxes)) + weights = weights * box_scales**self.dim + + return nodes, weights + def make_quadrature_tree(self, queue, quad): """Generate :class:`boxtree.Tree` tree with a given quadrature rule where particles are quadrature nodes in each leaf box. """ + # FIXME if not np.all(sorted(self.box_levels) == self.box_levels): raise ValueError("calling the method requires a normalized tree") levs = np.append(self.box_levels, self.nlevels) @@ -265,6 +302,11 @@ def make_quadrature_tree(self, queue, quad): def refined(self, refine_flags): return self.refine_and_coarsened(refine_flags, None) + def uniformly_refined(self): + refine_flags = np.zeros(self.nboxes, bool) + refine_flags[self.is_leaf()] = 1 + return self.refined(refine_flags) + def coarsened(self, coarsen_flags): return self.refine_and_coarsened(None, coarsen_flags)