From 90e003c2d37e802246663bb5b67a4ec07ba5c3ed Mon Sep 17 00:00:00 2001 From: Alex Dickson Date: Fri, 2 May 2025 10:40:37 -0400 Subject: [PATCH 1/6] added projector class --- src/wepy/resampling/projectors/__init__.py | 0 src/wepy/resampling/projectors/centroid.py | 170 ++++++++++++++++++++ src/wepy/resampling/projectors/projector.py | 170 ++++++++++++++++++++ 3 files changed, 340 insertions(+) create mode 100644 src/wepy/resampling/projectors/__init__.py create mode 100644 src/wepy/resampling/projectors/centroid.py create mode 100644 src/wepy/resampling/projectors/projector.py diff --git a/src/wepy/resampling/projectors/__init__.py b/src/wepy/resampling/projectors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/wepy/resampling/projectors/centroid.py b/src/wepy/resampling/projectors/centroid.py new file mode 100644 index 00000000..822c9b28 --- /dev/null +++ b/src/wepy/resampling/projectors/centroid.py @@ -0,0 +1,170 @@ +"""Modular component for defining distance metrics usable within many +different resamplers. + +This module contains an abstract base class for Distance classes. + +The suggested implementation method is to leave the 'distance' method +as is, and override the 'image' and 'image_distance' methods +instead. Because the default 'distance' method calls these +transparently. The resamplers will use the 'image' and +'image_distance' calls because this allows performance optimizations. + +For example in WExplore the images for some walkers end up being +stored as the definitions of Voronoi regions, and if the whole walker +state was stored it would not only use much more space in memory but +require that common transformations be repeated every time a distance +is to be calculated to that image (which is many times). In REVO all +to all distances between walkers are computed which would also incur a +high cost. + +So use the 'image' to do precomputations on raw walker states and use +the 'image_distance' to compute distances using only those images. + +""" +# Standard Library +import logging + +logger = logging.getLogger(__name__) + +# Third Party Library +import numpy as np +from wepy.util.util import box_vectors_to_lengths_angles + + +class Distance(object): + """Abstract Base class for Distance classes.""" + + def __init__(self): + """Constructor for Distance class.""" + pass + + def image(self, state): + """Compute the 'image' of a walker state which should be some + transformation of the walker state that is more + convenient. E.g. for precomputation of expensive operations or + for saving as resampler state. + + The abstract implementation is naive and just returns the + state itself, thus it is the identity function. + + Parameters + ---------- + state : object implementing WalkerState + The state which will be transformed to an image + + Returns + ------- + image : object implementing WalkerState + The same state that was given as an argument. + + """ + + return state + + def image_distance(self, image_a, image_b): + """Compute the distance between two images of walker states. + + Parameters + ---------- + image_a : object produced by Distance.image + + image_b : object produced by Distance.image + + Returns + ------- + + distance : float + The distance between the two images + + Raises + ------ + + NotImplementedError : always because this is abstract + + """ + raise NotImplementedError + + def distance(self, state_a, state_b): + """Compute the distance between two states. + + Parameters + ---------- + state_a : object implementing WalkerState + + state_b : object implementing WalkerState + + Returns + ------- + + distance : float + The distance between the two walker states + + + """ + + return self.image_distance(self.image(state_a), self.image(state_b)) + + +class XYEuclideanDistance(Distance): + """2 dimensional euclidean distance between points. + + States have the attributes 'x' and 'y'. + + """ + + def image(self, state): + return np.array([state["x"], state["y"]]) + + def image_distance(self, image_a, image_b): + return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2) + +class AtomPairDistance(Distance): + """Constructs a vector of atomic distances for each state. + Distance is the root mean squared distance between the vectors. + """ + + def __init__(self, pair_list, periodic=True): + """Construct a distance metric. + + Parameters + ---------- + + pair_list : arraylike of tuples + The indices of the atom pairs between which to compute + distances. + + """ + self.pair_list = pair_list + self.periodic = periodic + + def _adjust_disp_vector(self, disp, box_lengths): + edited = True + while edited: + edited = False + for i in range(3): + if disp[i] > box_lengths[i]/2: + disp[i] -= box_lengths[i] + edited = True + elif disp[i] < -box_lengths[i]/2: + disp[i] += box_lengths[i] + edited = True + return disp + + def image(self, state): + + if self.periodic: + # get the box lengths from the vectors + box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) + + dist_list = np.zeros((len(self.pair_list))) + for i,p in enumerate(self.pair_list): + disp_vector = state["positions"][p[0]] - state["positions"][p[1]] + if self.periodic: + dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths)))) + else: + dist_list[i] = np.sqrt(np.sum(np.square(disp_vector))) + + return dist_list + + def image_distance(self, image_a, image_b): + return np.sqrt(np.mean(np.square(image_a - image_b))) diff --git a/src/wepy/resampling/projectors/projector.py b/src/wepy/resampling/projectors/projector.py new file mode 100644 index 00000000..822c9b28 --- /dev/null +++ b/src/wepy/resampling/projectors/projector.py @@ -0,0 +1,170 @@ +"""Modular component for defining distance metrics usable within many +different resamplers. + +This module contains an abstract base class for Distance classes. + +The suggested implementation method is to leave the 'distance' method +as is, and override the 'image' and 'image_distance' methods +instead. Because the default 'distance' method calls these +transparently. The resamplers will use the 'image' and +'image_distance' calls because this allows performance optimizations. + +For example in WExplore the images for some walkers end up being +stored as the definitions of Voronoi regions, and if the whole walker +state was stored it would not only use much more space in memory but +require that common transformations be repeated every time a distance +is to be calculated to that image (which is many times). In REVO all +to all distances between walkers are computed which would also incur a +high cost. + +So use the 'image' to do precomputations on raw walker states and use +the 'image_distance' to compute distances using only those images. + +""" +# Standard Library +import logging + +logger = logging.getLogger(__name__) + +# Third Party Library +import numpy as np +from wepy.util.util import box_vectors_to_lengths_angles + + +class Distance(object): + """Abstract Base class for Distance classes.""" + + def __init__(self): + """Constructor for Distance class.""" + pass + + def image(self, state): + """Compute the 'image' of a walker state which should be some + transformation of the walker state that is more + convenient. E.g. for precomputation of expensive operations or + for saving as resampler state. + + The abstract implementation is naive and just returns the + state itself, thus it is the identity function. + + Parameters + ---------- + state : object implementing WalkerState + The state which will be transformed to an image + + Returns + ------- + image : object implementing WalkerState + The same state that was given as an argument. + + """ + + return state + + def image_distance(self, image_a, image_b): + """Compute the distance between two images of walker states. + + Parameters + ---------- + image_a : object produced by Distance.image + + image_b : object produced by Distance.image + + Returns + ------- + + distance : float + The distance between the two images + + Raises + ------ + + NotImplementedError : always because this is abstract + + """ + raise NotImplementedError + + def distance(self, state_a, state_b): + """Compute the distance between two states. + + Parameters + ---------- + state_a : object implementing WalkerState + + state_b : object implementing WalkerState + + Returns + ------- + + distance : float + The distance between the two walker states + + + """ + + return self.image_distance(self.image(state_a), self.image(state_b)) + + +class XYEuclideanDistance(Distance): + """2 dimensional euclidean distance between points. + + States have the attributes 'x' and 'y'. + + """ + + def image(self, state): + return np.array([state["x"], state["y"]]) + + def image_distance(self, image_a, image_b): + return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2) + +class AtomPairDistance(Distance): + """Constructs a vector of atomic distances for each state. + Distance is the root mean squared distance between the vectors. + """ + + def __init__(self, pair_list, periodic=True): + """Construct a distance metric. + + Parameters + ---------- + + pair_list : arraylike of tuples + The indices of the atom pairs between which to compute + distances. + + """ + self.pair_list = pair_list + self.periodic = periodic + + def _adjust_disp_vector(self, disp, box_lengths): + edited = True + while edited: + edited = False + for i in range(3): + if disp[i] > box_lengths[i]/2: + disp[i] -= box_lengths[i] + edited = True + elif disp[i] < -box_lengths[i]/2: + disp[i] += box_lengths[i] + edited = True + return disp + + def image(self, state): + + if self.periodic: + # get the box lengths from the vectors + box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) + + dist_list = np.zeros((len(self.pair_list))) + for i,p in enumerate(self.pair_list): + disp_vector = state["positions"][p[0]] - state["positions"][p[1]] + if self.periodic: + dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths)))) + else: + dist_list[i] = np.sqrt(np.sum(np.square(disp_vector))) + + return dist_list + + def image_distance(self, image_a, image_b): + return np.sqrt(np.mean(np.square(image_a - image_b))) From baf2b0754c15255bb3b119a0339a2c13681fd687 Mon Sep 17 00:00:00 2001 From: Alex Dickson Date: Fri, 2 May 2025 17:26:20 -0400 Subject: [PATCH 2/6] projector-based revo resampler --- src/wepy/resampling/projectors/centroid.py | 173 +------ src/wepy/resampling/projectors/projector.py | 157 +----- src/wepy/resampling/projectors/tica.py | 74 +++ src/wepy/resampling/resamplers/revo.py | 536 +++++++++++++++++++- 4 files changed, 649 insertions(+), 291 deletions(-) create mode 100644 src/wepy/resampling/projectors/tica.py diff --git a/src/wepy/resampling/projectors/centroid.py b/src/wepy/resampling/projectors/centroid.py index 822c9b28..ebd0d4db 100644 --- a/src/wepy/resampling/projectors/centroid.py +++ b/src/wepy/resampling/projectors/centroid.py @@ -1,25 +1,4 @@ -"""Modular component for defining distance metrics usable within many -different resamplers. - -This module contains an abstract base class for Distance classes. - -The suggested implementation method is to leave the 'distance' method -as is, and override the 'image' and 'image_distance' methods -instead. Because the default 'distance' method calls these -transparently. The resamplers will use the 'image' and -'image_distance' calls because this allows performance optimizations. - -For example in WExplore the images for some walkers end up being -stored as the definitions of Voronoi regions, and if the whole walker -state was stored it would not only use much more space in memory but -require that common transformations be repeated every time a distance -is to be calculated to that image (which is many times). In REVO all -to all distances between walkers are computed which would also incur a -high cost. - -So use the 'image' to do precomputations on raw walker states and use -the 'image_distance' to compute distances using only those images. - +"""Projector for determining centroid distances. """ # Standard Library import logging @@ -28,143 +7,47 @@ # Third Party Library import numpy as np -from wepy.util.util import box_vectors_to_lengths_angles - - -class Distance(object): - """Abstract Base class for Distance classes.""" - - def __init__(self): - """Constructor for Distance class.""" - pass - - def image(self, state): - """Compute the 'image' of a walker state which should be some - transformation of the walker state that is more - convenient. E.g. for precomputation of expensive operations or - for saving as resampler state. - - The abstract implementation is naive and just returns the - state itself, thus it is the identity function. - - Parameters - ---------- - state : object implementing WalkerState - The state which will be transformed to an image - - Returns - ------- - image : object implementing WalkerState - The same state that was given as an argument. - - """ - - return state - - def image_distance(self, image_a, image_b): - """Compute the distance between two images of walker states. - - Parameters - ---------- - image_a : object produced by Distance.image - - image_b : object produced by Distance.image - - Returns - ------- - - distance : float - The distance between the two images - - Raises - ------ - NotImplementedError : always because this is abstract - - """ - raise NotImplementedError - - def distance(self, state_a, state_b): - """Compute the distance between two states. - - Parameters - ---------- - state_a : object implementing WalkerState - - state_b : object implementing WalkerState - - Returns - ------- - - distance : float - The distance between the two walker states - - - """ - - return self.image_distance(self.image(state_a), self.image(state_b)) - - -class XYEuclideanDistance(Distance): - """2 dimensional euclidean distance between points. - - States have the attributes 'x' and 'y'. - - """ - - def image(self, state): - return np.array([state["x"], state["y"]]) - - def image_distance(self, image_a, image_b): - return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2) +from wepy.resampling.projectors.projector import Projector +from wepy.util.util import box_vectors_to_lengths_angles +from geomm.grouping import group_pair -class AtomPairDistance(Distance): - """Constructs a vector of atomic distances for each state. - Distance is the root mean squared distance between the vectors. +class CentroidProjector(Projector): + """Projects a state onto the centroid distance between two groups. """ - def __init__(self, pair_list, periodic=True): - """Construct a distance metric. + def __init__(self, group1_idxs, group2_idxs, periodic=True): + """Construct a centroid distance projector. Parameters ---------- - pair_list : arraylike of tuples - The indices of the atom pairs between which to compute - distances. - + group1_idxs : list of int - indices of atoms in group1 + group2_idxs : list of int - indices of atoms in group2 + periodic : bool (default = True) - whether to use periodic boundary conditions to + minimize centroid distances """ - self.pair_list = pair_list + self.group1_idxs = group1_idxs + self.group2_idxs = group2_idxs self.periodic = periodic + + def project(self, state): - def _adjust_disp_vector(self, disp, box_lengths): - edited = True - while edited: - edited = False - for i in range(3): - if disp[i] > box_lengths[i]/2: - disp[i] -= box_lengths[i] - edited = True - elif disp[i] < -box_lengths[i]/2: - disp[i] += box_lengths[i] - edited = True - return disp + # cut out only the coordinates we need + coords = np.concatenate([state['positions'][self.group1_idxs],state['positions'][self.group2_idxs]]) + idxs1 = list(range(len(self.group1_idxs))) + idxs2 = list(range(len(self.group1_idxs),len(self.group1_idxs) + len(self.group2_idxs))) - def image(self, state): - if self.periodic: # get the box lengths from the vectors box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) + coords = group_pair(coords,box_lengths,idxs1,idxs2) + + # determine coordinate centroids + c1 = coords[idxs1].mean(axis=0) + c2 = coords[idxs2].mean(axis=0) + + # return the distance between the centroids + return np.sqrt(np.sum(np.square(c1-c2))) - dist_list = np.zeros((len(self.pair_list))) - for i,p in enumerate(self.pair_list): - disp_vector = state["positions"][p[0]] - state["positions"][p[1]] - if self.periodic: - dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths)))) - else: - dist_list[i] = np.sqrt(np.sum(np.square(disp_vector))) - - return dist_list - def image_distance(self, image_a, image_b): - return np.sqrt(np.mean(np.square(image_a - image_b))) diff --git a/src/wepy/resampling/projectors/projector.py b/src/wepy/resampling/projectors/projector.py index 822c9b28..8b333c69 100644 --- a/src/wepy/resampling/projectors/projector.py +++ b/src/wepy/resampling/projectors/projector.py @@ -1,25 +1,10 @@ -"""Modular component for defining distance metrics usable within many -different resamplers. +"""Modular component for defining "projectors" that project a +walker state into a one- or low-dimensional subspace. These are +usable within different resamplers. -This module contains an abstract base class for Distance classes. - -The suggested implementation method is to leave the 'distance' method -as is, and override the 'image' and 'image_distance' methods -instead. Because the default 'distance' method calls these -transparently. The resamplers will use the 'image' and -'image_distance' calls because this allows performance optimizations. - -For example in WExplore the images for some walkers end up being -stored as the definitions of Voronoi regions, and if the whole walker -state was stored it would not only use much more space in memory but -require that common transformations be repeated every time a distance -is to be calculated to that image (which is many times). In REVO all -to all distances between walkers are computed which would also incur a -high cost. - -So use the 'image' to do precomputations on raw walker states and use -the 'image_distance' to compute distances using only those images. +This module contains an abstract base class for Projector classes. +This is similar to the 'image' function in a Distance object """ # Standard Library import logging @@ -28,24 +13,20 @@ # Third Party Library import numpy as np -from wepy.util.util import box_vectors_to_lengths_angles - -class Distance(object): - """Abstract Base class for Distance classes.""" +class Projector(object): + """Abstract Base class for Projector classes.""" def __init__(self): - """Constructor for Distance class.""" + """Constructor for Projector class.""" pass - def image(self, state): - """Compute the 'image' of a walker state which should be some - transformation of the walker state that is more - convenient. E.g. for precomputation of expensive operations or - for saving as resampler state. + def project(self, state): + """Compute the 'projection' of a walker state onto one + or more variables. - The abstract implementation is naive and just returns the - state itself, thus it is the identity function. + The abstract implementation is naive and just returns the + numpy array [1]. Parameters ---------- @@ -54,117 +35,9 @@ def image(self, state): Returns ------- - image : object implementing WalkerState + projection : numpy array The same state that was given as an argument. """ - return state - - def image_distance(self, image_a, image_b): - """Compute the distance between two images of walker states. - - Parameters - ---------- - image_a : object produced by Distance.image - - image_b : object produced by Distance.image - - Returns - ------- - - distance : float - The distance between the two images - - Raises - ------ - - NotImplementedError : always because this is abstract - - """ - raise NotImplementedError - - def distance(self, state_a, state_b): - """Compute the distance between two states. - - Parameters - ---------- - state_a : object implementing WalkerState - - state_b : object implementing WalkerState - - Returns - ------- - - distance : float - The distance between the two walker states - - - """ - - return self.image_distance(self.image(state_a), self.image(state_b)) - - -class XYEuclideanDistance(Distance): - """2 dimensional euclidean distance between points. - - States have the attributes 'x' and 'y'. - - """ - - def image(self, state): - return np.array([state["x"], state["y"]]) - - def image_distance(self, image_a, image_b): - return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2) - -class AtomPairDistance(Distance): - """Constructs a vector of atomic distances for each state. - Distance is the root mean squared distance between the vectors. - """ - - def __init__(self, pair_list, periodic=True): - """Construct a distance metric. - - Parameters - ---------- - - pair_list : arraylike of tuples - The indices of the atom pairs between which to compute - distances. - - """ - self.pair_list = pair_list - self.periodic = periodic - - def _adjust_disp_vector(self, disp, box_lengths): - edited = True - while edited: - edited = False - for i in range(3): - if disp[i] > box_lengths[i]/2: - disp[i] -= box_lengths[i] - edited = True - elif disp[i] < -box_lengths[i]/2: - disp[i] += box_lengths[i] - edited = True - return disp - - def image(self, state): - - if self.periodic: - # get the box lengths from the vectors - box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) - - dist_list = np.zeros((len(self.pair_list))) - for i,p in enumerate(self.pair_list): - disp_vector = state["positions"][p[0]] - state["positions"][p[1]] - if self.periodic: - dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths)))) - else: - dist_list[i] = np.sqrt(np.sum(np.square(disp_vector))) - - return dist_list - - def image_distance(self, image_a, image_b): - return np.sqrt(np.mean(np.square(image_a - image_b))) + return np.ones((1)) \ No newline at end of file diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py new file mode 100644 index 00000000..edb7855d --- /dev/null +++ b/src/wepy/resampling/projectors/tica.py @@ -0,0 +1,74 @@ +"""Projectors into a pre-trained tICA space. +""" +# Standard Library +import logging + +logger = logging.getLogger(__name__) + +# Third Party Library +import numpy as np + +from wepy.resampling.projectors.projector import Projector +from wepy.util.util import box_vectors_to_lengths_angles +#from geomm.grouping import shorten_vec + +def shorten_vec(x, unitcell_side_lengths): + """ + For a given vector x between two points in a periodic box, return the + shortest version of that vector. + """ + pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0] + + for dim_idx in pos_idxs: + x[dim_idx] += unitcell_side_lengths[dim_idx] + + neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0] + + for dim_idx in neg_idxs: + x[dim_idx] -= unitcell_side_lengths[dim_idx] + + return x + +class DistanceTICAProjector(Projector): + """Projects a state into a predefined TICA space, using a set of distances as intermediate features. + """ + + def __init__(self, dist_idxs, tica_vectors, periodic=True): + """Construct a DistanceTICA projector. + + Parameters + ---------- + + dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances + tica_vectors : np.array of shape (nt,nd) - vectors for projecting into tica space + periodic : bool (default = True) - whether to use periodic boundary conditions to + minimize pair distances + """ + self.dist_idxs = np.array(dist_idxs) + self.tica_vectors = np.array(tica_vectors) + self.periodic = periodic + + assert self.dist_idxs.shape[0] == self.tica_vectors.shape[1], "Number of distances needs to match dimensionality of tica vectors!" + assert self.dist_idxs.shape[1] == 2, "Distance indices have incorrect shape" + + self.ndim = self.tica_vectors.shape[0] + + def project(self, state): + + # get all the displacement vectors + disp_vecs = state['positions'][self.dist_idxs[:,0]] - state['positions'][self.dist_idxs[:,1]] + + if self.periodic: + # get the box lengths from the vectors + box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) + dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v)))) for v in disp_vecs]) + else: + dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs]) + + # calculate projections + proj = np.zeros(self.ndim) + for i in range(self.ndim): + proj[i] = np.dot(self.tica_vectors[i], dists) + + return proj + diff --git a/src/wepy/resampling/resamplers/revo.py b/src/wepy/resampling/resamplers/revo.py index 6f8fedc7..4080437c 100644 --- a/src/wepy/resampling/resamplers/revo.py +++ b/src/wepy/resampling/resamplers/revo.py @@ -84,10 +84,6 @@ class REVOResampler(CloneMergeResampler): - variation : the final value of trajectory variation - - images : the images of walkers that is defined by the distance object - - - image_shape : the shape of the image - The algorithm saves the records of cloning and merging information in resampling data. @@ -741,3 +737,535 @@ def resample(self, walkers): ] return resampled_walkers, resampling_data, resampler_data + +class REVOProjectionResampler(REVOResampler): + r"""Resampler implementing a modified REVO algorithm that works in a + projection space. The distance between walkers in the projection space + is used to guide merging, and a sum of "single-body" terms is used + as the variation function V. The single-body terms are also calculated + using the walker projections. + + This algorithm was originally proposed here: + https://doi.org/10.1063/5.0239713 + + The objective function in that work was defined as + .. math:: + V = \sum_{i} V_i = \sum_i \frac{1}{d^i_{\text{from_target}}} + + and here it is implemented such that any quantity can be used + for V_i that depends solely on walker i. + + As in the original REVO, this algorithm needs the following parameters: + + pmin: the minimum statistical weight. REVO does not clone + walkers with a weight less than pmin. + + pmax: The maximum statistical weight. It prevents the + accumulation of too much weight in one walker. + + merge_dist: This is the merge-distance threshold. The distance + between merged walkers should be less than this value. + + The resample function, called during every cycle, takes the + ensemble of walkers and performs the follow steps: + + - Evaluates V_i for each walker + - Decides which walkers should be merged or cloned + - Applies the cloning and merging decisions to get the resampled walkers + - Creates the resampling data that includes + + - n_walkers : the number of walkers. number of walkers is + kept constant thought the resampling. + + - walker_variations : the Vi values for each walker + + - walker_projections : the projections for each walker + + - variation : the final value of trajectory variation (V) + + The algorithm saves the records of cloning and merging + information in resampling data. + + Only the net clones and merges are recorded in the resampling records. + + """ + + # fields that can be used for a table like representation + RESAMPLING_RECORD_FIELDS = CloneMergeResampler.RESAMPLING_RECORD_FIELDS + + # fields for resampling data + RESAMPLER_FIELDS = CloneMergeResampler.RESAMPLER_FIELDS + ( + "num_walkers", + "walker_variations", + "walker_projections", + "variation", + ) + RESAMPLER_SHAPES = CloneMergeResampler.RESAMPLER_SHAPES + ( + (1,), + Ellipsis, + Ellipsis, + (1,), + ) + RESAMPLER_DTYPES = CloneMergeResampler.RESAMPLER_DTYPES + ( + int, + float, + float, + float, + ) + + # fields that can be used for a table like representation + RESAMPLER_RECORD_FIELDS = CloneMergeResampler.RESAMPLER_RECORD_FIELDS + ( + "variation", + ) + + def __init__( + self, + merge_dist=None, + projector=None, + max_function=None, + init_state=None, + merge_alg="pairs", + pmin=1e-12, + pmax=0.1, + seed=None, + num_proc=1, + **kwargs + ): + """Constructor for the REVO Property Resampler. + + Parameters + ---------- + + merge_dist : float + The merge distance threshold. Will be compared with Euclidean + distances between projection values, e.g.: + np.sqrt(np.sum(np.square(p1-p2))). + + projector : object implementing Project + The object that projects walkers into a reduced dimensional space. + + max_function : function (float, np.array) -> float + Uses the weight and a projection for one walker (np.array) as input, returns Vi. + The resampler will promote maximization of this quantity. + + merge_alg : string + Indication of which algorithm is used to find pairs to merge. + + 'pairs' (default) indicates that a list of all suitable pairs is generated, + and the pair that minimizes the expected variation loss is chosen. + + 'greedy' indicates that first the lowest variation walker is + selected, and this is attempted to be merged with the closest + suitable walker + + init_state : WalkerState object + Used for automatically determining the state image shape. + + seed : None or int, optional + The random seed. If None, the system (random) one will be used. + + num_proc : int, optional + The number of processors used to calculate the walker images + and all-to-all distances. + + """ + + # call the init methods in the CloneMergeResampler + # superclass. We set the min and max number of walkers to be + # constant + super(REVOResampler,self).__init__( + pmin=pmin, + pmax=pmax, + min_num_walkers=Ellipsis, + max_num_walkers=Ellipsis, + **kwargs + ) + + assert merge_dist is not None, "Merge distance must be given." + assert projector is not None, "Projector object must be given." + assert max_function is not None, "Maximization function must be given" + assert init_state is not None, "An initial state must be given." + + self.merge_dist = merge_dist + self.merge_alg = merge_alg + + # the projector function + self.projector = projector + + # the maximization function + self.max_function = max_function + + # setting the random seed + self.seed = seed + if seed is not None: + rand.seed(seed) + + # setting the number of processors + self.num_proc = num_proc + + def _calc_variation(self, walker_weights, num_walker_copies, projection_list): + """Calculates the variation value. + + Parameters + ---------- + + walker_weights : list of float + The weights of all walkers. The sum of all weights should be 1.0. + + num_walker_copies : list of int + The number of copies of each walker. + 0 means the walker does not exist anymore. + 1 means there is one copy of this walker. + >1 means it will be cloned to this number of walkers. + + distance_matrix : list of arraylike of shape (num_walkers) + + Returns + ------- + variation : float + The calculated variation value. + + walker_variations : arraylike of shape (num_walkers) + The Vi value of each walker. + + """ + + num_walkers = len(walker_weights) + + # the value to be optimized + variation = 0 + + # the walker variation values (Vi values) + walker_variations = np.zeros(num_walkers) + + # calculate the variation and walker variation values + for i in range(num_walkers): + if num_walker_copies[i] > 0: + variation += num_walker_copies[i]*self.max_function(walker_weights[i]/num_walker_copies[i],projection_list[i]) + walker_variations[i] = self.max_function(walker_weights[i]/num_walker_copies[i],projection_list[i]) + + return variation, walker_variations + + def decide(self, walker_weights, num_walker_copies, distance_matrix, projection_list): + """Optimize the trajectory variation by making decisions for resampling. + + Parameters + ---------- + + walker_weights : list of float + The weights of all walkers. The sum of all weights should be 1.0. + + num_walker_copies : list of int + The number of copies of each walker. + 0 means the walker is not exists anymore. + 1 means there is one of the this walker. + >1 means it should be cloned to this number of walkers. + + distance_matrix : list of arraylike of shape (num_walkers) + + Returns + ------- + variation : float + The optimized value of the trajectory variation. + + resampling_data : list of dict of str: value + The resampling records resulting from the decisions. + + """ + num_walkers = len(walker_weights) + + variations = [] + merge_groups = [[] for i in range(num_walkers)] + walker_clone_nums = [0 for i in range(num_walkers)] + + # make copy of walkers properties + new_walker_weights = walker_weights.copy() + new_num_walker_copies = num_walker_copies.copy() + + # calculate the initial variation which will be optimized + variation, walker_variations = self._calc_variation( + walker_weights, new_num_walker_copies, projection_list + ) + variations.append(variation) + + # maximize the variance through cloning and merging + logger.info("Starting variance optimization: {}".format(variation)) + + productive = True + while productive: + productive = False + # find min and max walker_variationss, alter new_amp + + # initialize to None, we may not find one of each + min_idx = None + max_idx = None + + # selects a walker with minimum walker_variations and a walker with + # maximum walker_variations walker (distance to other walkers) will be + # tagged for cloning (stored in maxwind), except if it is + # already a keep merge target + max_tups = [] + for i, value in enumerate(walker_variations): + # 1. must have an amp >=1 which gives the number of clones to be made of it + # 2. clones for the given amplitude must not be smaller than the minimum probability + # 3. must not already be a keep merge target + if ( + (new_num_walker_copies[i] >= 1) + and ( + new_walker_weights[i] / (new_num_walker_copies[i] + 1) + > self.pmin + ) + and (len(merge_groups[i]) == 0) + ): + max_tups.append((value, i)) + + if len(max_tups) > 0: + max_value, max_idx = max(max_tups) + + merge_pair = [] + if self.merge_alg == "pairs": + pot_merge_pairs = self._find_eligible_merge_pairs( + new_walker_weights, distance_matrix, max_idx, new_num_walker_copies + ) + merge_pair = self._calc_variation_loss( + walker_variations, new_walker_weights, pot_merge_pairs + ) + elif self.merge_alg == "greedy": + # walker with the lowest walker_variations (distance to other walkers) + # will be tagged for merging (stored in min_idx) + min_tups = [ + (value, i) + for i, value in enumerate(walker_variations) + if new_num_walker_copies[i] == 1 + and (new_walker_weights[i] < self.pmax) + ] + + if len(min_tups) > 0: + min_value, min_idx = min(min_tups) + + # does min_idx have an eligible merging partner? + closewalk = None + condition_list = np.array([i is not None for i in [min_idx, max_idx]]) + if condition_list.all() and min_idx != max_idx: + # get the walkers that aren't the minimum and the max + # walker_variations walkers, as candidates for merging + closewalks = set(range(num_walkers)).difference([min_idx, max_idx]) + + # remove those walkers that if they were merged with + # the min walker_variations walker would violate the pmax + closewalks = [ + idx + for idx in closewalks + if (new_num_walker_copies[idx] == 1) + and ( + new_walker_weights[idx] + new_walker_weights[min_idx] + < self.pmax + ) + ] + + # if there are any walkers left, get the distances of + # the close walkers to the min walker_variations walker if that + # distance is less than the maximum merge distance + if len(closewalks) > 0: + closewalks_dists = [ + (distance_matrix[min_idx][i], i) + for i in closewalks + if distance_matrix[min_idx][i] < (self.merge_dist) + ] + + # if any were found set this as the closewalk + if len(closewalks_dists) > 0: + closedist, closewalk = min(closewalks_dists) + merge_pair = [min_idx, closewalk] + + else: + raise ValueError("Unrecognized value for merge_alg in REVO") + + # did we find a suitable pair to merge? + if len(merge_pair) != 0: + min_idx = merge_pair[0] + closewalk = merge_pair[1] + + # change new_amp + tempsum = new_walker_weights[min_idx] + new_walker_weights[closewalk] + new_num_walker_copies[min_idx] = new_walker_weights[min_idx] / tempsum + new_num_walker_copies[closewalk] = ( + new_walker_weights[closewalk] / tempsum + ) + new_num_walker_copies[max_idx] += 1 + + # re-determine variation function, and walker_variations values + new_variation, walker_variations = self._calc_variation( + new_walker_weights, new_num_walker_copies, projection_list + ) + + if new_variation > variation: + variations.append(new_variation) + + logger.info("Variance move to {} accepted".format(new_variation)) + + productive = True + variation = new_variation + + # make a decision on which walker to keep + # (min_idx, or closewalk), equivalent to: + # `random.choices([closewalk, min_idx], + # weights=[new_walker_weights[closewalk], new_walker_weights[min_idx])` + r = rand.uniform( + 0.0, new_walker_weights[closewalk] + new_walker_weights[min_idx] + ) + + # keeps closewalk and gets rid of min_idx + if r < new_walker_weights[closewalk]: + keep_idx = closewalk + squash_idx = min_idx + + # keep min_idx, get rid of closewalk + else: + keep_idx = min_idx + squash_idx = closewalk + + # update weight + new_walker_weights[keep_idx] += new_walker_weights[squash_idx] + new_walker_weights[squash_idx] = 0.0 + + # update new_num_walker_copies + new_num_walker_copies[squash_idx] = 0 + new_num_walker_copies[keep_idx] = 1 + + # add the squash index to the merge group + merge_groups[keep_idx].append(squash_idx) + + # add the indices of the walkers that were already + # in the merge group that was just squashed + merge_groups[keep_idx].extend(merge_groups[squash_idx]) + + # reset the merge group that was just squashed to empty + merge_groups[squash_idx] = [] + + # increase the number of clones that the cloned + # walker has + walker_clone_nums[max_idx] += 1 + + # new variation for starting new stage + new_variation, walker_variations = self._calc_variation( + new_walker_weights, new_num_walker_copies, projection_list + ) + variations.append(new_variation) + + logger.info("variance after selection: {}".format(new_variation)) + + # if not productive + else: + new_num_walker_copies[min_idx] = 1 + new_num_walker_copies[closewalk] = 1 + new_num_walker_copies[max_idx] -= 1 + + # given we know what we want to clone to specific slots + # (squashing other walkers) we need to determine where these + # squashed walkers will be merged + walker_actions = self.assign_clones(merge_groups, walker_clone_nums) + + # because there is only one step in resampling here we just + # add another field for the step as 0 and add the walker index + # to its record as well + for walker_idx, walker_record in enumerate(walker_actions): + walker_record["step_idx"] = np.array([0]) + walker_record["walker_idx"] = np.array([walker_idx]) + + return walker_actions, variations[-1], walker_variations + + def _all_to_all_distance(self, projection_list): + """Calculate the pairwise all-to-all distances between walkers. + + Parameters + ---------- + projection_list : list of numpy arrays + + + Returns + ------- + distance_matrix : np.array(n_walkers,n_walkers) + + """ + # initialize an all-to-all matrix, with 0.0 for self distances + n_walkers = len(projection_list) + dist_mat = np.zeros((n_walkers, n_walkers)) + + for i in range(n_walkers-1): + for j in range(i,n_walkers): + dist_mat[i,j] = np.sqrt(np.sum(np.square(projection_list[i] - projection_list[j]))) + dist_mat[j,i] = dist_mat[i,j] + + return dist_mat + + def _calc_projection_list(self, walkers): + if self.num_proc > 1: + pool = mulproc.Pool(self.num_proc) + projs = pool.map(self.projector.project, [walker.state for walker in walkers]) + else: + projs = [] + for walker in walkers: + projs.append(self.projector.project(walker.state)) + + return projs + + def resample(self, walkers): + """Resamples walkers based on REVOProjectionResampler algorithm + + Parameters + ---------- + walkers : list of walkers + + + Returns + ------- + resampled_walkers : list of resampled_walkers + + resampling_data : list of dict of str: value + The resampling records resulting from the decisions. + + resampler_data :list of dict of str: value + The resampler records resulting from the resampler actions. + + """ + + # initialize the parameters + num_walkers = len(walkers) + walker_weights = [walker.weight for walker in walkers] + + # Needs to be floats to do partial amps during second variation calculations. + num_walker_copies = np.ones(num_walkers) + + # calculate projection list + projection_list = self._calc_projection_list(walkers) + + # calculate distance matrix + distance_matrix = self._all_to_all_distance(projection_list) + + # determine cloning and merging actions to be performed, by + # maximizing the variation, i.e. the Decider + resampling_data, variation, walker_variations = self.decide( + walker_weights, num_walker_copies, distance_matrix, projection_list + ) + + # convert the target idxs and decision_id to feature vector arrays + for record in resampling_data: + record["target_idxs"] = np.array(record["target_idxs"]) + record["decision_id"] = np.array([record["decision_id"]]) + + # actually do the cloning and merging of the walkers + resampled_walkers = self.DECISION.action(walkers, [resampling_data]) + + # flatten the distance matrix and give the number of walkers + # as well for the resampler data, there is just one per cycle + resampler_data = [ + { + "walker_projections": np.ravel(np.array(projection_list)), + "walker_variations:": np.ravel(np.array(walker_variations)), + "num_walkers": np.array([len(walkers)]), + "variation": np.array([variation]), + } + ] + + return resampled_walkers, resampling_data, resampler_data From 0cc65a5b821a67d6461bccd3138e2922fb82cd71 Mon Sep 17 00:00:00 2001 From: Samik Bose Date: Thu, 8 Jan 2026 11:28:42 -0500 Subject: [PATCH 3/6] Update the CoordTICAProjector --- src/wepy/resampling/distances/distance.py | 22 ++++ src/wepy/resampling/projectors/tica.py | 134 ++++++++++++++++++++-- src/wepy/runners/openmm.py | 1 - 3 files changed, 144 insertions(+), 13 deletions(-) diff --git a/src/wepy/resampling/distances/distance.py b/src/wepy/resampling/distances/distance.py index 822c9b28..d379b94c 100644 --- a/src/wepy/resampling/distances/distance.py +++ b/src/wepy/resampling/distances/distance.py @@ -118,6 +118,28 @@ def image(self, state): def image_distance(self, image_a, image_b): return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2) +class ProjectorDistance(Distance): + """Take a projector as input + + """ + def __init__(self, projector): + """Construct a distance metric. + + Parameters + ---------- + + projector : A Projector object, which implementes the project function + """ + + self.projector = projector + + def image(self, state): + return self.projector.project(state) + + def image_distance(self, image_a, image_b): + return np.sqrt(np.sum(np.square(image_a - image_b))) + + class AtomPairDistance(Distance): """Constructs a vector of atomic distances for each state. Distance is the root mean squared distance between the vectors. diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py index edb7855d..f6393a2e 100644 --- a/src/wepy/resampling/projectors/tica.py +++ b/src/wepy/resampling/projectors/tica.py @@ -20,39 +20,50 @@ def shorten_vec(x, unitcell_side_lengths): pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0] for dim_idx in pos_idxs: - x[dim_idx] += unitcell_side_lengths[dim_idx] + x[dim_idx] -= unitcell_side_lengths[dim_idx] neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0] for dim_idx in neg_idxs: - x[dim_idx] -= unitcell_side_lengths[dim_idx] + x[dim_idx] += unitcell_side_lengths[dim_idx] return x + +def build_coord_feature(state, ref_pos, idxs,): + + + return aligned_feat_coord + + class DistanceTICAProjector(Projector): """Projects a state into a predefined TICA space, using a set of distances as intermediate features. """ - def __init__(self, dist_idxs, tica_vectors, periodic=True): + def __init__(self, dist_idxs, tica_model, periodic=True): """Construct a DistanceTICA projector. Parameters ---------- dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances - tica_vectors : np.array of shape (nt,nd) - vectors for projecting into tica space + + tica_model : Deeptime or equivalent object that has a transform function, which + will be used to transform the distances into tica space. + periodic : bool (default = True) - whether to use periodic boundary conditions to minimize pair distances """ + self.dist_idxs = np.array(dist_idxs) - self.tica_vectors = np.array(tica_vectors) + self.model = tica_model self.periodic = periodic - assert self.dist_idxs.shape[0] == self.tica_vectors.shape[1], "Number of distances needs to match dimensionality of tica vectors!" - assert self.dist_idxs.shape[1] == 2, "Distance indices have incorrect shape" + # check for transform type functions + if hasattr(self.model, 'transform'): - self.ndim = self.tica_vectors.shape[0] - + self.ndim = self.model.dim + def project(self, state): # get all the displacement vectors @@ -61,14 +72,113 @@ def project(self, state): if self.periodic: # get the box lengths from the vectors box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) - dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v)))) for v in disp_vecs]) + dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v, box_lengths)))) for v in disp_vecs]) else: dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs]) + tranformed_dists = self.model.transform(dists) + # calculate projections - proj = np.zeros(self.ndim) + proj = np.zeros(self.ndim, tranformed_dists.shape[0]) for i in range(self.ndim): - proj[i] = np.dot(self.tica_vectors[i], dists) + proj[i] = np.dot(tranformed_dists[i], dists) + + return proj + +class CoordTICAProjector(Projector): + """Projects a state into a predefined TICA space, using selected + Cartesian coordinates (e.g., Cα atoms) as features. + + The feature vector is constructed by taking the positions of a + fixed set of atoms and flattening them to shape (natoms*3,). + """ + + def __init__(self, atom_idxs, tica_vectors, alignment_idxs, + periodic=True): + """ + Parameters + ---------- + atom_idxs : array-like of shape (natoms,) + Indices of the atoms whose coordinates are used as features. + The order MUST match the order used when training TICA. + + model: tica model to extract the vectors + + tica_vectors : np.ndarray of shape (ntica, natoms*3) + TICA eigenvectors for projecting into TICA space. + + alignment_idxs: array-like of shape (align_atoms,) + Indices of atoms whose coordinates are used as the reference for + alignment of the frames. These atoms MUST match the atoms that + were used to align the frames before tica model training. + + periodic : bool, default=True + Whether to take periodic boundary conditions into account. + + """ + + self.atom_idxs = np.asarray(atom_idxs, dtype=int) + self.tica_vectors = np.asarray(tica_vectors, dtype=float) + self.periodic = periodic + self.alignment_idxs = alignment_idxs + + natoms = self.atom_idxs.shape[0] + nfeat = natoms * 3 + + assert self.tica_vectors.ndim == 2, "tica_vectors must be 2D" + assert self.tica_vectors.shape[1] == nfeat, \ + f"TICA vectors expect {self.tica_vectors.shape[1]} features, " \ + f"but natoms*3 = {nfeat}" + + self.ndim = self.tica_vectors.shape[0] + + def project(self, state): + """ + Project the given walker state into TICA space. + + Parameters + ---------- + state : dict-like + Must contain 'positions' with shape (N, 3) and, if + periodic=True, 'box_vectors'. + + Returns + ------- + proj : np.ndarray of shape (ntica,) + TICA coordinates for this state. + """ + ## positions of selected atoms: (natoms, 3) + #pos = state['positions'][self.atom_idxs].copy() + ## + #ref + + ## ALWAYS ALIGN WITH A SPECIFIC SELECTION + ## USING GEOMM AND GROUP PAIR< CENTER AROUND + ## + if self.periodic: + # use minimum-image convention for each displacement + box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"]) + for i in range(pos.shape[0]): + disp = pos[i] - ref + disp = shorten_vec(disp, box_lengths) + pos[i] = disp + else: + pos -= ref + #else: + # # optional: still can correct for PBC wrt some origin, but + # # usually you want anchor_first_atom=True for coord-based TICA + # if self.periodic: + # logger.warning("periodic=True but anchor_first_atom=False; " + # "coordinates may be discontinuous across PBCs.") + + + + # flatten to (natoms*3,) + feat_vec = pos.reshape(-1) + # project into TICA space: (ntica, nfeat) @ (nfeat,) -> (ntica,) + proj = self.tica_vectors.dot(feat_vec) + return proj + diff --git a/src/wepy/runners/openmm.py b/src/wepy/runners/openmm.py index c9c863ff..c59084a9 100644 --- a/src/wepy/runners/openmm.py +++ b/src/wepy/runners/openmm.py @@ -289,7 +289,6 @@ def __init__( self.platform_kwargs = platform_kwargs self.enforce_box = enforce_box - self.get_parameter_derivs = get_parameter_derivs self.getState_kwargs = dict(GET_STATE_KWARG_DEFAULTS) # update with the user based enforce_box From f48b6e3b766d851f4ca7c6f6a3292591e4cf86c1 Mon Sep 17 00:00:00 2001 From: Samik Bose Date: Thu, 8 Jan 2026 11:43:57 -0500 Subject: [PATCH 4/6] Update the CoordTICAProjector --- src/wepy/resampling/projectors/tica.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py index f6393a2e..9e68787c 100644 --- a/src/wepy/resampling/projectors/tica.py +++ b/src/wepy/resampling/projectors/tica.py @@ -76,13 +76,13 @@ def project(self, state): else: dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs]) - tranformed_dists = self.model.transform(dists) - - # calculate projections - proj = np.zeros(self.ndim, tranformed_dists.shape[0]) - for i in range(self.ndim): - proj[i] = np.dot(tranformed_dists[i], dists) + proj = self.model.transform(dists) + ## calculate projections + #proj = np.zeros(self.ndim, tranformed_dists.shape[0]) + #for i in range(self.ndim): + # proj[i] = np.dot(tranformed_dists[i], dists) + # return proj class CoordTICAProjector(Projector): From 61d17469ef2ce3343dd43f2a65e7ed03b41072f6 Mon Sep 17 00:00:00 2001 From: Samik Bose Date: Wed, 14 Jan 2026 14:02:28 -0500 Subject: [PATCH 5/6] TODO: CoordTICAProjector() --- src/wepy/resampling/projectors/tica.py | 20 +++++++++++--------- src/wepy/resampling/resamplers/revo.py | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py index 9e68787c..a95f41ff 100644 --- a/src/wepy/resampling/projectors/tica.py +++ b/src/wepy/resampling/projectors/tica.py @@ -10,7 +10,6 @@ from wepy.resampling.projectors.projector import Projector from wepy.util.util import box_vectors_to_lengths_angles -#from geomm.grouping import shorten_vec def shorten_vec(x, unitcell_side_lengths): """ @@ -32,6 +31,13 @@ def shorten_vec(x, unitcell_side_lengths): def build_coord_feature(state, ref_pos, idxs,): + # get the state from the simulation + + # extract the position + + # now use the same idxs to align + + # return the aligned coords return aligned_feat_coord @@ -56,12 +62,12 @@ def __init__(self, dist_idxs, tica_model, periodic=True): """ self.dist_idxs = np.array(dist_idxs) - self.model = tica_model self.periodic = periodic + self.model = tica_model # check for transform type functions - if hasattr(self.model, 'transform'): - + #if hasattr(self.model, 'transform'): + # self.ndim = self.model.dim def project(self, state): @@ -78,11 +84,6 @@ def project(self, state): proj = self.model.transform(dists) - ## calculate projections - #proj = np.zeros(self.ndim, tranformed_dists.shape[0]) - #for i in range(self.ndim): - # proj[i] = np.dot(tranformed_dists[i], dists) - # return proj class CoordTICAProjector(Projector): @@ -147,6 +148,7 @@ def project(self, state): proj : np.ndarray of shape (ntica,) TICA coordinates for this state. """ + ## positions of selected atoms: (natoms, 3) #pos = state['positions'][self.atom_idxs].copy() ## diff --git a/src/wepy/resampling/resamplers/revo.py b/src/wepy/resampling/resamplers/revo.py index 4080437c..bb19bff4 100644 --- a/src/wepy/resampling/resamplers/revo.py +++ b/src/wepy/resampling/resamplers/revo.py @@ -756,7 +756,7 @@ class REVOProjectionResampler(REVOResampler): for V_i that depends solely on walker i. As in the original REVO, this algorithm needs the following parameters: - +proximity_score pmin: the minimum statistical weight. REVO does not clone walkers with a weight less than pmin. @@ -1262,7 +1262,7 @@ def resample(self, walkers): resampler_data = [ { "walker_projections": np.ravel(np.array(projection_list)), - "walker_variations:": np.ravel(np.array(walker_variations)), + "walker_variations": np.ravel(np.array(walker_variations)), "num_walkers": np.array([len(walkers)]), "variation": np.array([variation]), } From 7055f0676b9ef7be7b22463c3a7b0c9030c08268 Mon Sep 17 00:00:00 2001 From: Samik Bose Date: Mon, 9 Mar 2026 16:52:32 -0400 Subject: [PATCH 6/6] tica projectors-distances updated --- src/wepy/resampling/distances/distance.py | 4 +- src/wepy/resampling/projectors/tica.py | 244 ++++++++++++++-------- 2 files changed, 156 insertions(+), 92 deletions(-) diff --git a/src/wepy/resampling/distances/distance.py b/src/wepy/resampling/distances/distance.py index d379b94c..bd9ec4fd 100644 --- a/src/wepy/resampling/distances/distance.py +++ b/src/wepy/resampling/distances/distance.py @@ -123,11 +123,11 @@ class ProjectorDistance(Distance): """ def __init__(self, projector): - """Construct a distance metric. + """Construct a distance metric. Parameters ---------- - + projector : A Projector object, which implementes the project function """ diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py index a95f41ff..92ef2ad0 100644 --- a/src/wepy/resampling/projectors/tica.py +++ b/src/wepy/resampling/projectors/tica.py @@ -8,130 +8,204 @@ # Third Party Library import numpy as np +# geomm functions +from geomm.grouping import group_pair +from geomm.superimpose import superimpose + +# wepy functions from wepy.resampling.projectors.projector import Projector from wepy.util.util import box_vectors_to_lengths_angles -def shorten_vec(x, unitcell_side_lengths): - """ - For a given vector x between two points in a periodic box, return the - shortest version of that vector. + +def aligned(coords, ref_coords,unitcell_length, alignment_idxs, pair_idx1,pair_idx2): + + """For a frame from a trajectory this function does a bunch of operations + systematically: + (i) First moves pair_idx2 coordinates to the image of the periodic unitcell that minimizes the + difference between the centers of geometry between the pair_idx2 and pair_idx1 (e.g. a protein and ligand). + (ii) Then, centers the set of coordinates around a set of alingment atom indices. + (iii) Finally superimpose all the frames on top of a reference frame. (alignment) + + + This uses the geomm group pair function, followed by centering of grouped snapshots and then again + geomm superimpose function. + Unlike the 'aligned_frames' function, it only operates over a single frame (not the entire trajectory) + + Parameters + ---------- + + coords : arraylike + The coordinates array of the frame of the particles you will be transforming. + (group pairing followed by centering followed by superimpose/alignment) + + ref_coords: arraylike + Output of the ref_centered_pose function + Centered reference frame coordinates to be used in superimpose + + unitcell_length : arraylike + The lengths of the sides of a rectangular unitcell. + + alignment_idxs : arraylike + Collection of the indices which will be used to center the snapshot + and align them against the ref pose. + Please ensure that the same indices are used while centering the reference pose too. + + pair_idx1 : arraylike + Collection of the indices that define that member of the pair. + + pair_idxs2 : arraylike + Collection of the indices that define that member of the pair. + + Returns + ------- + + superimposed_pos : arrays + Transformed coordinates of the frame. + Transformation: group pairing followed by centering followed by superimpose/alignment. + """ - pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0] - for dim_idx in pos_idxs: - x[dim_idx] -= unitcell_side_lengths[dim_idx] - neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0] - for dim_idx in neg_idxs: - x[dim_idx] += unitcell_side_lengths[dim_idx] + assert coords.shape[1] == 3, "coordinates are not of 3 dimensions" + assert coords.shape[0] == ref_coords.shape[0], "Number of atoms does not match between reference and provided coords" - return x + grouped_pos = group_pair(coords, unitcell_length, pair_idx1, pair_idx2) + centroid = np.average(grouped_pos[alignment_idxs], axis =0) + grouped_centered_pos = grouped_pos - centroid + superimposed_pos, _ , _ = superimpose(ref_coords,grouped_centered_pos, idxs=alignment_idxs) -def build_coord_feature(state, ref_pos, idxs,): + return superimposed_pos - # get the state from the simulation +def shorten_vecs(disp_vecs, box_lengths): + """ + Apply minimum-image convention to displacement vectors in an + cubic/tetragonal/rectangular(?)/orthorhombic periodic box (vectorized). Seems faster. + + Parameters + ---------- + disp_vecs : array_like, shape (n, 3) + Displacement vectors[[dx_i, dy_i, dz_i], [dx_j, dy_j, dz_j] ...]. + box_lengths : array_like, shape (3,) + Box side lengths [Lx, Ly, Lz]. + + Returns + ------- + numpy.ndarray, shape (n, 3) + Minimum-image displacement vectors. + """ - # extract the position + X = np.asarray(disp_vecs, dtype=float) # (n, 3) + L = np.asarray(box_lengths, dtype=float) # (3,) + + # Broadcasting: (n,3) / (3,) works automatically + + return X - L * np.round(X / L) + # this should always work, np.round(0.6) = 1, np.round(-0.6) = -1 and np.round(0.2)=0. - # now use the same idxs to align - # return the aligned coords - - return aligned_feat_coord - - class DistanceTICAProjector(Projector): - """Projects a state into a predefined TICA space, using a set of distances as intermediate features. + """ + Projects a state into a predefined TICA space, using a set of distances as intermediate features. """ def __init__(self, dist_idxs, tica_model, periodic=True): + """Construct a DistanceTICA projector. Parameters ---------- - dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances + dist_idxs : np.array of shape (nd,2) + Indices of atoms for computing distances in an image. + + tica_model : Deeptime or equivalent object + It MUST have a transform function, which + will be used to transform the distances into tica space. - tica_model : Deeptime or equivalent object that has a transform function, which - will be used to transform the distances into tica space. + periodic : bool (default = True) + Whether to use periodic boundary conditions to minimize pair distances - periodic : bool (default = True) - whether to use periodic boundary conditions to - minimize pair distances """ self.dist_idxs = np.array(dist_idxs) self.periodic = periodic self.model = tica_model - # check for transform type functions - #if hasattr(self.model, 'transform'): - # + #check if the model has .transform object/attribute + # hasattr(self.model, 'transform'): + self.ndim = self.model.dim def project(self, state): # get all the displacement vectors disp_vecs = state['positions'][self.dist_idxs[:,0]] - state['positions'][self.dist_idxs[:,1]] - + if self.periodic: - # get the box lengths from the vectors - box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"]) - dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v, box_lengths)))) for v in disp_vecs]) - else: - dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs]) - - proj = self.model.transform(dists) + box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"]) + disp_vecs = shorten_vecs(disp_vecs, box_lengths) + + dists = np.linalg.norm(disp_vecs, axis=1) + #print(f'Dist: {dists}') + proj = self.model.transform(dists) + + print(f'Proj: {proj}') return proj + class CoordTICAProjector(Projector): """Projects a state into a predefined TICA space, using selected - Cartesian coordinates (e.g., Cα atoms) as features. + Cartesian coordinates (e.g., CA atoms or backbone atoms) as features. The feature vector is constructed by taking the positions of a fixed set of atoms and flattening them to shape (natoms*3,). """ - def __init__(self, atom_idxs, tica_vectors, alignment_idxs, - periodic=True): + def __init__(self, alignment_idxs, atom_idxs, tica_model, ref_centered_pos, periodic=True): """ Parameters ---------- + alignment_idxs: array-like of shape (align_atoms,) + Indices of atoms whose coordinates are used as the reference for + alignment of the frames. These atoms MUST match the atoms that + were used to align the frames before tica model training. + atom_idxs : array-like of shape (natoms,) Indices of the atoms whose coordinates are used as features. The order MUST match the order used when training TICA. - model: tica model to extract the vectors - tica_vectors : np.ndarray of shape (ntica, natoms*3) - TICA eigenvectors for projecting into TICA space. - - alignment_idxs: array-like of shape (align_atoms,) - Indices of atoms whose coordinates are used as the reference for - alignment of the frames. These atoms MUST match the atoms that - were used to align the frames before tica model training. - - periodic : bool, default=True - Whether to take periodic boundary conditions into account. + model: Deeptime or equivalent object + It MUST have a transform function, which + will be used to transform the distances into tica space. + + ref_centered_pos: arraylike + MUST FOLLOW some conditions: + 1. Centered reference frame coordinates to be used in the superimpose function + 2. The exact ref pose used while making the coord features for training the tica model. + 3. Generally should be the output of the ref_centered_pose function + + periodic : bool (default = True) + Whether to use periodic boundary conditions to minimize pair distances """ self.atom_idxs = np.asarray(atom_idxs, dtype=int) - self.tica_vectors = np.asarray(tica_vectors, dtype=float) - self.periodic = periodic - self.alignment_idxs = alignment_idxs + self.alignment_idxs = np.array(alignment_idxs, dtype=int) + self.ref_centered_pose = ref_centered_pos - natoms = self.atom_idxs.shape[0] - nfeat = natoms * 3 + self.periodic = periodic + self.model = tica_model + #check if the model has .transform object/attribute + # hasattr(self.model, 'transform'): + self.ndim = self.model.dim - assert self.tica_vectors.ndim == 2, "tica_vectors must be 2D" - assert self.tica_vectors.shape[1] == nfeat, \ - f"TICA vectors expect {self.tica_vectors.shape[1]} features, " \ - f"but natoms*3 = {nfeat}" + #natoms = self.atom_idxs.shape[0] + #nfeat = natoms * 3 - self.ndim = self.tica_vectors.shape[0] def project(self, state): """ @@ -149,38 +223,28 @@ def project(self, state): TICA coordinates for this state. """ - ## positions of selected atoms: (natoms, 3) - #pos = state['positions'][self.atom_idxs].copy() - ## - #ref - - ## ALWAYS ALIGN WITH A SPECIFIC SELECTION - ## USING GEOMM AND GROUP PAIR< CENTER AROUND - ## if self.periodic: - # use minimum-image convention for each displacement box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"]) - for i in range(pos.shape[0]): - disp = pos[i] - ref - disp = shorten_vec(disp, box_lengths) - pos[i] = disp - else: - pos -= ref - #else: - # # optional: still can correct for PBC wrt some origin, but - # # usually you want anchor_first_atom=True for coord-based TICA - # if self.periodic: - # logger.warning("periodic=True but anchor_first_atom=False; " - # "coordinates may be discontinuous across PBCs.") - - - - # flatten to (natoms*3,) - feat_vec = pos.reshape(-1) - # project into TICA space: (ntica, nfeat) @ (nfeat,) -> (ntica,) - proj = self.tica_vectors.dot(feat_vec) - return proj + pos_aligned = aligned(coords = state['positions'], + ref_coords = self.ref_centered_pose, + unitcell_length = box_lengths, + alignment_idxs = self.alignment_idxs, + pair_idx1 = self.alignment_idxs, + pair_idx2 = self.atom_idxs) + + + else: # Discuss with Alex + centroid = np.average(state['positions'][self.alignment_idxs], axis =0) + centered_pos = state['positions'] - centroid + pos_aligned, _ , _ = superimpose(self.ref_centered_pose, centered_pos, idxs=self.alignment_idxs) + feat_coord = pos_aligned[self.atom_idxs].reshape(1, -1) + # without reshape it is still not in the input shape for the parent tica model + # the parent tica model is trained on a flattened array. + + proj = self.model.transform(feat_coord) + + return proj