From 90e003c2d37e802246663bb5b67a4ec07ba5c3ed Mon Sep 17 00:00:00 2001
From: Alex Dickson <alexrd@msu.edu>
Date: Fri, 2 May 2025 10:40:37 -0400
Subject: [PATCH 1/6] added projector class

---
 src/wepy/resampling/projectors/__init__.py  |   0
 src/wepy/resampling/projectors/centroid.py  | 170 ++++++++++++++++++++
 src/wepy/resampling/projectors/projector.py | 170 ++++++++++++++++++++
 3 files changed, 340 insertions(+)
 create mode 100644 src/wepy/resampling/projectors/__init__.py
 create mode 100644 src/wepy/resampling/projectors/centroid.py
 create mode 100644 src/wepy/resampling/projectors/projector.py

diff --git a/src/wepy/resampling/projectors/__init__.py b/src/wepy/resampling/projectors/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/wepy/resampling/projectors/centroid.py b/src/wepy/resampling/projectors/centroid.py
new file mode 100644
index 00000000..822c9b28
--- /dev/null
+++ b/src/wepy/resampling/projectors/centroid.py
@@ -0,0 +1,170 @@
+"""Modular component for defining distance metrics usable within many
+different resamplers.
+
+This module contains an abstract base class for Distance classes.
+
+The suggested implementation method is to leave the 'distance' method
+as is, and override the 'image' and 'image_distance' methods
+instead. Because the default 'distance' method calls these
+transparently. The resamplers will use the 'image' and
+'image_distance' calls because this allows performance optimizations.
+
+For example in WExplore the images for some walkers end up being
+stored as the definitions of Voronoi regions, and if the whole walker
+state was stored it would not only use much more space in memory but
+require that common transformations be repeated every time a distance
+is to be calculated to that image (which is many times). In REVO all
+to all distances between walkers are computed which would also incur a
+high cost.
+
+So use the 'image' to do precomputations on raw walker states and use
+the 'image_distance' to compute distances using only those images.
+
+"""
+# Standard Library
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Third Party Library
+import numpy as np
+from wepy.util.util import box_vectors_to_lengths_angles
+
+
+class Distance(object):
+    """Abstract Base class for Distance classes."""
+
+    def __init__(self):
+        """Constructor for Distance class."""
+        pass
+
+    def image(self, state):
+        """Compute the 'image' of a walker state which should be some
+        transformation of the walker state that is more
+        convenient. E.g. for precomputation of expensive operations or
+        for saving as resampler state.
+
+        The abstract implementation is naive and just returns the
+        state itself, thus it is the identity function.
+
+        Parameters
+        ----------
+        state : object implementing WalkerState
+            The state which will be transformed to an image
+
+        Returns
+        -------
+        image : object implementing WalkerState
+            The same state that was given as an argument.
+
+        """
+
+        return state
+
+    def image_distance(self, image_a, image_b):
+        """Compute the distance between two images of walker states.
+
+        Parameters
+        ----------
+        image_a : object produced by Distance.image
+
+        image_b : object produced by Distance.image
+
+        Returns
+        -------
+
+        distance : float
+            The distance between the two images
+
+        Raises
+        ------
+
+        NotImplementedError : always because this is abstract
+
+        """
+        raise NotImplementedError
+
+    def distance(self, state_a, state_b):
+        """Compute the distance between two states.
+
+        Parameters
+        ----------
+        state_a : object implementing WalkerState
+
+        state_b : object implementing WalkerState
+
+        Returns
+        -------
+
+        distance : float
+            The distance between the two walker states
+
+
+        """
+
+        return self.image_distance(self.image(state_a), self.image(state_b))
+
+
+class XYEuclideanDistance(Distance):
+    """2 dimensional euclidean distance between points.
+
+    States have the attributes 'x' and 'y'.
+
+    """
+
+    def image(self, state):
+        return np.array([state["x"], state["y"]])
+
+    def image_distance(self, image_a, image_b):
+        return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2)
+
+class AtomPairDistance(Distance):
+    """Constructs a vector of atomic distances for each state.
+    Distance is the root mean squared distance between the vectors.
+    """
+
+    def __init__(self, pair_list, periodic=True):
+        """Construct a distance metric.
+
+        Parameters
+        ----------
+
+        pair_list : arraylike of tuples
+            The indices of the atom pairs between which to compute
+            distances.
+
+        """
+        self.pair_list = pair_list
+        self.periodic = periodic
+
+    def _adjust_disp_vector(self, disp, box_lengths):
+        edited = True
+        while edited:
+            edited = False
+            for i in range(3):
+                if disp[i] > box_lengths[i]/2:
+                    disp[i] -= box_lengths[i]
+                    edited = True
+                elif disp[i] < -box_lengths[i]/2:
+                    disp[i] += box_lengths[i]
+                    edited = True
+        return disp
+        
+    def image(self, state):
+
+        if self.periodic:
+            # get the box lengths from the vectors
+            box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
+
+        dist_list = np.zeros((len(self.pair_list)))
+        for i,p in enumerate(self.pair_list):
+            disp_vector = state["positions"][p[0]] - state["positions"][p[1]]
+            if self.periodic:
+                dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths))))
+            else:
+                dist_list[i] = np.sqrt(np.sum(np.square(disp_vector)))
+        
+        return dist_list
+
+    def image_distance(self, image_a, image_b):
+        return np.sqrt(np.mean(np.square(image_a - image_b)))
diff --git a/src/wepy/resampling/projectors/projector.py b/src/wepy/resampling/projectors/projector.py
new file mode 100644
index 00000000..822c9b28
--- /dev/null
+++ b/src/wepy/resampling/projectors/projector.py
@@ -0,0 +1,170 @@
+"""Modular component for defining distance metrics usable within many
+different resamplers.
+
+This module contains an abstract base class for Distance classes.
+
+The suggested implementation method is to leave the 'distance' method
+as is, and override the 'image' and 'image_distance' methods
+instead. Because the default 'distance' method calls these
+transparently. The resamplers will use the 'image' and
+'image_distance' calls because this allows performance optimizations.
+
+For example in WExplore the images for some walkers end up being
+stored as the definitions of Voronoi regions, and if the whole walker
+state was stored it would not only use much more space in memory but
+require that common transformations be repeated every time a distance
+is to be calculated to that image (which is many times). In REVO all
+to all distances between walkers are computed which would also incur a
+high cost.
+
+So use the 'image' to do precomputations on raw walker states and use
+the 'image_distance' to compute distances using only those images.
+
+"""
+# Standard Library
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Third Party Library
+import numpy as np
+from wepy.util.util import box_vectors_to_lengths_angles
+
+
+class Distance(object):
+    """Abstract Base class for Distance classes."""
+
+    def __init__(self):
+        """Constructor for Distance class."""
+        pass
+
+    def image(self, state):
+        """Compute the 'image' of a walker state which should be some
+        transformation of the walker state that is more
+        convenient. E.g. for precomputation of expensive operations or
+        for saving as resampler state.
+
+        The abstract implementation is naive and just returns the
+        state itself, thus it is the identity function.
+
+        Parameters
+        ----------
+        state : object implementing WalkerState
+            The state which will be transformed to an image
+
+        Returns
+        -------
+        image : object implementing WalkerState
+            The same state that was given as an argument.
+
+        """
+
+        return state
+
+    def image_distance(self, image_a, image_b):
+        """Compute the distance between two images of walker states.
+
+        Parameters
+        ----------
+        image_a : object produced by Distance.image
+
+        image_b : object produced by Distance.image
+
+        Returns
+        -------
+
+        distance : float
+            The distance between the two images
+
+        Raises
+        ------
+
+        NotImplementedError : always because this is abstract
+
+        """
+        raise NotImplementedError
+
+    def distance(self, state_a, state_b):
+        """Compute the distance between two states.
+
+        Parameters
+        ----------
+        state_a : object implementing WalkerState
+
+        state_b : object implementing WalkerState
+
+        Returns
+        -------
+
+        distance : float
+            The distance between the two walker states
+
+
+        """
+
+        return self.image_distance(self.image(state_a), self.image(state_b))
+
+
+class XYEuclideanDistance(Distance):
+    """2 dimensional euclidean distance between points.
+
+    States have the attributes 'x' and 'y'.
+
+    """
+
+    def image(self, state):
+        return np.array([state["x"], state["y"]])
+
+    def image_distance(self, image_a, image_b):
+        return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2)
+
+class AtomPairDistance(Distance):
+    """Constructs a vector of atomic distances for each state.
+    Distance is the root mean squared distance between the vectors.
+    """
+
+    def __init__(self, pair_list, periodic=True):
+        """Construct a distance metric.
+
+        Parameters
+        ----------
+
+        pair_list : arraylike of tuples
+            The indices of the atom pairs between which to compute
+            distances.
+
+        """
+        self.pair_list = pair_list
+        self.periodic = periodic
+
+    def _adjust_disp_vector(self, disp, box_lengths):
+        edited = True
+        while edited:
+            edited = False
+            for i in range(3):
+                if disp[i] > box_lengths[i]/2:
+                    disp[i] -= box_lengths[i]
+                    edited = True
+                elif disp[i] < -box_lengths[i]/2:
+                    disp[i] += box_lengths[i]
+                    edited = True
+        return disp
+        
+    def image(self, state):
+
+        if self.periodic:
+            # get the box lengths from the vectors
+            box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
+
+        dist_list = np.zeros((len(self.pair_list)))
+        for i,p in enumerate(self.pair_list):
+            disp_vector = state["positions"][p[0]] - state["positions"][p[1]]
+            if self.periodic:
+                dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths))))
+            else:
+                dist_list[i] = np.sqrt(np.sum(np.square(disp_vector)))
+        
+        return dist_list
+
+    def image_distance(self, image_a, image_b):
+        return np.sqrt(np.mean(np.square(image_a - image_b)))

From baf2b0754c15255bb3b119a0339a2c13681fd687 Mon Sep 17 00:00:00 2001
From: Alex Dickson <alexrd@msu.edu>
Date: Fri, 2 May 2025 17:26:20 -0400
Subject: [PATCH 2/6] projector-based revo resampler

---
 src/wepy/resampling/projectors/centroid.py  | 173 +------
 src/wepy/resampling/projectors/projector.py | 157 +-----
 src/wepy/resampling/projectors/tica.py      |  74 +++
 src/wepy/resampling/resamplers/revo.py      | 536 +++++++++++++++++++-
 4 files changed, 649 insertions(+), 291 deletions(-)
 create mode 100644 src/wepy/resampling/projectors/tica.py

diff --git a/src/wepy/resampling/projectors/centroid.py b/src/wepy/resampling/projectors/centroid.py
index 822c9b28..ebd0d4db 100644
--- a/src/wepy/resampling/projectors/centroid.py
+++ b/src/wepy/resampling/projectors/centroid.py
@@ -1,25 +1,4 @@
-"""Modular component for defining distance metrics usable within many
-different resamplers.
-
-This module contains an abstract base class for Distance classes.
-
-The suggested implementation method is to leave the 'distance' method
-as is, and override the 'image' and 'image_distance' methods
-instead. Because the default 'distance' method calls these
-transparently. The resamplers will use the 'image' and
-'image_distance' calls because this allows performance optimizations.
-
-For example in WExplore the images for some walkers end up being
-stored as the definitions of Voronoi regions, and if the whole walker
-state was stored it would not only use much more space in memory but
-require that common transformations be repeated every time a distance
-is to be calculated to that image (which is many times). In REVO all
-to all distances between walkers are computed which would also incur a
-high cost.
-
-So use the 'image' to do precomputations on raw walker states and use
-the 'image_distance' to compute distances using only those images.
-
+"""Projector for determining centroid distances.
 """
 # Standard Library
 import logging
@@ -28,143 +7,47 @@
 
 # Third Party Library
 import numpy as np
-from wepy.util.util import box_vectors_to_lengths_angles
-
-
-class Distance(object):
-    """Abstract Base class for Distance classes."""
-
-    def __init__(self):
-        """Constructor for Distance class."""
-        pass
-
-    def image(self, state):
-        """Compute the 'image' of a walker state which should be some
-        transformation of the walker state that is more
-        convenient. E.g. for precomputation of expensive operations or
-        for saving as resampler state.
-
-        The abstract implementation is naive and just returns the
-        state itself, thus it is the identity function.
-
-        Parameters
-        ----------
-        state : object implementing WalkerState
-            The state which will be transformed to an image
-
-        Returns
-        -------
-        image : object implementing WalkerState
-            The same state that was given as an argument.
-
-        """
-
-        return state
-
-    def image_distance(self, image_a, image_b):
-        """Compute the distance between two images of walker states.
-
-        Parameters
-        ----------
-        image_a : object produced by Distance.image
-
-        image_b : object produced by Distance.image
-
-        Returns
-        -------
-
-        distance : float
-            The distance between the two images
-
-        Raises
-        ------
 
-        NotImplementedError : always because this is abstract
-
-        """
-        raise NotImplementedError
-
-    def distance(self, state_a, state_b):
-        """Compute the distance between two states.
-
-        Parameters
-        ----------
-        state_a : object implementing WalkerState
-
-        state_b : object implementing WalkerState
-
-        Returns
-        -------
-
-        distance : float
-            The distance between the two walker states
-
-
-        """
-
-        return self.image_distance(self.image(state_a), self.image(state_b))
-
-
-class XYEuclideanDistance(Distance):
-    """2 dimensional euclidean distance between points.
-
-    States have the attributes 'x' and 'y'.
-
-    """
-
-    def image(self, state):
-        return np.array([state["x"], state["y"]])
-
-    def image_distance(self, image_a, image_b):
-        return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2)
+from wepy.resampling.projectors.projector import Projector
+from wepy.util.util import box_vectors_to_lengths_angles
+from geomm.grouping import group_pair
 
-class AtomPairDistance(Distance):
-    """Constructs a vector of atomic distances for each state.
-    Distance is the root mean squared distance between the vectors.
+class CentroidProjector(Projector):
+    """Projects a state onto the centroid distance between two groups.
     """
 
-    def __init__(self, pair_list, periodic=True):
-        """Construct a distance metric.
+    def __init__(self, group1_idxs, group2_idxs, periodic=True):
+        """Construct a centroid distance projector.
 
         Parameters
         ----------
 
-        pair_list : arraylike of tuples
-            The indices of the atom pairs between which to compute
-            distances.
-
+        group1_idxs : list of int - indices of atoms in group1
+        group2_idxs : list of int - indices of atoms in group2
+        periodic :    bool (default = True) - whether to use periodic boundary conditions to
+                      minimize centroid distances
         """
-        self.pair_list = pair_list
+        self.group1_idxs = group1_idxs
+        self.group2_idxs = group2_idxs
         self.periodic = periodic
+      
+    def project(self, state):
 
-    def _adjust_disp_vector(self, disp, box_lengths):
-        edited = True
-        while edited:
-            edited = False
-            for i in range(3):
-                if disp[i] > box_lengths[i]/2:
-                    disp[i] -= box_lengths[i]
-                    edited = True
-                elif disp[i] < -box_lengths[i]/2:
-                    disp[i] += box_lengths[i]
-                    edited = True
-        return disp
+        # cut out only the coordinates we need
+        coords = np.concatenate([state['positions'][self.group1_idxs],state['positions'][self.group2_idxs]])
+        idxs1 = list(range(len(self.group1_idxs)))
+        idxs2 = list(range(len(self.group1_idxs),len(self.group1_idxs) + len(self.group2_idxs)))
         
-    def image(self, state):
-
         if self.periodic:
             # get the box lengths from the vectors
             box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
+            coords = group_pair(coords,box_lengths,idxs1,idxs2)
+
+        # determine coordinate centroids
+        c1 = coords[idxs1].mean(axis=0)
+        c2 = coords[idxs2].mean(axis=0)
+
+        # return the distance between the centroids
+        return np.sqrt(np.sum(np.square(c1-c2)))
 
-        dist_list = np.zeros((len(self.pair_list)))
-        for i,p in enumerate(self.pair_list):
-            disp_vector = state["positions"][p[0]] - state["positions"][p[1]]
-            if self.periodic:
-                dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths))))
-            else:
-                dist_list[i] = np.sqrt(np.sum(np.square(disp_vector)))
-        
-        return dist_list
 
-    def image_distance(self, image_a, image_b):
-        return np.sqrt(np.mean(np.square(image_a - image_b)))
diff --git a/src/wepy/resampling/projectors/projector.py b/src/wepy/resampling/projectors/projector.py
index 822c9b28..8b333c69 100644
--- a/src/wepy/resampling/projectors/projector.py
+++ b/src/wepy/resampling/projectors/projector.py
@@ -1,25 +1,10 @@
-"""Modular component for defining distance metrics usable within many
-different resamplers.
+"""Modular component for defining "projectors" that project a 
+walker state into a one- or low-dimensional subspace. These are
+usable within different resamplers.
 
-This module contains an abstract base class for Distance classes.
-
-The suggested implementation method is to leave the 'distance' method
-as is, and override the 'image' and 'image_distance' methods
-instead. Because the default 'distance' method calls these
-transparently. The resamplers will use the 'image' and
-'image_distance' calls because this allows performance optimizations.
-
-For example in WExplore the images for some walkers end up being
-stored as the definitions of Voronoi regions, and if the whole walker
-state was stored it would not only use much more space in memory but
-require that common transformations be repeated every time a distance
-is to be calculated to that image (which is many times). In REVO all
-to all distances between walkers are computed which would also incur a
-high cost.
-
-So use the 'image' to do precomputations on raw walker states and use
-the 'image_distance' to compute distances using only those images.
+This module contains an abstract base class for Projector classes.
 
+This is similar to the 'image' function in a Distance object
 """
 # Standard Library
 import logging
@@ -28,24 +13,20 @@
 
 # Third Party Library
 import numpy as np
-from wepy.util.util import box_vectors_to_lengths_angles
 
-
-class Distance(object):
-    """Abstract Base class for Distance classes."""
+class Projector(object):
+    """Abstract Base class for Projector classes."""
 
     def __init__(self):
-        """Constructor for Distance class."""
+        """Constructor for Projector class."""
         pass
 
-    def image(self, state):
-        """Compute the 'image' of a walker state which should be some
-        transformation of the walker state that is more
-        convenient. E.g. for precomputation of expensive operations or
-        for saving as resampler state.
+    def project(self, state):
+        """Compute the 'projection' of a walker state onto one
+        or more variables.
 
-        The abstract implementation is naive and just returns the
-        state itself, thus it is the identity function.
+        The abstract implementation is naive and just returns the 
+        numpy array [1].
 
         Parameters
         ----------
@@ -54,117 +35,9 @@ def image(self, state):
 
         Returns
         -------
-        image : object implementing WalkerState
+        projection : numpy array
             The same state that was given as an argument.
 
         """
 
-        return state
-
-    def image_distance(self, image_a, image_b):
-        """Compute the distance between two images of walker states.
-
-        Parameters
-        ----------
-        image_a : object produced by Distance.image
-
-        image_b : object produced by Distance.image
-
-        Returns
-        -------
-
-        distance : float
-            The distance between the two images
-
-        Raises
-        ------
-
-        NotImplementedError : always because this is abstract
-
-        """
-        raise NotImplementedError
-
-    def distance(self, state_a, state_b):
-        """Compute the distance between two states.
-
-        Parameters
-        ----------
-        state_a : object implementing WalkerState
-
-        state_b : object implementing WalkerState
-
-        Returns
-        -------
-
-        distance : float
-            The distance between the two walker states
-
-
-        """
-
-        return self.image_distance(self.image(state_a), self.image(state_b))
-
-
-class XYEuclideanDistance(Distance):
-    """2 dimensional euclidean distance between points.
-
-    States have the attributes 'x' and 'y'.
-
-    """
-
-    def image(self, state):
-        return np.array([state["x"], state["y"]])
-
-    def image_distance(self, image_a, image_b):
-        return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2)
-
-class AtomPairDistance(Distance):
-    """Constructs a vector of atomic distances for each state.
-    Distance is the root mean squared distance between the vectors.
-    """
-
-    def __init__(self, pair_list, periodic=True):
-        """Construct a distance metric.
-
-        Parameters
-        ----------
-
-        pair_list : arraylike of tuples
-            The indices of the atom pairs between which to compute
-            distances.
-
-        """
-        self.pair_list = pair_list
-        self.periodic = periodic
-
-    def _adjust_disp_vector(self, disp, box_lengths):
-        edited = True
-        while edited:
-            edited = False
-            for i in range(3):
-                if disp[i] > box_lengths[i]/2:
-                    disp[i] -= box_lengths[i]
-                    edited = True
-                elif disp[i] < -box_lengths[i]/2:
-                    disp[i] += box_lengths[i]
-                    edited = True
-        return disp
-        
-    def image(self, state):
-
-        if self.periodic:
-            # get the box lengths from the vectors
-            box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
-
-        dist_list = np.zeros((len(self.pair_list)))
-        for i,p in enumerate(self.pair_list):
-            disp_vector = state["positions"][p[0]] - state["positions"][p[1]]
-            if self.periodic:
-                dist_list[i] = np.sqrt(np.sum(np.square(self._adjust_disp_vector(disp_vector,box_lengths))))
-            else:
-                dist_list[i] = np.sqrt(np.sum(np.square(disp_vector)))
-        
-        return dist_list
-
-    def image_distance(self, image_a, image_b):
-        return np.sqrt(np.mean(np.square(image_a - image_b)))
+        return np.ones((1))
\ No newline at end of file
diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py
new file mode 100644
index 00000000..edb7855d
--- /dev/null
+++ b/src/wepy/resampling/projectors/tica.py
@@ -0,0 +1,74 @@
+"""Projectors into a pre-trained tICA space.
+"""
+# Standard Library
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Third Party Library
+import numpy as np
+
+from wepy.resampling.projectors.projector import Projector
+from wepy.util.util import box_vectors_to_lengths_angles
+#from geomm.grouping import shorten_vec
+
+def shorten_vec(x, unitcell_side_lengths):
+    """
+    For a given vector x between two points in a periodic box, return the
+    shortest version of that vector.
+    """
+    pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0]
+
+    for dim_idx in pos_idxs:
+        x[dim_idx] += unitcell_side_lengths[dim_idx]
+
+    neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0]
+
+    for dim_idx in neg_idxs:
+        x[dim_idx] -= unitcell_side_lengths[dim_idx]
+
+    return x
+
+class DistanceTICAProjector(Projector):
+    """Projects a state into a predefined TICA space, using a set of distances as intermediate features.
+    """
+
+    def __init__(self, dist_idxs, tica_vectors, periodic=True):
+        """Construct a DistanceTICA projector.
+
+        Parameters
+        ----------
+
+        dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances
+        tica_vectors : np.array of shape (nt,nd) - vectors for projecting into tica space
+        periodic :    bool (default = True) - whether to use periodic boundary conditions to
+                      minimize pair distances
+        """
+        self.dist_idxs = np.array(dist_idxs)
+        self.tica_vectors = np.array(tica_vectors)
+        self.periodic = periodic
+
+        assert self.dist_idxs.shape[0] == self.tica_vectors.shape[1], "Number of distances needs to match dimensionality of tica vectors!"
+        assert self.dist_idxs.shape[1] == 2, "Distance indices have incorrect shape"
+
+        self.ndim = self.tica_vectors.shape[0]
+      
+    def project(self, state):
+
+        # get all the displacement vectors
+        disp_vecs = state['positions'][self.dist_idxs[:,0]] - state['positions'][self.dist_idxs[:,1]]
+            
+        if self.periodic:
+            # get the box lengths from the vectors
+            box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
+            dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v)))) for v in disp_vecs])
+        else:
+            dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs])
+            
+        # calculate projections
+        proj = np.zeros(self.ndim)
+        for i in range(self.ndim):
+            proj[i] = np.dot(self.tica_vectors[i], dists)
+        
+        return proj
+
diff --git a/src/wepy/resampling/resamplers/revo.py b/src/wepy/resampling/resamplers/revo.py
index 6f8fedc7..4080437c 100644
--- a/src/wepy/resampling/resamplers/revo.py
+++ b/src/wepy/resampling/resamplers/revo.py
@@ -84,10 +84,6 @@ class REVOResampler(CloneMergeResampler):
 
            - variation : the final value of trajectory variation
 
-           - images : the images of walkers that is defined by the distance object
-
-           - image_shape : the shape of the image
-
     The algorithm saves the records of cloning and merging
     information in resampling data.
 
@@ -741,3 +737,535 @@ def resample(self, walkers):
         ]
 
         return resampled_walkers, resampling_data, resampler_data
+
+class REVOProjectionResampler(REVOResampler):
+    r"""Resampler implementing a modified REVO algorithm that works in a
+    projection space.  The distance between walkers in the projection space
+    is used to guide merging, and a sum of "single-body" terms is used
+    as the variation function V.  The single-body terms are also calculated 
+    using the walker projections.
+
+    This algorithm was originally proposed here:
+    https://doi.org/10.1063/5.0239713
+
+    The objective function in that work was defined as
+    .. math::
+        V = \sum_{i} V_i = \sum_i \frac{1}{d^i_{\text{from_target}}}
+
+    and here it is implemented such that any quantity can be used
+    for V_i that depends solely on walker i.
+
+    As in the original REVO, this algorithm needs the following parameters:
+
+       pmin: the minimum statistical weight. REVO does not clone
+       walkers with a weight less than pmin.
+
+       pmax: The maximum statistical weight. It prevents the
+       accumulation of too much weight in one walker.
+
+       merge_dist: This is the merge-distance threshold. The distance
+        between merged walkers should be less than this value.
+
+    The resample function, called during every cycle, takes the
+    ensemble of walkers and performs the follow steps:
+
+       - Evaluates V_i for each walker
+       - Decides which walkers should be merged or cloned
+       - Applies the cloning and merging decisions to get the resampled walkers
+       - Creates the resampling data that includes
+       
+           - n_walkers : the number of walkers. number of walkers is
+             kept constant thought the resampling.
+
+           - walker_variations : the Vi values for each walker
+
+           - walker_projections : the projections for each walker
+
+           - variation : the final value of trajectory variation (V)
+
+    The algorithm saves the records of cloning and merging
+    information in resampling data.
+
+    Only the net clones and merges are recorded in the resampling records.
+
+    """
+
+    # fields that can be used for a table like representation
+    RESAMPLING_RECORD_FIELDS = CloneMergeResampler.RESAMPLING_RECORD_FIELDS
+
+    # fields for resampling data
+    RESAMPLER_FIELDS = CloneMergeResampler.RESAMPLER_FIELDS + (
+        "num_walkers",
+        "walker_variations",
+        "walker_projections",
+        "variation",
+    )
+    RESAMPLER_SHAPES = CloneMergeResampler.RESAMPLER_SHAPES + (
+        (1,),
+        Ellipsis,
+        Ellipsis,
+        (1,),
+    )
+    RESAMPLER_DTYPES = CloneMergeResampler.RESAMPLER_DTYPES + (
+        int,
+        float,
+        float,
+        float,
+    )
+
+    # fields that can be used for a table like representation
+    RESAMPLER_RECORD_FIELDS = CloneMergeResampler.RESAMPLER_RECORD_FIELDS + (
+        "variation",
+    )
+
+    def __init__(
+        self,
+        merge_dist=None,
+        projector=None,
+        max_function=None,
+        init_state=None,
+        merge_alg="pairs",
+        pmin=1e-12,
+        pmax=0.1,
+        seed=None,
+        num_proc=1,
+        **kwargs
+    ):
+        """Constructor for the REVO Property Resampler.
+
+        Parameters
+        ----------
+
+        merge_dist : float
+            The merge distance threshold. Will be compared with Euclidean
+            distances between projection values, e.g.:
+            np.sqrt(np.sum(np.square(p1-p2))).
+
+        projector : object implementing Project
+            The object that projects walkers into a reduced dimensional space.
+
+        max_function : function (float, np.array) -> float
+            Uses the weight and a projection for one walker (np.array) as input, returns Vi.
+            The resampler will promote maximization of this quantity.
+
+        merge_alg : string
+            Indication of which algorithm is used to find pairs to merge.
+
+            'pairs' (default) indicates that a list of all suitable pairs is generated,
+            and the pair that minimizes the expected variation loss is chosen.
+
+            'greedy' indicates that first the lowest variation walker is
+            selected, and this is attempted to be merged with the closest
+            suitable walker
+
+        init_state : WalkerState object
+            Used for automatically determining the state image shape.
+
+        seed : None or int, optional
+            The random seed. If None, the system (random) one will be used.
+
+        num_proc : int, optional
+            The number of processors used to calculate the walker images
+            and all-to-all distances.
+
+        """
+
+        # call the init methods in the CloneMergeResampler
+        # superclass. We set the min and max number of walkers to be
+        # constant
+        super(REVOResampler,self).__init__(
+            pmin=pmin,
+            pmax=pmax,
+            min_num_walkers=Ellipsis,
+            max_num_walkers=Ellipsis,
+            **kwargs
+        )
+
+        assert merge_dist is not None, "Merge distance must be given."
+        assert projector is not None, "Projector object must be given."
+        assert max_function is not None, "Maximization function must be given"
+        assert init_state is not None, "An initial state must be given."
+
+        self.merge_dist = merge_dist
+        self.merge_alg = merge_alg
+
+        # the projector function
+        self.projector = projector
+
+        # the maximization function
+        self.max_function = max_function
+
+        # setting the random seed
+        self.seed = seed
+        if seed is not None:
+            rand.seed(seed)
+
+        # setting the number of processors
+        self.num_proc = num_proc
+
+    def _calc_variation(self, walker_weights, num_walker_copies, projection_list):
+        """Calculates the variation value.
+
+        Parameters
+        ----------
+
+        walker_weights : list of float
+            The weights of all walkers. The sum of all weights should be 1.0.
+
+        num_walker_copies : list of int
+            The number of copies of each walker.
+            0 means the walker does not exist anymore.
+            1 means there is one copy of this walker.
+            >1 means it will be cloned to this number of walkers.
+
+        distance_matrix : list of arraylike of shape (num_walkers)
+
+        Returns
+        -------
+        variation : float
+           The calculated variation value.
+
+        walker_variations : arraylike of shape (num_walkers)
+           The Vi value of each walker.
+
+        """
+
+        num_walkers = len(walker_weights)
+
+        # the value to be optimized
+        variation = 0
+
+        # the walker variation values (Vi values)
+        walker_variations = np.zeros(num_walkers)
+
+        # calculate the variation and walker variation values
+        for i in range(num_walkers):
+            if num_walker_copies[i] > 0:
+                variation += num_walker_copies[i]*self.max_function(walker_weights[i]/num_walker_copies[i],projection_list[i])
+                walker_variations[i] = self.max_function(walker_weights[i]/num_walker_copies[i],projection_list[i])
+
+        return variation, walker_variations
+
+    def decide(self, walker_weights, num_walker_copies, distance_matrix, projection_list):
+        """Optimize the trajectory variation by making decisions for resampling.
+
+        Parameters
+        ----------
+
+        walker_weights : list of float
+            The weights of all walkers. The sum of all weights should be 1.0.
+
+        num_walker_copies : list of int
+            The number of copies of each walker.
+            0 means the walker is not exists anymore.
+            1 means there is one of the this walker.
+            >1 means it should be cloned to this number of walkers.
+
+        distance_matrix : list of arraylike of shape (num_walkers)
+
+        Returns
+        -------
+        variation : float
+            The optimized value of the trajectory variation.
+
+        resampling_data : list of dict of str: value
+            The resampling records resulting from the decisions.
+
+        """
+        num_walkers = len(walker_weights)
+
+        variations = []
+        merge_groups = [[] for i in range(num_walkers)]
+        walker_clone_nums = [0 for i in range(num_walkers)]
+
+        # make copy of walkers properties
+        new_walker_weights = walker_weights.copy()
+        new_num_walker_copies = num_walker_copies.copy()
+
+        # calculate the initial variation which will be optimized
+        variation, walker_variations = self._calc_variation(
+            walker_weights, new_num_walker_copies, projection_list
+        )
+        variations.append(variation)
+
+        # maximize the variance through cloning and merging
+        logger.info("Starting variance optimization: {}".format(variation))
+
+        productive = True
+        while productive:
+            productive = False
+            # find min and max walker_variationss, alter new_amp
+
+            # initialize to None, we may not find one of each
+            min_idx = None
+            max_idx = None
+
+            # selects a walker with minimum walker_variations and a walker with
+            # maximum walker_variations walker (distance to other walkers) will be
+            # tagged for cloning (stored in maxwind), except if it is
+            # already a keep merge target
+            max_tups = []
+            for i, value in enumerate(walker_variations):
+                # 1. must have an amp >=1 which gives the number of clones to be made of it
+                # 2. clones for the given amplitude must not be smaller than the minimum probability
+                # 3. must not already be a keep merge target
+                if (
+                    (new_num_walker_copies[i] >= 1)
+                    and (
+                        new_walker_weights[i] / (new_num_walker_copies[i] + 1)
+                        > self.pmin
+                    )
+                    and (len(merge_groups[i]) == 0)
+                ):
+                    max_tups.append((value, i))
+
+            if len(max_tups) > 0:
+                max_value, max_idx = max(max_tups)
+
+            merge_pair = []
+            if self.merge_alg == "pairs":
+                pot_merge_pairs = self._find_eligible_merge_pairs(
+                    new_walker_weights, distance_matrix, max_idx, new_num_walker_copies
+                )
+                merge_pair = self._calc_variation_loss(
+                    walker_variations, new_walker_weights, pot_merge_pairs
+                )
+            elif self.merge_alg == "greedy":
+                # walker with the lowest walker_variations (distance to other walkers)
+                # will be tagged for merging (stored in min_idx)
+                min_tups = [
+                    (value, i)
+                    for i, value in enumerate(walker_variations)
+                    if new_num_walker_copies[i] == 1
+                    and (new_walker_weights[i] < self.pmax)
+                ]
+
+                if len(min_tups) > 0:
+                    min_value, min_idx = min(min_tups)
+
+                # does min_idx have an eligible merging partner?
+                closewalk = None
+                condition_list = np.array([i is not None for i in [min_idx, max_idx]])
+                if condition_list.all() and min_idx != max_idx:
+                    # get the walkers that aren't the minimum and the max
+                    # walker_variations walkers, as candidates for merging
+                    closewalks = set(range(num_walkers)).difference([min_idx, max_idx])
+
+                    # remove those walkers that if they were merged with
+                    # the min walker_variations walker would violate the pmax
+                    closewalks = [
+                        idx
+                        for idx in closewalks
+                        if (new_num_walker_copies[idx] == 1)
+                        and (
+                            new_walker_weights[idx] + new_walker_weights[min_idx]
+                            < self.pmax
+                        )
+                    ]
+
+                    # if there are any walkers left, get the distances of
+                    # the close walkers to the min walker_variations walker if that
+                    # distance is less than the maximum merge distance
+                    if len(closewalks) > 0:
+                        closewalks_dists = [
+                            (distance_matrix[min_idx][i], i)
+                            for i in closewalks
+                            if distance_matrix[min_idx][i] < (self.merge_dist)
+                        ]
+
+                    # if any were found set this as the closewalk
+                    if len(closewalks_dists) > 0:
+                        closedist, closewalk = min(closewalks_dists)
+                        merge_pair = [min_idx, closewalk]
+
+            else:
+                raise ValueError("Unrecognized value for merge_alg in REVO")
+
+            # did we find a suitable pair to merge?
+            if len(merge_pair) != 0:
+                min_idx = merge_pair[0]
+                closewalk = merge_pair[1]
+
+                # change new_amp
+                tempsum = new_walker_weights[min_idx] + new_walker_weights[closewalk]
+                new_num_walker_copies[min_idx] = new_walker_weights[min_idx] / tempsum
+                new_num_walker_copies[closewalk] = (
+                    new_walker_weights[closewalk] / tempsum
+                )
+                new_num_walker_copies[max_idx] += 1
+
+                # re-determine variation function, and walker_variations values
+                new_variation, walker_variations = self._calc_variation(
+                    new_walker_weights, new_num_walker_copies, projection_list
+                )
+
+                if new_variation > variation:
+                    variations.append(new_variation)
+
+                    logger.info("Variance move to {} accepted".format(new_variation))
+
+                    productive = True
+                    variation = new_variation
+
+                    # make a decision on which walker to keep
+                    # (min_idx, or closewalk), equivalent to:
+                    # `random.choices([closewalk, min_idx],
+                    #                 weights=[new_walker_weights[closewalk], new_walker_weights[min_idx])`
+                    r = rand.uniform(
+                        0.0, new_walker_weights[closewalk] + new_walker_weights[min_idx]
+                    )
+
+                    # keeps closewalk and gets rid of min_idx
+                    if r < new_walker_weights[closewalk]:
+                        keep_idx = closewalk
+                        squash_idx = min_idx
+
+                    # keep min_idx, get rid of closewalk
+                    else:
+                        keep_idx = min_idx
+                        squash_idx = closewalk
+
+                    # update weight
+                    new_walker_weights[keep_idx] += new_walker_weights[squash_idx]
+                    new_walker_weights[squash_idx] = 0.0
+
+                    # update new_num_walker_copies
+                    new_num_walker_copies[squash_idx] = 0
+                    new_num_walker_copies[keep_idx] = 1
+
+                    # add the squash index to the merge group
+                    merge_groups[keep_idx].append(squash_idx)
+
+                    # add the indices of the walkers that were already
+                    # in the merge group that was just squashed
+                    merge_groups[keep_idx].extend(merge_groups[squash_idx])
+
+                    # reset the merge group that was just squashed to empty
+                    merge_groups[squash_idx] = []
+
+                    # increase the number of clones that the cloned
+                    # walker has
+                    walker_clone_nums[max_idx] += 1
+
+                    # new variation for starting new stage
+                    new_variation, walker_variations = self._calc_variation(
+                        new_walker_weights, new_num_walker_copies, projection_list
+                    )
+                    variations.append(new_variation)
+
+                    logger.info("variance after selection: {}".format(new_variation))
+
+                # if not productive
+                else:
+                    new_num_walker_copies[min_idx] = 1
+                    new_num_walker_copies[closewalk] = 1
+                    new_num_walker_copies[max_idx] -= 1
+
+        # given we know what we want to clone to specific slots
+        # (squashing other walkers) we need to determine where these
+        # squashed walkers will be merged
+        walker_actions = self.assign_clones(merge_groups, walker_clone_nums)
+
+        # because there is only one step in resampling here we just
+        # add another field for the step as 0 and add the walker index
+        # to its record as well
+        for walker_idx, walker_record in enumerate(walker_actions):
+            walker_record["step_idx"] = np.array([0])
+            walker_record["walker_idx"] = np.array([walker_idx])
+
+        return walker_actions, variations[-1], walker_variations
+
+    def _all_to_all_distance(self, projection_list):
+        """Calculate the pairwise all-to-all distances between walkers.
+
+        Parameters
+        ----------
+        projection_list : list of numpy arrays
+
+
+        Returns
+        -------
+        distance_matrix : np.array(n_walkers,n_walkers)
+
+        """
+        # initialize an all-to-all matrix, with 0.0 for self distances
+        n_walkers = len(projection_list)
+        dist_mat = np.zeros((n_walkers, n_walkers))
+
+        for i in range(n_walkers-1):
+            for j in range(i,n_walkers):
+                dist_mat[i,j] = np.sqrt(np.sum(np.square(projection_list[i] - projection_list[j])))
+                dist_mat[j,i] = dist_mat[i,j]
+        
+        return dist_mat
+
+    def _calc_projection_list(self, walkers):
+        if self.num_proc > 1:
+            pool = mulproc.Pool(self.num_proc)
+            projs = pool.map(self.projector.project, [walker.state for walker in walkers])
+        else:
+            projs = []
+            for walker in walkers:
+                projs.append(self.projector.project(walker.state))
+        
+        return projs
+
+    def resample(self, walkers):
+        """Resamples walkers based on REVOProjectionResampler algorithm
+
+        Parameters
+        ----------
+        walkers : list of walkers
+
+
+        Returns
+        -------
+        resampled_walkers : list of resampled_walkers
+
+        resampling_data : list of dict of str: value
+            The resampling records resulting from the decisions.
+
+        resampler_data :list of dict of str: value
+            The resampler records resulting from the resampler actions.
+
+        """
+
+        # initialize the parameters
+        num_walkers = len(walkers)
+        walker_weights = [walker.weight for walker in walkers]
+
+        # Needs to be floats to do partial amps during second variation calculations.
+        num_walker_copies = np.ones(num_walkers)
+
+        # calculate projection list
+        projection_list = self._calc_projection_list(walkers)
+
+        # calculate distance matrix
+        distance_matrix = self._all_to_all_distance(projection_list)
+
+        # determine cloning and merging actions to be performed, by
+        # maximizing the variation, i.e. the Decider
+        resampling_data, variation, walker_variations = self.decide(
+            walker_weights, num_walker_copies, distance_matrix, projection_list
+        )
+
+        # convert the target idxs and decision_id to feature vector arrays
+        for record in resampling_data:
+            record["target_idxs"] = np.array(record["target_idxs"])
+            record["decision_id"] = np.array([record["decision_id"]])
+
+        # actually do the cloning and merging of the walkers
+        resampled_walkers = self.DECISION.action(walkers, [resampling_data])
+
+        # flatten the distance matrix and give the number of walkers
+        # as well for the resampler data, there is just one per cycle
+        resampler_data = [
+            {
+                "walker_projections": np.ravel(np.array(projection_list)),
+                "walker_variations:": np.ravel(np.array(walker_variations)),
+                "num_walkers": np.array([len(walkers)]),
+                "variation": np.array([variation]),
+            }
+        ]
+
+        return resampled_walkers, resampling_data, resampler_data

From 0cc65a5b821a67d6461bccd3138e2922fb82cd71 Mon Sep 17 00:00:00 2001
From: Samik Bose <bosesami@msu.edu>
Date: Thu, 8 Jan 2026 11:28:42 -0500
Subject: [PATCH 3/6] Update the CoordTICAProjector

---
 src/wepy/resampling/distances/distance.py |  22 ++++
 src/wepy/resampling/projectors/tica.py    | 134 ++++++++++++++++++++--
 src/wepy/runners/openmm.py                |   1 -
 3 files changed, 144 insertions(+), 13 deletions(-)

diff --git a/src/wepy/resampling/distances/distance.py b/src/wepy/resampling/distances/distance.py
index 822c9b28..d379b94c 100644
--- a/src/wepy/resampling/distances/distance.py
+++ b/src/wepy/resampling/distances/distance.py
@@ -118,6 +118,28 @@ def image(self, state):
     def image_distance(self, image_a, image_b):
         return np.sqrt((image_a[0] - image_b[0]) ** 2 + (image_a[1] - image_b[1]) ** 2)
 
+class ProjectorDistance(Distance):
+    """Take a projector as input 
+        
+    """
+    def __init__(self, projector):
+       """Construct a distance metric.
+
+        Parameters
+        ----------
+
+        projector : A Projector object, which implementes the project function 
+        """
+        
+        self.projector = projector
+
+    def image(self, state):
+        return self.projector.project(state)
+        
+    def image_distance(self, image_a, image_b):
+        return np.sqrt(np.sum(np.square(image_a - image_b)))
+
+
 class AtomPairDistance(Distance):
     """Constructs a vector of atomic distances for each state.
     Distance is the root mean squared distance between the vectors.
diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py
index edb7855d..f6393a2e 100644
--- a/src/wepy/resampling/projectors/tica.py
+++ b/src/wepy/resampling/projectors/tica.py
@@ -20,39 +20,50 @@ def shorten_vec(x, unitcell_side_lengths):
     pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0]
 
     for dim_idx in pos_idxs:
-        x[dim_idx] += unitcell_side_lengths[dim_idx]
+        x[dim_idx] -= unitcell_side_lengths[dim_idx]
 
     neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0]
 
     for dim_idx in neg_idxs:
-        x[dim_idx] -= unitcell_side_lengths[dim_idx]
+        x[dim_idx] += unitcell_side_lengths[dim_idx]
 
     return x
 
+
+def build_coord_feature(state, ref_pos, idxs,):
+
+    
+    return aligned_feat_coord
+    
+    
 class DistanceTICAProjector(Projector):
     """Projects a state into a predefined TICA space, using a set of distances as intermediate features.
     """
 
-    def __init__(self, dist_idxs, tica_vectors, periodic=True):
+    def __init__(self, dist_idxs, tica_model, periodic=True):
         """Construct a DistanceTICA projector.
 
         Parameters
         ----------
 
         dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances
-        tica_vectors : np.array of shape (nt,nd) - vectors for projecting into tica space
+        
+        tica_model : Deeptime or equivalent object that has a transform function, which 
+                    will be used to transform the distances into tica space.
+        
         periodic :    bool (default = True) - whether to use periodic boundary conditions to
                       minimize pair distances
         """
+
         self.dist_idxs = np.array(dist_idxs)
-        self.tica_vectors = np.array(tica_vectors)
+        self.model = tica_model
         self.periodic = periodic
 
-        assert self.dist_idxs.shape[0] == self.tica_vectors.shape[1], "Number of distances needs to match dimensionality of tica vectors!"
-        assert self.dist_idxs.shape[1] == 2, "Distance indices have incorrect shape"
+        # check for transform type functions
+        if hasattr(self.model, 'transform'):
 
-        self.ndim = self.tica_vectors.shape[0]
-      
+        self.ndim = self.model.dim
+    
     def project(self, state):
 
         # get all the displacement vectors
@@ -61,14 +72,113 @@ def project(self, state):
         if self.periodic:
             # get the box lengths from the vectors
             box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
-            dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v)))) for v in disp_vecs])
+            dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v, box_lengths)))) for v in disp_vecs])
         else:
             dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs])
             
+        tranformed_dists = self.model.transform(dists)
+
         # calculate projections
-        proj = np.zeros(self.ndim)
+        proj = np.zeros(self.ndim, tranformed_dists.shape[0])
         for i in range(self.ndim):
-            proj[i] = np.dot(self.tica_vectors[i], dists)
+            proj[i] = np.dot(tranformed_dists[i], dists)
+        
+        return proj
+
+class CoordTICAProjector(Projector):
+    """Projects a state into a predefined TICA space, using selected
+    Cartesian coordinates (e.g., Cα atoms) as features.
+
+    The feature vector is constructed by taking the positions of a
+    fixed set of atoms and flattening them to shape (natoms*3,).
+    """
+
+    def __init__(self, atom_idxs, tica_vectors, alignment_idxs,
+                 periodic=True):
+        """
+        Parameters
+        ----------
+        atom_idxs : array-like of shape (natoms,)
+            Indices of the atoms whose coordinates are used as features.
+            The order MUST match the order used when training TICA.
+
+        model: tica model to extract the vectors
+        
+        tica_vectors : np.ndarray of shape (ntica, natoms*3)
+            TICA eigenvectors for projecting into TICA space.
+
+        alignment_idxs: array-like of shape (align_atoms,) 
+            Indices of atoms whose coordinates are used as the reference for 
+            alignment of the frames. These atoms MUST match the atoms that
+            were used to align the frames before tica model training.
+
+        periodic : bool, default=True
+            Whether to take periodic boundary conditions into account.
+
+        """
+
+        self.atom_idxs = np.asarray(atom_idxs, dtype=int)
+        self.tica_vectors = np.asarray(tica_vectors, dtype=float)
+        self.periodic = periodic
+        self.alignment_idxs = alignment_idxs
+
+        natoms = self.atom_idxs.shape[0]
+        nfeat = natoms * 3
+
+        assert self.tica_vectors.ndim == 2, "tica_vectors must be 2D"
+        assert self.tica_vectors.shape[1] == nfeat, \
+            f"TICA vectors expect {self.tica_vectors.shape[1]} features, " \
+            f"but natoms*3 = {nfeat}"
+
+        self.ndim = self.tica_vectors.shape[0]
+
+    def project(self, state):
+        """
+        Project the given walker state into TICA space.
+
+        Parameters
+        ----------
+        state : dict-like
+            Must contain 'positions' with shape (N, 3) and, if
+            periodic=True, 'box_vectors'.
+
+        Returns
+        -------
+        proj : np.ndarray of shape (ntica,)
+            TICA coordinates for this state.
+        """
+        ## positions of selected atoms: (natoms, 3)
+        #pos = state['positions'][self.atom_idxs].copy()
+        ## 
+        #ref
+
+        ## ALWAYS ALIGN WITH A SPECIFIC SELECTION
+        ## USING GEOMM AND GROUP PAIR< CENTER AROUND
+        ## 
         
+        if self.periodic:
+            # use minimum-image convention for each displacement
+            box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"])
+            for i in range(pos.shape[0]):
+                disp = pos[i] - ref
+                disp = shorten_vec(disp, box_lengths)
+                pos[i] = disp
+        else:
+            pos -= ref
+        #else:
+        #    # optional: still can correct for PBC wrt some origin, but
+        #    # usually you want anchor_first_atom=True for coord-based TICA
+        #    if self.periodic:
+        #        logger.warning("periodic=True but anchor_first_atom=False; "
+        #                       "coordinates may be discontinuous across PBCs.")
+
+
+
+        # flatten to (natoms*3,)
+        feat_vec = pos.reshape(-1)
+        # project into TICA space: (ntica, nfeat) @ (nfeat,) -> (ntica,)
+        proj = self.tica_vectors.dot(feat_vec)
+
         return proj
 
+
diff --git a/src/wepy/runners/openmm.py b/src/wepy/runners/openmm.py
index c9c863ff..c59084a9 100644
--- a/src/wepy/runners/openmm.py
+++ b/src/wepy/runners/openmm.py
@@ -289,7 +289,6 @@ def __init__(
         self.platform_kwargs = platform_kwargs
 
         self.enforce_box = enforce_box
-        self.get_parameter_derivs = get_parameter_derivs
         
         self.getState_kwargs = dict(GET_STATE_KWARG_DEFAULTS)
         # update with the user based enforce_box

From f48b6e3b766d851f4ca7c6f6a3292591e4cf86c1 Mon Sep 17 00:00:00 2001
From: Samik Bose <bosesami@msu.edu>
Date: Thu, 8 Jan 2026 11:43:57 -0500
Subject: [PATCH 4/6] Update the CoordTICAProjector

---
 src/wepy/resampling/projectors/tica.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py
index f6393a2e..9e68787c 100644
--- a/src/wepy/resampling/projectors/tica.py
+++ b/src/wepy/resampling/projectors/tica.py
@@ -76,13 +76,13 @@ def project(self, state):
         else:
             dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs])
             
-        tranformed_dists = self.model.transform(dists)
-
-        # calculate projections
-        proj = np.zeros(self.ndim, tranformed_dists.shape[0])
-        for i in range(self.ndim):
-            proj[i] = np.dot(tranformed_dists[i], dists)
+        proj = self.model.transform(dists)
         
+        ## calculate projections
+        #proj = np.zeros(self.ndim, tranformed_dists.shape[0])
+        #for i in range(self.ndim):
+        #    proj[i] = np.dot(tranformed_dists[i], dists)
+        # 
         return proj
 
 class CoordTICAProjector(Projector):

From 61d17469ef2ce3343dd43f2a65e7ed03b41072f6 Mon Sep 17 00:00:00 2001
From: Samik Bose <bosesami@msu.edu>
Date: Wed, 14 Jan 2026 14:02:28 -0500
Subject: [PATCH 5/6] TODO: CoordTICAProjector()

---
 src/wepy/resampling/projectors/tica.py | 20 +++++++++++---------
 src/wepy/resampling/resamplers/revo.py |  4 ++--
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py
index 9e68787c..a95f41ff 100644
--- a/src/wepy/resampling/projectors/tica.py
+++ b/src/wepy/resampling/projectors/tica.py
@@ -10,7 +10,6 @@
 
 from wepy.resampling.projectors.projector import Projector
 from wepy.util.util import box_vectors_to_lengths_angles
-#from geomm.grouping import shorten_vec
 
 def shorten_vec(x, unitcell_side_lengths):
     """
@@ -32,6 +31,13 @@ def shorten_vec(x, unitcell_side_lengths):
 
 def build_coord_feature(state, ref_pos, idxs,):
 
+    # get the state from the simulation
+
+    # extract the position
+
+    # now use the same idxs to align
+
+    # return the aligned coords
     
     return aligned_feat_coord
     
@@ -56,12 +62,12 @@ def __init__(self, dist_idxs, tica_model, periodic=True):
         """
 
         self.dist_idxs = np.array(dist_idxs)
-        self.model = tica_model
         self.periodic = periodic
 
+        self.model = tica_model
         # check for transform type functions
-        if hasattr(self.model, 'transform'):
-
+        #if hasattr(self.model, 'transform'):
+        #    
         self.ndim = self.model.dim
     
     def project(self, state):
@@ -78,11 +84,6 @@ def project(self, state):
             
         proj = self.model.transform(dists)
         
-        ## calculate projections
-        #proj = np.zeros(self.ndim, tranformed_dists.shape[0])
-        #for i in range(self.ndim):
-        #    proj[i] = np.dot(tranformed_dists[i], dists)
-        # 
         return proj
 
 class CoordTICAProjector(Projector):
@@ -147,6 +148,7 @@ def project(self, state):
         proj : np.ndarray of shape (ntica,)
             TICA coordinates for this state.
         """
+        
         ## positions of selected atoms: (natoms, 3)
         #pos = state['positions'][self.atom_idxs].copy()
         ## 
diff --git a/src/wepy/resampling/resamplers/revo.py b/src/wepy/resampling/resamplers/revo.py
index 4080437c..bb19bff4 100644
--- a/src/wepy/resampling/resamplers/revo.py
+++ b/src/wepy/resampling/resamplers/revo.py
@@ -756,7 +756,7 @@ class REVOProjectionResampler(REVOResampler):
     for V_i that depends solely on walker i.
 
     As in the original REVO, this algorithm needs the following parameters:
-
+proximity_score
        pmin: the minimum statistical weight. REVO does not clone
        walkers with a weight less than pmin.
 
@@ -1262,7 +1262,7 @@ def resample(self, walkers):
         resampler_data = [
             {
                 "walker_projections": np.ravel(np.array(projection_list)),
-                "walker_variations:": np.ravel(np.array(walker_variations)),
+                "walker_variations": np.ravel(np.array(walker_variations)),
                 "num_walkers": np.array([len(walkers)]),
                 "variation": np.array([variation]),
             }

From 7055f0676b9ef7be7b22463c3a7b0c9030c08268 Mon Sep 17 00:00:00 2001
From: Samik Bose <bosesami@msu.edu>
Date: Mon, 9 Mar 2026 16:52:32 -0400
Subject: [PATCH 6/6] tica projectors-distances updated

---
 src/wepy/resampling/distances/distance.py |   4 +-
 src/wepy/resampling/projectors/tica.py    | 244 ++++++++++++++--------
 2 files changed, 156 insertions(+), 92 deletions(-)

diff --git a/src/wepy/resampling/distances/distance.py b/src/wepy/resampling/distances/distance.py
index d379b94c..bd9ec4fd 100644
--- a/src/wepy/resampling/distances/distance.py
+++ b/src/wepy/resampling/distances/distance.py
@@ -123,11 +123,11 @@ class ProjectorDistance(Distance):
         
     """
     def __init__(self, projector):
-       """Construct a distance metric.
+        """Construct a distance metric.
 
         Parameters
         ----------
-
+        
         projector : A Projector object, which implementes the project function 
         """
         
diff --git a/src/wepy/resampling/projectors/tica.py b/src/wepy/resampling/projectors/tica.py
index a95f41ff..92ef2ad0 100644
--- a/src/wepy/resampling/projectors/tica.py
+++ b/src/wepy/resampling/projectors/tica.py
@@ -8,130 +8,204 @@
 # Third Party Library
 import numpy as np
 
+# geomm functions
+from geomm.grouping import group_pair
+from geomm.superimpose import superimpose
+
+# wepy functions
 from wepy.resampling.projectors.projector import Projector
 from wepy.util.util import box_vectors_to_lengths_angles
 
-def shorten_vec(x, unitcell_side_lengths):
-    """
-    For a given vector x between two points in a periodic box, return the
-    shortest version of that vector.
+
+def aligned(coords, ref_coords,unitcell_length, alignment_idxs, pair_idx1,pair_idx2):
+
+    """For a frame from a trajectory this function does a bunch of operations
+    systematically:
+    (i) First moves pair_idx2 coordinates to the image of the periodic unitcell that minimizes the 
+    difference between the centers of geometry between the pair_idx2 and pair_idx1 (e.g. a protein and ligand).
+    (ii) Then, centers the set of coordinates around a set of alingment atom indices.
+    (iii) Finally superimpose all the frames on top of a reference frame. (alignment)
+
+
+    This uses the geomm group pair function, followed by centering of grouped snapshots and then again
+    geomm superimpose function.
+    Unlike the 'aligned_frames' function, it only operates over a single frame (not the entire trajectory)
+
+    Parameters
+    ----------
+
+    coords : arraylike
+        The coordinates array of the frame of the particles you will be transforming.
+        (group pairing followed by centering followed by superimpose/alignment)
+
+    ref_coords: arraylike
+        Output of the ref_centered_pose function
+        Centered reference frame coordinates to be used in superimpose
+
+    unitcell_length : arraylike 
+        The lengths of the sides of a rectangular unitcell.
+
+    alignment_idxs : arraylike 
+        Collection of the indices which will be used to center the snapshot
+        and align them against the ref pose.
+        Please ensure that the same indices are used while centering the reference pose too.
+
+    pair_idx1 : arraylike 
+        Collection of the indices that define that member of the pair.
+
+    pair_idxs2 : arraylike
+        Collection of the indices that define that member of the pair.
+
+    Returns
+    -------
+
+    superimposed_pos : arrays
+        Transformed coordinates of the frame.
+        Transformation: group pairing followed by centering followed by superimpose/alignment.
+
     """
-    pos_idxs = np.where(x > 0.5*unitcell_side_lengths)[0]
 
-    for dim_idx in pos_idxs:
-        x[dim_idx] -= unitcell_side_lengths[dim_idx]
 
-    neg_idxs = np.where(x < -0.5*unitcell_side_lengths)[0]
 
-    for dim_idx in neg_idxs:
-        x[dim_idx] += unitcell_side_lengths[dim_idx]
+    assert coords.shape[1] == 3, "coordinates are not of 3 dimensions"
+    assert coords.shape[0] == ref_coords.shape[0], "Number of atoms does not match between reference and provided coords"
 
-    return x
+    grouped_pos = group_pair(coords, unitcell_length, pair_idx1, pair_idx2)
+    centroid = np.average(grouped_pos[alignment_idxs], axis =0)
+    grouped_centered_pos = grouped_pos - centroid
 
+    superimposed_pos, _ , _ = superimpose(ref_coords,grouped_centered_pos, idxs=alignment_idxs)
 
-def build_coord_feature(state, ref_pos, idxs,):
+    return superimposed_pos
 
-    # get the state from the simulation
+def shorten_vecs(disp_vecs, box_lengths):
+    """
+    Apply minimum-image convention to displacement vectors in an
+    cubic/tetragonal/rectangular(?)/orthorhombic periodic box (vectorized). Seems faster.
+
+    Parameters
+    ----------
+    disp_vecs : array_like, shape (n, 3)
+        Displacement vectors[[dx_i, dy_i, dz_i], [dx_j, dy_j, dz_j] ...].
+    box_lengths : array_like, shape (3,)
+        Box side lengths [Lx, Ly, Lz].
+
+    Returns
+    -------
+    numpy.ndarray, shape (n, 3)
+        Minimum-image displacement vectors.
+    """
 
-    # extract the position
+    X = np.asarray(disp_vecs, dtype=float)      # (n, 3)
+    L = np.asarray(box_lengths, dtype=float)   # (3,)
+
+    # Broadcasting: (n,3) / (3,) works automatically
+
+    return X - L * np.round(X / L)
+    # this should always work, np.round(0.6) = 1, np.round(-0.6) = -1 and np.round(0.2)=0.
 
-    # now use the same idxs to align
 
-    # return the aligned coords
-    
-    return aligned_feat_coord
-    
-    
 class DistanceTICAProjector(Projector):
-    """Projects a state into a predefined TICA space, using a set of distances as intermediate features.
+    """
+    Projects a state into a predefined TICA space, using a set of distances as intermediate features.
     """
 
     def __init__(self, dist_idxs, tica_model, periodic=True):
+
         """Construct a DistanceTICA projector.
 
         Parameters
         ----------
 
-        dist_idxs : np.array of shape (nd,2) - indices of atoms for computing distances
+        dist_idxs : np.array of shape (nd,2) 
+            Indices of atoms for computing distances in an image.
+        
+        tica_model : Deeptime or equivalent object 
+            It MUST have a transform function, which 
+            will be used to transform the distances into tica space.
         
-        tica_model : Deeptime or equivalent object that has a transform function, which 
-                    will be used to transform the distances into tica space.
+        periodic : bool (default = True)
+            Whether to use periodic boundary conditions to minimize pair distances
         
-        periodic :    bool (default = True) - whether to use periodic boundary conditions to
-                      minimize pair distances
         """
 
         self.dist_idxs = np.array(dist_idxs)
         self.periodic = periodic
 
         self.model = tica_model
-        # check for transform type functions
-        #if hasattr(self.model, 'transform'):
-        #    
+        #check if the model has .transform object/attribute 
+        # hasattr(self.model, 'transform'):
+
         self.ndim = self.model.dim
     
     def project(self, state):
 
         # get all the displacement vectors
         disp_vecs = state['positions'][self.dist_idxs[:,0]] - state['positions'][self.dist_idxs[:,1]]
-            
+
         if self.periodic:
-            # get the box lengths from the vectors
-            box_lengths, box_angles = box_vectors_to_lengths_angles(state["box_vectors"])
-            dists = np.array([np.sqrt(np.sum(np.square(shorten_vec(v, box_lengths)))) for v in disp_vecs])
-        else:
-            dists = np.array([np.sqrt(np.sum(np.square(v))) for v in disp_vecs])
-            
-        proj = self.model.transform(dists)
+            box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"])
+            disp_vecs = shorten_vecs(disp_vecs, box_lengths)
+
+        dists = np.linalg.norm(disp_vecs, axis=1)
         
+        #print(f'Dist: {dists}')
+        proj = self.model.transform(dists)
+
+        print(f'Proj: {proj}')
         return proj
 
+
 class CoordTICAProjector(Projector):
     """Projects a state into a predefined TICA space, using selected
-    Cartesian coordinates (e.g., Cα atoms) as features.
+    Cartesian coordinates (e.g., CA atoms or backbone atoms) as features.
 
     The feature vector is constructed by taking the positions of a
     fixed set of atoms and flattening them to shape (natoms*3,).
     """
 
-    def __init__(self, atom_idxs, tica_vectors, alignment_idxs,
-                 periodic=True):
+    def __init__(self, alignment_idxs, atom_idxs, tica_model, ref_centered_pos, periodic=True):
         """
         Parameters
         ----------
+        alignment_idxs: array-like of shape (align_atoms,)
+            Indices of atoms whose coordinates are used as the reference for 
+            alignment of the frames. These atoms MUST match the atoms that
+            were used to align the frames before tica model training.
+
         atom_idxs : array-like of shape (natoms,)
             Indices of the atoms whose coordinates are used as features.
             The order MUST match the order used when training TICA.
 
-        model: tica model to extract the vectors
         
-        tica_vectors : np.ndarray of shape (ntica, natoms*3)
-            TICA eigenvectors for projecting into TICA space.
-
-        alignment_idxs: array-like of shape (align_atoms,) 
-            Indices of atoms whose coordinates are used as the reference for 
-            alignment of the frames. These atoms MUST match the atoms that
-            were used to align the frames before tica model training.
-
-        periodic : bool, default=True
-            Whether to take periodic boundary conditions into account.
+        model: Deeptime or equivalent object 
+            It MUST have a transform function, which 
+            will be used to transform the distances into tica space.
+
+        ref_centered_pos: arraylike
+            MUST FOLLOW some conditions:
+            1. Centered reference frame coordinates to be used in the superimpose function
+            2. The exact ref pose used while making the coord features for training the tica model.
+            3. Generally should be the output of the ref_centered_pose function
+        
+        periodic : bool (default = True)
+            Whether to use periodic boundary conditions to minimize pair distances
 
         """
 
         self.atom_idxs = np.asarray(atom_idxs, dtype=int)
-        self.tica_vectors = np.asarray(tica_vectors, dtype=float)
-        self.periodic = periodic
-        self.alignment_idxs = alignment_idxs
+        self.alignment_idxs = np.array(alignment_idxs, dtype=int)
+        self.ref_centered_pose = ref_centered_pos
 
-        natoms = self.atom_idxs.shape[0]
-        nfeat = natoms * 3
+        self.periodic = periodic
+        self.model = tica_model
+        #check if the model has .transform object/attribute 
+        # hasattr(self.model, 'transform'):
+        self.ndim = self.model.dim
 
-        assert self.tica_vectors.ndim == 2, "tica_vectors must be 2D"
-        assert self.tica_vectors.shape[1] == nfeat, \
-            f"TICA vectors expect {self.tica_vectors.shape[1]} features, " \
-            f"but natoms*3 = {nfeat}"
+        #natoms = self.atom_idxs.shape[0]
+        #nfeat = natoms * 3
 
-        self.ndim = self.tica_vectors.shape[0]
 
     def project(self, state):
         """
@@ -149,38 +223,28 @@ def project(self, state):
             TICA coordinates for this state.
         """
         
-        ## positions of selected atoms: (natoms, 3)
-        #pos = state['positions'][self.atom_idxs].copy()
-        ## 
-        #ref
-
-        ## ALWAYS ALIGN WITH A SPECIFIC SELECTION
-        ## USING GEOMM AND GROUP PAIR< CENTER AROUND
-        ## 
         
         if self.periodic:
-            # use minimum-image convention for each displacement
             box_lengths, _ = box_vectors_to_lengths_angles(state["box_vectors"])
-            for i in range(pos.shape[0]):
-                disp = pos[i] - ref
-                disp = shorten_vec(disp, box_lengths)
-                pos[i] = disp
-        else:
-            pos -= ref
-        #else:
-        #    # optional: still can correct for PBC wrt some origin, but
-        #    # usually you want anchor_first_atom=True for coord-based TICA
-        #    if self.periodic:
-        #        logger.warning("periodic=True but anchor_first_atom=False; "
-        #                       "coordinates may be discontinuous across PBCs.")
-
-
-
-        # flatten to (natoms*3,)
-        feat_vec = pos.reshape(-1)
-        # project into TICA space: (ntica, nfeat) @ (nfeat,) -> (ntica,)
-        proj = self.tica_vectors.dot(feat_vec)
 
-        return proj
+            pos_aligned = aligned(coords = state['positions'],
+                                            ref_coords = self.ref_centered_pose,
+                                            unitcell_length = box_lengths,
+                                            alignment_idxs = self.alignment_idxs,
+                                            pair_idx1 = self.alignment_idxs,
+                                            pair_idx2 = self.atom_idxs)
+
+
+        else: # Discuss with Alex
+            centroid = np.average(state['positions'][self.alignment_idxs], axis =0)
+            centered_pos = state['positions'] - centroid
+            pos_aligned, _ , _ = superimpose(self.ref_centered_pose, centered_pos, idxs=self.alignment_idxs)
 
 
+        feat_coord = pos_aligned[self.atom_idxs].reshape(1, -1) 
+        # without reshape it is still not in the input shape for the parent tica model 
+        # the parent tica model is trained on a flattened array.
+
+        proj = self.model.transform(feat_coord)
+
+        return proj