
Commit 1a5e044

Merge pull request #33 from francois-drielsma/develop
Various bug fixes and functionality extensions
2 parents: c8e4c09 + fcc7deb; commit: 1a5e044

29 files changed: +707 −176 lines

bin/run.py

Lines changed: 8 additions & 4 deletions
@@ -25,7 +25,8 @@
 from spine.main import run
 
 
-def main(config, source, source_list, output, n, nskip, detect_anomaly, log_dir, weight_prefix, weight_path):
+def main(config, source, source_list, output, n, nskip, detect_anomaly,
+         log_dir, weight_prefix, weight_path):
     """Main driver for training/validation/inference/analysis.
 
     Performs these basic functions:
@@ -53,7 +54,8 @@ def main(config, source, source_list, output, n, nskip, detect_anomaly, log_dir,
     weight_prefix : str
         Path to the directory for storing the training weights
     weight_path : str
-        Path string to a weight file or pattern for multiple weight files to load the model weights
+        Path string to a weight file or pattern for multiple weight files
+        to load the model weights
     """
     # Try to find the configuration file using the absolute path or under
     # the 'config' directory of the parent SPINE repository
@@ -112,7 +114,8 @@ def main(config, source, source_list, output, n, nskip, detect_anomaly, log_dir,
 
     if weight_prefix is not None:
         if not 'train' in cfg['base']:
-            raise KeyError('--weight_prefix flag provided: must specify `train` in the `base` block.')
+            raise KeyError("--weight_prefix flag provided: must specify "
+                           "`train` in the `base` block.")
         cfg['base']['train']['weight_prefix'] = weight_prefix
 
     if weight_path is not None:
@@ -182,4 +185,5 @@ def main(config, source, source_list, output, n, nskip, detect_anomaly, log_dir,
 
 # Execute the main function
 main(args.config, args.source, args.source_list, args.output, args.n,
-     args.nskip, args.detect_anomaly, args.log_dir, args.weight_prefix, args.weight_path)
+     args.nskip, args.detect_anomaly, args.log_dir, args.weight_prefix,
+     args.weight_path)
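
The flag-handling hunks above all follow one pattern: a CLI flag, when provided, overrides the parsed YAML configuration by mutating the config dictionary before it is handed to `run`. A minimal standalone sketch of that pattern, with a made-up `cfg` layout rather than SPINE's actual schema:

    # Sketch of the CLI-over-config override pattern used in bin/run.py.
    # The `cfg` contents and flag value below are illustrative only.
    cfg = {'base': {'train': {}}}
    weight_prefix = 'weights/snapshot'  # hypothetical value of --weight_prefix

    if weight_prefix is not None:
        if 'train' not in cfg['base']:
            raise KeyError("--weight_prefix flag provided: must specify "
                           "`train` in the `base` block.")
        cfg['base']['train']['weight_prefix'] = weight_prefix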

spine/ana/base.py

Lines changed: 77 additions & 2 deletions
@@ -44,8 +44,15 @@ class AnaBase(ABC):
     # Valid run modes
     _run_modes = ('reco', 'truth', 'both', 'all')
 
-    def __init__(self, obj_type=None, run_mode=None, append=False,
-                 overwrite=False, log_dir=None, prefix=None):
+    # List of known point modes for true particles and their corresponding keys
+    _point_modes = (
+        ('points', 'points_label'),
+        ('points_adapt', 'points'),
+        ('points_g4', 'points_g4')
+    )
+
+    def __init__(self, obj_type=None, run_mode=None, truth_point_mode=None,
+                 append=False, overwrite=False, log_dir=None, prefix=None):
         """Initialize default analysis script object properties.
 
         Parameters
@@ -56,6 +63,10 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
             If specified, tells whether the analysis script must run on
             reconstructed ('reco'), true ('true') or both objects
             ('both' or 'all')
+        truth_point_mode : str, optional
+            If specified, tells which attribute of the :class:`TruthFragment`,
+            :class:`TruthParticle` or :class:`TruthInteraction` object to use
+            to fetch its point coordinates
         append : bool, default False
             If True, appends existing CSV files instead of creating new ones
         overwrite : bool, default False
@@ -114,6 +125,14 @@ def __init__(self, obj_type=None, run_mode=None, append=False,
         # Update underlying keys, if needed
         self.update_keys({k: True for k in self.obj_keys})
 
+        # If a truth point mode is specified, store it
+        if truth_point_mode is not None:
+            assert truth_point_mode in self.point_modes, (
+                "The `truth_point_mode` argument must be one of "
+                f"{self.point_modes.keys()}. Got `{truth_point_mode}` instead.")
+            self.truth_point_mode = truth_point_mode
+            self.truth_index_mode = truth_point_mode.replace('points', 'index')
+
         # Store the append flag
         self.append_file = append
         self.overwrite_file = overwrite
@@ -167,6 +186,18 @@ def keys(self, keys):
         """
         self._keys = tuple(keys.items())
 
+    @property
+    def point_modes(self):
+        """Dictionary which makes the correspondence between the name of a
+        true object point attribute and the underlying point tensor it
+        points to.
+
+        Returns
+        -------
+        Dict[str, str]
+            Dictionary of (attribute, key) mappings for point coordinates
+        """
+        return dict(self._point_modes)
+
     def update_keys(self, update_dict):
         """Update the underlying set of keys and their necessity in place.
 
@@ -249,6 +280,50 @@ def __call__(self, data, entry=None):
         # Run the analysis script
         return self.process(data_filter)
 
+    def get_index(self, obj):
+        """Get a certain pre-defined index attribute of an object.
+
+        The :class:`TruthFragment`, :class:`TruthParticle` and
+        :class:`TruthInteraction` object indexes are obtained using the
+        `truth_index_mode` attribute of the class.
+
+        Parameters
+        ----------
+        obj : Union[FragmentBase, ParticleBase, InteractionBase]
+            Fragment, Particle or Interaction object
+
+        Returns
+        -------
+        np.ndarray
+            (N) Object index
+        """
+        if not obj.is_truth:
+            return obj.index
+        else:
+            return getattr(obj, self.truth_index_mode)
+
+    def get_points(self, obj):
+        """Get a certain pre-defined point attribute of an object.
+
+        The :class:`TruthFragment`, :class:`TruthParticle` and
+        :class:`TruthInteraction` object points are obtained using the
+        `truth_point_mode` attribute of the class.
+
+        Parameters
+        ----------
+        obj : Union[FragmentBase, ParticleBase, InteractionBase]
+            Fragment, Particle or Interaction object
+
+        Returns
+        -------
+        np.ndarray
+            (N, 3) Point coordinates
+        """
+        if not obj.is_truth:
+            return obj.points
+        else:
+            return getattr(obj, self.truth_point_mode)
+
     @abstractmethod
     def process(self, data):
         """Place-holder method to be defined in each analysis script.
spine/ana/diag/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -3,8 +3,10 @@
 This submodule is used to run basic diagnostics analyses such as:
 - Track dE/dx profile
 - Track energy reconstruction
+- Track completeness
 - Shower start dE/dx
 - ...
 '''
 
 from .shower import *
+from .track import *

spine/ana/diag/track.py

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
+"""Module to evaluate diagnostic metrics on tracks."""
+
+import numpy as np
+from scipy.spatial.distance import cdist
+
+from spine.ana.base import AnaBase
+
+from spine.utils.globals import TRACK_SHP
+from spine.utils.numba_local import principal_components
+
+
+__all__ = ['TrackCompletenessAna']
+
+
+class TrackCompletenessAna(AnaBase):
+    """This analysis script identifies gaps in tracks and measures the
+    cumulative length of these gaps relative to the track length.
+
+    This is a useful diagnostic tool to evaluate the space-point efficiency
+    on tracks (a good standard candle, as a track should have no gaps at
+    all in a perfectly efficient detector).
+    """
+
+    # Name of the analysis script (as specified in the configuration)
+    name = 'track_completeness'
+
+    def __init__(self, time_window=None, run_mode='both',
+                 truth_point_mode='points', **kwargs):
+        """Initialize the analysis script.
+
+        Parameters
+        ----------
+        time_window : List[float]
+            Time window within which to include particles (only works for
+            `truth`)
+        **kwargs : dict, optional
+            Additional arguments to pass to :class:`AnaBase`
+        """
+        # Initialize the parent class
+        super().__init__('particle', run_mode, truth_point_mode, **kwargs)
+
+        # Store the time window
+        self.time_window = time_window
+        assert time_window is None or len(time_window) == 2, (
+            "Time window must be specified as an array of two values.")
+        assert time_window is None or run_mode == 'truth', (
+            "Time of reconstructed particle is unknown.")
+
+        # Make sure the metadata is provided (rasterization needed)
+        self.update_keys({'meta': True})
+
+        # Initialize the CSV writer(s) you want
+        for prefix in self.prefixes:
+            self.initialize_writer(prefix)
+
+    def process(self, data):
+        """Evaluate track completeness for tracks in one entry.
+
+        Parameters
+        ----------
+        data : dict
+            Dictionary of data products
+        """
+        # Fetch the pixel size in this image (assume cubic cells)
+        pixel_size = data['meta'].size[0]
+
+        # Loop over the types of particle data products
+        for key in self.obj_keys:
+            # Fetch the prefix ('reco' or 'truth')
+            prefix = key.split('_')[0]
+
+            # Loop over particle objects
+            for part in data[key]:
+                # Check that the particle is a track
+                if part.shape != TRACK_SHP:
+                    continue
+
+                # If needed, check on the particle time
+                if self.time_window is not None:
+                    if part.t < self.time_window[0] or part.t > self.time_window[1]:
+                        continue
+
+                # Initialize the particle dictionary
+                comp_dict = {'particle_id': part.id}
+
+                # Fetch the particle point coordinates
+                points = self.get_points(part)
+
+                # Find start/end points, collapse onto track cluster
+                start = points[np.argmin(cdist([part.start_point], points))]
+                end = points[np.argmin(cdist([part.end_point], points))]
+
+                # Add the direction of the track
+                vec = end - start
+                length = np.linalg.norm(vec)
+                if length:
+                    vec /= length
+
+                comp_dict['size'] = len(points)
+                comp_dict['length'] = length
+                comp_dict.update(
+                        {'dir_x': vec[0], 'dir_y': vec[1], 'dir_z': vec[2]})
+
+                # Chunk out the track along gaps, estimate gap length
+                chunk_labels = self.cluster_track_chunks(
+                        points, start, end, pixel_size)
+                gaps = self.sequential_cluster_distances(
+                        points, chunk_labels, start)
+
+                # Subtract minimum gap distance due to rasterization
+                min_gap = pixel_size/np.max(np.abs(vec))
+                gaps -= min_gap
+
+                # Store gap information
+                comp_dict['num_gaps'] = len(gaps)
+                comp_dict['gap_length'] = np.sum(gaps)
+                comp_dict['gap_frac'] = np.sum(gaps)/length
+
+                # Append the dictionary to the CSV log
+                self.append(prefix, **comp_dict)
+
+    @staticmethod
+    def cluster_track_chunks(points, start_point, end_point, pixel_size):
+        """Find points where the track is broken and divide the track into
+        self-contained chunks which are Linf-connected (Moore neighbors).
+
+        Parameters
+        ----------
+        points : np.ndarray
+            (N, 3) List of track cluster point coordinates
+        start_point : np.ndarray
+            (3) Start point of the track cluster
+        end_point : np.ndarray
+            (3) End point of the track cluster
+        pixel_size : float
+            Dimension of one pixel, used to identify what is big enough to
+            constitute a break
+
+        Returns
+        -------
+        np.ndarray
+            (N) Track chunk labels
+        """
+        # Project and cluster on the projected axis
+        direction = (end_point-start_point)/np.linalg.norm(end_point-start_point)
+        projs = np.dot(points - start_point, direction)
+        perm = np.argsort(projs)
+        seps = projs[perm][1:] - projs[perm][:-1]
+        breaks = np.where(seps > pixel_size*1.1)[0] + 1
+        cluster_labels = np.empty(len(projs), dtype=int)
+        for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
+            cluster_labels[perm[index]] = i
+
+        return cluster_labels
+
+    @staticmethod
+    def sequential_cluster_distances(points, labels, start_point):
+        """Order clusters by distance from a starting point and compute
+        the distances between successive clusters.
+
+        Parameters
+        ----------
+        points : np.ndarray
+            (N, 3) List of track cluster point coordinates
+        labels : np.ndarray
+            (N) Track chunk labels
+        start_point : np.ndarray
+            (3) Start point of the track cluster
+
+        Returns
+        -------
+        np.ndarray
+            (C - 1) Distances between successive chunks
+        """
+        # If there's only one cluster, nothing to do here
+        unique_labels = np.unique(labels)
+        if len(unique_labels) < 2:
+            return np.empty(0, dtype=float)
+
+        # Order clusters
+        start_dist = cdist([start_point], points).flatten()
+        start_clust_dist = np.empty(len(unique_labels))
+        for i, c in enumerate(unique_labels):
+            start_clust_dist[i] = np.min(start_dist[labels == c])
+        ordered_labels = unique_labels[np.argsort(start_clust_dist)]
+
+        # Compute the intercluster distances
+        n_gaps = len(ordered_labels) - 1
+        dists = np.empty(n_gaps, dtype=float)
+        for i in range(n_gaps):
+            points_i = points[labels == ordered_labels[i]]
+            points_j = points[labels == ordered_labels[i + 1]]
+            dists[i] = np.min(cdist(points_i, points_j))
+
+        return dists
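
The chunking step is easy to sanity-check on synthetic input. The toy below replays the projection/sort/split logic of `cluster_track_chunks` on a made-up straight track of ten points with a single four-pixel gap:

    import numpy as np

    pixel_size = 1.0
    # Axis-aligned track with points at x = 0..4 and 8..12: one clear gap
    xs = np.concatenate([np.arange(0, 5), np.arange(8, 13)]).astype(float)
    points = np.stack([xs, np.zeros_like(xs), np.zeros_like(xs)], axis=1)
    start_point, end_point = points[0], points[-1]

    # Same projection/sort/split logic as cluster_track_chunks
    direction = (end_point - start_point)/np.linalg.norm(end_point - start_point)
    projs = np.dot(points - start_point, direction)
    perm = np.argsort(projs)
    seps = projs[perm][1:] - projs[perm][:-1]
    breaks = np.where(seps > pixel_size*1.1)[0] + 1
    labels = np.empty(len(projs), dtype=int)
    for i, index in enumerate(np.split(np.arange(len(projs)), breaks)):
        labels[perm[index]] = i

    print(labels)  # [0 0 0 0 0 1 1 1 1 1]: two chunks on either side of the gap

On this input `sequential_cluster_distances` reports a single gap of 4 units; `process` then subtracts the rasterization minimum (`min_gap = pixel_size/np.max(np.abs(vec))`, which is 1.0 for an axis-aligned track), leaving an effective gap length of 3.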

spine/ana/factories.py

Lines changed: 2 additions & 2 deletions
@@ -2,11 +2,11 @@
 
 from spine.utils.factory import module_dict, instantiate
 
-from . import metric, script
+from . import diag, metric, script
 
 # Build a dictionary of available analysis modules
 ANA_DICT = {}
-for module in [metric, script]:
+for module in [diag, metric, script]:
     ANA_DICT.update(**module_dict(module))
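This two-line change is all that is needed to expose the new diagnostic because registration is name-driven: each analysis class advertises a `name` attribute ('track_completeness' above) that keys it in ANA_DICT. A rough sketch of what a `module_dict`-style registry could look like; the actual `spine.utils.factory` implementation may differ:

    # Hypothetical name -> class registry; not the actual spine.utils.factory code.
    def module_dict(module):
        """Map each exported class's `name` attribute to the class itself."""
        registry = {}
        for attr in getattr(module, '__all__', []):
            obj = getattr(module, attr)
            if hasattr(obj, 'name'):
                registry[obj.name] = obj
        return registry

    # With `diag` included, ANA_DICT['track_completeness'] resolves to
    # TrackCompletenessAna, which `instantiate` can then build from the config.
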
spine/ana/script/save.py

Lines changed: 2 additions & 2 deletions
@@ -139,9 +139,7 @@ def process(self, data):
             prefix, obj_type = key.split('_')
             other = other_prefix[prefix]
             attrs = self.attrs[key]
-            attrs_other = self.attrs[f'{other}_{obj_type}']
             lengths = self.lengths
-            lengths_other = self.lengths
             if (self.match_mode is None or
                 self.match_mode == f'{other}_to_{prefix}'):
                 # If there are no matches, save objects by themselves
@@ -153,6 +151,8 @@ def process(self, data):
                 # match on a single row
                 match_suffix = f'{prefix[0]}2{other[0]}'
                 match_key = f'{obj_type[:-1]}_matches_{match_suffix}'
+                attrs_other = self.attrs[f'{other}_{obj_type}']
+                lengths_other = self.lengths  # TODO
                 for idx, (obj_i, obj_j) in enumerate(data[match_key]):
                     src_dict = obj_i.scalar_dict(attrs, lengths)
                     if obj_j is not None: