Allow segmentation parser to take multiple paths
AndrewRadev committed Dec 6, 2024
1 parent 63f2e6b commit 2bd72d5
Showing 4 changed files with 35 additions and 16 deletions.
lib/segmentation/__init__.py (6 additions, 4 deletions)
@@ -24,14 +24,16 @@ def write_segmentations(seg_objects, output_file):


class SegmentationParser:
def __init__(self, path: Path|str):
def __init__(self, *paths: Path|str):
"""
The intended input is a filesystem path where segmentation data can be
The intended inputs are filesystem paths where segmentation data can be
found.
TODO multiple paths
These could be PDBs or trajectories, but they will most likely be the
final results of external tools that need to be parsed into a unified
format.
"""
self.path = path
self.paths = paths

@abstractmethod
def parse(self) -> Iterator[Tuple[str, int, str]]:
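
As a quick illustration of the new contract: every positional argument ends up in self.paths, so a parser that genuinely needs more than one input can accept them directly. The CombinedParser class below is a hypothetical sketch; its name, its input files, and the lib.segmentation import path are assumptions, not part of this commit.

from pathlib import Path
from typing import Iterator, Tuple

from lib.segmentation import SegmentationParser  # import path assumed from the repository layout


class CombinedParser(SegmentationParser):
    """Hypothetical parser that reads two result files produced by one external tool."""

    def parse(self) -> Iterator[Tuple[str, int, str]]:
        # self.paths is the tuple collected by SegmentationParser.__init__(*paths)
        summary_path, chopping_path = (Path(p) for p in self.paths)

        domain_count = int(summary_path.read_text().strip())
        chopping = chopping_path.read_text().strip()

        yield ("CombinedTool", domain_count, chopping)


# parser = CombinedParser("results/summary.txt", "results/chopping.txt")
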
lib/segmentation/chainsaw.py (11 additions, 5 deletions)
@@ -6,13 +6,19 @@


class Parser(SegmentationParser):
def __init__(self, csv_path):
super().__init__(csv_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
rows = self._read_csv_rows(self.path, delimiter='\t')
csv_path = self.paths[0]

rows = _read_csv_rows(csv_path, delimiter='\t')
data = rows[0]

yield ("Chainsaw", data['ndom'], data['chopping'])

def _read_csv_rows(self, path, **kwargs):
with open(self.path) as f:
reader = csv.DictReader(f, **kwargs)
return [row for row in reader]

def _read_csv_rows(path, **kwargs):
with open(path) as f:
reader = csv.DictReader(f, **kwargs)
return [row for row in reader]
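
In practice the Chainsaw parser is now constructed with the TSV path as a plain positional argument, which the base class stores as self.paths[0]. The snippet below is a hypothetical usage sketch (the import path and file name are assumptions). Moving _read_csv_rows to module level also fixes the old helper, which ignored its path argument in favour of self.path.

from lib.segmentation import chainsaw

# File name is illustrative only.
parser = chainsaw.Parser("results/chainsaw_output.tsv")

for method, domain_count, chopping in parser.parse():
    # A single ("Chainsaw", ndom, chopping) tuple read from the first TSV row.
    print(method, domain_count, chopping)
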
lib/segmentation/geostas.py (7 additions, 2 deletions)
@@ -7,15 +7,20 @@


class Parser(SegmentationParser):
def __init__(self, clustering_directory_path):
super().__init__(clustering_directory_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
for file in sorted(Path(self.path).glob('clustering_kmeans_*.json')):
clustering_directory_path = Path(self.paths[0])

for file in sorted(clustering_directory_path.glob('clustering_kmeans_*.json')):
atom_groups = json.loads(Path(file).read_text())
chopping = self._generate_chopping(atom_groups)
method = "GeoStaS K-means"

yield (method, len(atom_groups), chopping)

for file in sorted(Path(self.path).glob('clustering_hier_*.json')):
for file in sorted(clustering_directory_path.glob('clustering_hier_*.json')):
atom_groups = json.loads(Path(file).read_text())
chopping = self._generate_chopping(atom_groups)
method = "GeoStaS Hierarchical"
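
Unlike the CSV-based parsers, the GeoStaS parser takes a directory and yields one result per clustering file it finds there. A hypothetical usage sketch follows; the directory name is illustrative, and _generate_chopping is defined further down in the unchanged part of the file.

from lib.segmentation import geostas

parser = geostas.Parser("results/geostas_clusterings/")

# One tuple per clustering_kmeans_*.json file, then one per clustering_hier_*.json file.
for method, domain_count, chopping in parser.parse():
    print(method, domain_count, chopping)
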
lib/segmentation/merizo.py (11 additions, 5 deletions)
@@ -6,16 +6,22 @@


class Parser(SegmentationParser):
def __init__(self, csv_path):
super().__init__(csv_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
rows = self._read_csv_rows(self.path, delimiter='\t')
csv_path = self.paths[0]

rows = _read_csv_rows(csv_path, delimiter='\t')
data = rows[0]

domain_count = data[4]
chopping = data[7]

yield ("Merizo", domain_count, chopping)

def _read_csv_rows(self, path, **kwargs):
with open(self.path) as f:
reader = csv.reader(f, **kwargs)
return [row for row in reader]

def _read_csv_rows(path, **kwargs):
with open(path) as f:
reader = csv.reader(f, **kwargs)
return [row for row in reader]
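
Taken together, all three parsers now share the variadic constructor even though each still reads only its first path. A caller might wire them up roughly as below; the file and directory names are hypothetical, and how write_segmentations consumes the resulting tuples is outside the hunks shown here.

from itertools import chain

from lib.segmentation import chainsaw, geostas, merizo

parsers = [
    chainsaw.Parser("results/chainsaw.tsv"),
    geostas.Parser("results/geostas_clusterings/"),
    merizo.Parser("results/merizo.tsv"),
]

# Each parse() yields (method, domain_count, chopping) tuples.
for method, domain_count, chopping in chain.from_iterable(p.parse() for p in parsers):
    print(f"{method}: {domain_count} domains -> {chopping}")
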
