Allow segmentation parser to take multiple paths
AndrewRadev committed Dec 6, 2024
1 parent 63f2e6b commit 2bd72d5
Showing 4 changed files with 35 additions and 16 deletions.
lib/segmentation/__init__.py (6 additions, 4 deletions)
@@ -24,14 +24,16 @@ def write_segmentations(seg_objects, output_file):


class SegmentationParser:
def __init__(self, path: Path|str):
def __init__(self, *paths: Path|str):
"""
The intended input is a filesystem path where segmentation data can be
The intended inputs are filesystem paths where segmentation data can be
found.
TODO multiple paths
These could be PDBs or trajectories, but they will most likely be the
final results of external tools that need to be parsed into a unified
format.
"""
self.path = path
self.paths = paths

@abstractmethod
def parse(self) -> Iterator[Tuple[str, int, str]]:
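
As a quick illustration of the new contract: every positional argument ends up in self.paths, so a parser that genuinely needs more than one input can accept them directly. The CombinedParser class below is a hypothetical sketch; its name, its input files, and the lib.segmentation import path are assumptions, not part of this commit.

from pathlib import Path
from typing import Iterator, Tuple

from lib.segmentation import SegmentationParser  # import path assumed from the repository layout


class CombinedParser(SegmentationParser):
    """Hypothetical parser that reads two result files produced by one external tool."""

    def parse(self) -> Iterator[Tuple[str, int, str]]:
        # self.paths is the tuple collected by SegmentationParser.__init__(*paths)
        summary_path, chopping_path = (Path(p) for p in self.paths)

        domain_count = int(summary_path.read_text().strip())
        chopping = chopping_path.read_text().strip()

        yield ("CombinedTool", domain_count, chopping)


# parser = CombinedParser("results/summary.txt", "results/chopping.txt")
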
lib/segmentation/chainsaw.py (11 additions, 5 deletions)
@@ -6,13 +6,19 @@


class Parser(SegmentationParser):
def __init__(self, csv_path):
super().__init__(csv_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
rows = self._read_csv_rows(self.path, delimiter='\t')
csv_path = self.paths[0]

rows = _read_csv_rows(csv_path, delimiter='\t')
data = rows[0]

yield ("Chainsaw", data['ndom'], data['chopping'])

def _read_csv_rows(self, path, **kwargs):
with open(self.path) as f:
reader = csv.DictReader(f, **kwargs)
return [row for row in reader]

def _read_csv_rows(path, **kwargs):
with open(path) as f:
reader = csv.DictReader(f, **kwargs)
return [row for row in reader]
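
In practice the Chainsaw parser is now constructed with the TSV path as a plain positional argument, which the base class stores as self.paths[0]. The snippet below is a hypothetical usage sketch (the import path and file name are assumptions). Moving _read_csv_rows to module level also fixes the old helper, which ignored its path argument in favour of self.path.

from lib.segmentation import chainsaw

# File name is illustrative only.
parser = chainsaw.Parser("results/chainsaw_output.tsv")

for method, domain_count, chopping in parser.parse():
    # A single ("Chainsaw", ndom, chopping) tuple read from the first TSV row.
    print(method, domain_count, chopping)
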
lib/segmentation/geostas.py (7 additions, 2 deletions)
@@ -7,15 +7,20 @@


class Parser(SegmentationParser):
def __init__(self, clustering_directory_path):
super().__init__(clustering_directory_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
for file in sorted(Path(self.path).glob('clustering_kmeans_*.json')):
clustering_directory_path = Path(self.paths[0])

for file in sorted(clustering_directory_path.glob('clustering_kmeans_*.json')):
atom_groups = json.loads(Path(file).read_text())
chopping = self._generate_chopping(atom_groups)
method = "GeoStaS K-means"

yield (method, len(atom_groups), chopping)

for file in sorted(Path(self.path).glob('clustering_hier_*.json')):
for file in sorted(clustering_directory_path.glob('clustering_hier_*.json')):
atom_groups = json.loads(Path(file).read_text())
chopping = self._generate_chopping(atom_groups)
method = "GeoStaS Hierarchical"
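
Unlike the CSV-based parsers, the GeoStaS parser takes a directory and yields one result per clustering file it finds there. A hypothetical usage sketch follows; the directory name is illustrative, and _generate_chopping is defined further down in the unchanged part of the file.

from lib.segmentation import geostas

parser = geostas.Parser("results/geostas_clusterings/")

# One tuple per clustering_kmeans_*.json file, then one per clustering_hier_*.json file.
for method, domain_count, chopping in parser.parse():
    print(method, domain_count, chopping)
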
lib/segmentation/merizo.py (11 additions, 5 deletions)
@@ -6,16 +6,22 @@


class Parser(SegmentationParser):
def __init__(self, csv_path):
super().__init__(csv_path)

def parse(self) -> Iterator[Tuple[str, int, str]]:
rows = self._read_csv_rows(self.path, delimiter='\t')
csv_path = self.paths[0]

rows = _read_csv_rows(csv_path, delimiter='\t')
data = rows[0]

domain_count = data[4]
chopping = data[7]

yield ("Merizo", domain_count, chopping)

def _read_csv_rows(self, path, **kwargs):
with open(self.path) as f:
reader = csv.reader(f, **kwargs)
return [row for row in reader]

def _read_csv_rows(path, **kwargs):
with open(path) as f:
reader = csv.reader(f, **kwargs)
return [row for row in reader]
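
Taken together, all three parsers now share the variadic constructor even though each still reads only its first path. A caller might wire them up roughly as below; the file and directory names are hypothetical, and how write_segmentations consumes the resulting tuples is outside the hunks shown here.

from itertools import chain

from lib.segmentation import chainsaw, geostas, merizo

parsers = [
    chainsaw.Parser("results/chainsaw.tsv"),
    geostas.Parser("results/geostas_clusterings/"),
    merizo.Parser("results/merizo.tsv"),
]

# Each parse() yields (method, domain_count, chopping) tuples.
for method, domain_count, chopping in chain.from_iterable(p.parse() for p in parsers):
    print(f"{method}: {domain_count} domains -> {chopping}")
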
