Skip to content

Commit 7f686ca

Browse files
authored
'MDAnalysis.analysis.nucleicacids' parallelization (#4727)
- Fixes #4670
- Add parallel backend support to the `NucPairDist` class in nucleicacids.py
- Add parallelization tests in test_nucleicacids.py and fixtures in conftest.py
- Update CHANGELOG
1 parent c6bfa09 commit 7f686ca

File tree

4 files changed

+40
-17
lines changed

4 files changed

+40
-17
lines changed

package/CHANGELOG

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The rules for this file:
1414

1515

1616
-------------------------------------------------------------------------------
17-
??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777
17+
??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777, talagayev
1818

1919
* 2.9.0
2020

@@ -25,6 +25,7 @@ Fixes
2525
the function to prevent shared state. (Issue #4655)
2626

2727
Enhancements
28+
* Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
2829
* Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
2930
* Added `precision` for XYZWriter (Issue #4775, PR #4771)
3031

@@ -98,11 +99,11 @@ Enhancements
9899
* Introduce parallelization API to `AnalysisBase` and to `analysis.rms.RMSD` class
99100
(Issue #4158, PR #4304)
100101
* Enables parallelization for analysis.gnm.GNMAnalysis (Issue #4672)
101-
* explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680)
102-
* enables parallelization for analysis.bat.BAT (Issue #4663)
103-
* enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
102+
* Explicitly mark `analysis.pca.PCA` as not parallelizable (Issue #4680)
103+
* Enables parallelization for analysis.bat.BAT (Issue #4663)
104+
* Enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
104105
(Issue #4673)
105-
* enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674)
106+
* Enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674)
106107
* Enables parallelization for analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis (Issue #4664)
107108
* Improve error message for `AtomGroup.unwrap()` when bonds are not present. (Issue #4436, PR #4642)
108109
* Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP)

package/MDAnalysis/analysis/nucleicacids.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070

7171
import MDAnalysis as mda
7272
from .distances import calc_bonds
73-
from .base import AnalysisBase, Results
73+
from .base import AnalysisBase, ResultsGroup
7474
from MDAnalysis.core.groups import Residue, ResidueGroup
7575

7676

@@ -159,13 +159,23 @@ class NucPairDist(AnalysisBase):
159159
.. versionchanged:: 2.7.0
160160
Added static method :attr:`select_strand_atoms` as a
161161
helper for selecting atom pairs for distance analysis.
162+
163+
.. versionchanged:: 2.9.0
164+
Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
165+
backends; use the new method :meth:`get_supported_backends` to see all
166+
supported backends.
162167
"""
163168

169+
_analysis_algorithm_is_parallelizable = True
170+
171+
@classmethod
172+
def get_supported_backends(cls):
173+
return ('serial', 'multiprocessing', 'dask')
174+
164175
_s1: mda.AtomGroup
165176
_s2: mda.AtomGroup
166177
_n_sel: int
167-
_res_dict: Dict[int, List[float]]
168-
178+
169179
def __init__(self, selection1: List[mda.AtomGroup],
170180
selection2: List[mda.AtomGroup],
171181
**kwargs) -> None:
@@ -276,7 +286,7 @@ def select_strand_atoms(
276286
return (sel1, sel2)
277287

278288
def _prepare(self) -> None:
279-
self._res_array: np.ndarray = np.zeros(
289+
self.results.distances: np.ndarray = np.zeros(
280290
[self.n_frames, self._n_sel]
281291
)
282292

@@ -285,13 +295,17 @@ def _single_frame(self) -> None:
285295
self._s1.positions, self._s2.positions
286296
)
287297

288-
self._res_array[self._frame_index, :] = dist
298+
self.results.distances[self._frame_index, :] = dist
289299

290300
def _conclude(self) -> None:
291-
self.results['distances'] = self._res_array
292301
self.results['pair_distances'] = self.results['distances']
293302
# TODO: remove pair_distances in 3.0.0
294303

304+
def _get_aggregator(self):
305+
return ResultsGroup(lookup={
306+
'distances': ResultsGroup.ndarray_vstack,
307+
}
308+
)
295309

296310
class WatsonCrickDist(NucPairDist):
297311
r"""

testsuite/MDAnalysisTests/analysis/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import (
1515
HydrogenBondAnalysis,
1616
)
17+
from MDAnalysis.analysis.nucleicacids import NucPairDist
1718
from MDAnalysis.lib.util import is_installed
1819

1920

@@ -141,3 +142,10 @@ def client_DSSP(request):
141142
@pytest.fixture(scope='module', params=params_for_cls(HydrogenBondAnalysis))
142143
def client_HydrogenBondAnalysis(request):
143144
return request.param
145+
146+
147+
# MDAnalysis.analysis.nucleicacids
148+
149+
@pytest.fixture(scope="module", params=params_for_cls(NucPairDist))
150+
def client_NucPairDist(request):
151+
return request.param

testsuite/MDAnalysisTests/analysis/test_nucleicacids.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@ def test_empty_ag_error(strand):
5555

5656

5757
@pytest.fixture(scope='module')
58-
def wc_rna(strand):
58+
def wc_rna(strand, client_NucPairDist):
5959
strand1 = ResidueGroup([strand.residues[0], strand.residues[21]])
6060
strand2 = ResidueGroup([strand.residues[1], strand.residues[22]])
6161

6262
WC = WatsonCrickDist(strand1, strand2)
63-
WC.run()
63+
WC.run(**client_NucPairDist)
6464
return WC
6565

6666

@@ -114,23 +114,23 @@ def test_wc_dis_results_keyerrs(wc_rna, key):
114114
wc_rna.results[key]
115115

116116

117-
def test_minor_dist(strand):
117+
def test_minor_dist(strand, client_NucPairDist):
118118
strand1 = ResidueGroup([strand.residues[2], strand.residues[19]])
119119
strand2 = ResidueGroup([strand.residues[16], strand.residues[4]])
120120

121121
MI = MinorPairDist(strand1, strand2)
122-
MI.run()
122+
MI.run(**client_NucPairDist)
123123

124124
assert MI.results.distances[0, 0] == approx(15.06506, rel=1e-3)
125125
assert MI.results.distances[0, 1] == approx(3.219116, rel=1e-3)
126126

127127

128-
def test_major_dist(strand):
128+
def test_major_dist(strand, client_NucPairDist):
129129
strand1 = ResidueGroup([strand.residues[1], strand.residues[4]])
130130
strand2 = ResidueGroup([strand.residues[11], strand.residues[8]])
131131

132132
MA = MajorPairDist(strand1, strand2)
133-
MA.run()
133+
MA.run(**client_NucPairDist)
134134

135135
assert MA.results.distances[0, 0] == approx(26.884272, rel=1e-3)
136136
assert MA.results.distances[0, 1] == approx(13.578535, rel=1e-3)

0 commit comments

Comments
 (0)