From 78b0cbca475b13a37cbf992e48b74c0ee5c9ceb9 Mon Sep 17 00:00:00 2001 From: Tanish Yelgoe <23110328@iitgn.ac.in> Date: Fri, 13 Dec 2024 19:44:24 +0530 Subject: [PATCH 1/2] Fix for issue 4655 removing mutable data structures and function calls as default arguments in the entire codebase --- package/MDAnalysis/analysis/align.py | 5 +- package/MDAnalysis/analysis/base.py | 4 +- .../analysis/encore/clustering/cluster.py | 19 +-- .../reduce_dimensionality.py | 19 +-- .../MDAnalysis/analysis/encore/similarity.py | 126 ++++++++++-------- package/MDAnalysis/analysis/helix_analysis.py | 19 ++- package/MDAnalysis/core/universe.py | 15 ++- package/MDAnalysis/topology/ITPParser.py | 4 +- 8 files changed, 129 insertions(+), 82 deletions(-) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index fd7f15a822..acf87dbf65 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -1592,9 +1592,12 @@ def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False, match_atoms=True): rsize_mismatches = np.absolute(rsize1 - rsize2) mismatch_mask = (rsize_mismatches > 0) if np.any(mismatch_mask): - def get_atoms_byres(g, match_mask=np.logical_not(mismatch_mask)): + + def get_atoms_byres(g, match_mask=None): # not pretty... but need to do things on a per-atom basis in # order to preserve original selection + if match_mask is None: + match_mask = np.logical_not(mismatch_mask) ag = g.atoms good = ag.residues.resids[match_mask] # resid for each residue resids = ag.resids # resid for each atom diff --git a/package/MDAnalysis/analysis/base.py b/package/MDAnalysis/analysis/base.py index f940af58e8..4e7f58dc0b 100644 --- a/package/MDAnalysis/analysis/base.py +++ b/package/MDAnalysis/analysis/base.py @@ -479,7 +479,7 @@ def _conclude(self): def _compute(self, indexed_frames: np.ndarray, verbose: bool = None, - *, progressbar_kwargs={}) -> "AnalysisBase": + *, progressbar_kwargs=None) -> "AnalysisBase": """Perform the calculation on a balanced slice of frames that have been setup prior to that using _setup_computation_groups() @@ -500,6 +500,8 @@ def _compute(self, indexed_frames: np.ndarray, .. versionadded:: 2.8.0 """ + if progressbar_kwargs is None: + progressbar_kwargs = {} logger.info("Choosing frames to analyze") # if verbose unchanged, use class default verbose = getattr(self, "_verbose", False) if verbose is None else verbose diff --git a/package/MDAnalysis/analysis/encore/clustering/cluster.py b/package/MDAnalysis/analysis/encore/clustering/cluster.py index 1c43f2cfd7..3bffa49023 100644 --- a/package/MDAnalysis/analysis/encore/clustering/cluster.py +++ b/package/MDAnalysis/analysis/encore/clustering/cluster.py @@ -44,13 +44,15 @@ from . import ClusteringMethod -def cluster(ensembles, - method = ClusteringMethod.AffinityPropagationNative(), - select="name CA", - distance_matrix=None, - allow_collapsed_result=True, - ncores=1, - **kwargs): +def cluster( + ensembles, + method=None, + select="name CA", + distance_matrix=None, + allow_collapsed_result=True, + ncores=1, + **kwargs, +): """Cluster frames from one or more ensembles, using one or more clustering methods. The function optionally takes pre-calculated distances matrices as an argument. Note that not all clustering procedure can work @@ -154,7 +156,8 @@ def cluster(ensembles, [array([1, 1, 1, 1, 2]), array([1, 1, 1, 1, 1])] """ - + if method is None: + method = ClusteringMethod.AffinityPropagationNative() # Internally, ensembles are always transformed to a list of lists if ensembles is not None: if not hasattr(ensembles, '__iter__'): diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py index 281d681203..82e805c91b 100644 --- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py +++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py @@ -44,13 +44,15 @@ StochasticProximityEmbeddingNative) -def reduce_dimensionality(ensembles, - method=StochasticProximityEmbeddingNative(), - select="name CA", - distance_matrix=None, - allow_collapsed_result=True, - ncores=1, - **kwargs): +def reduce_dimensionality( + ensembles, + method=None, + select="name CA", + distance_matrix=None, + allow_collapsed_result=True, + ncores=1, + **kwargs, +): """ Reduce dimensions in frames from one or more ensembles, using one or more dimensionality reduction methods. The function optionally takes @@ -152,7 +154,8 @@ def reduce_dimensionality(ensembles, encore.StochasticProximityEmbeddingNative(dimension=2)]) """ - + if method is None: + method = StochasticProximityEmbeddingNative() if ensembles is not None: if not hasattr(ensembles, '__iter__'): ensembles = [ensembles] diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index 4fe6f0e35a..c9a8ff1486 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -952,20 +952,17 @@ def hes(ensembles, return values, details -def ces(ensembles, - select="name CA", - clustering_method=AffinityPropagationNative( - preference=-1.0, - max_iter=500, - convergence_iter=50, - damping=0.9, - add_noise=True), - distance_matrix=None, - estimate_error=False, - bootstrapping_samples=10, - ncores=1, - calc_diagonal=False, - allow_collapsed_result=True): +def ces( + ensembles, + select="name CA", + clustering_method=None, + distance_matrix=None, + estimate_error=False, + bootstrapping_samples=10, + ncores=1, + calc_diagonal=False, + allow_collapsed_result=True, +): """ Calculates the Clustering Ensemble Similarity (CES) between ensembles @@ -1084,6 +1081,14 @@ def ces(ensembles, [0.25331629 0. ]] """ + if clustering_method is None: + clustering_method = AffinityPropagationNative( + preference=-1.0, + max_iter=500, + convergence_iter=50, + damping=0.9, + add_noise=True, + ) for ensemble in ensembles: ensemble.transfer_to_memory() @@ -1218,22 +1223,18 @@ def ces(ensembles, return values, details -def dres(ensembles, - select="name CA", - dimensionality_reduction_method = StochasticProximityEmbeddingNative( - dimension=3, - distance_cutoff = 1.5, - min_lam=0.1, - max_lam=2.0, - ncycle=100, - nstep=10000), - distance_matrix=None, - nsamples=1000, - estimate_error=False, - bootstrapping_samples=100, - ncores=1, - calc_diagonal=False, - allow_collapsed_result=True): +def dres( + ensembles, + select="name CA", + dimensionality_reduction_method=None, + distance_matrix=None, + nsamples=1000, + estimate_error=False, + bootstrapping_samples=100, + ncores=1, + calc_diagonal=False, + allow_collapsed_result=True, +): """ Calculates the Dimensional Reduction Ensemble Similarity (DRES) between @@ -1354,6 +1355,15 @@ def dres(ensembles, :mod:`MDAnalysis.analysis.encore.dimensionality_reduction.reduce_dimensionality`` """ + if dimensionality_reduction_method is None: + dimensionality_reduction_method = StochasticProximityEmbeddingNative( + dimension=3, + distance_cutoff=1.5, + min_lam=0.1, + max_lam=2.0, + ncycle=100, + nstep=10000, + ) for ensemble in ensembles: ensemble.transfer_to_memory() @@ -1484,16 +1494,13 @@ def dres(ensembles, return values, details -def ces_convergence(original_ensemble, - window_size, - select="name CA", - clustering_method=AffinityPropagationNative( - preference=-1.0, - max_iter=500, - convergence_iter=50, - damping=0.9, - add_noise=True), - ncores=1): +def ces_convergence( + original_ensemble, + window_size, + select="name CA", + clustering_method=None, + ncores=1, +): """ Use the CES to evaluate the convergence of the ensemble/trajectory. CES will be calculated between the whole trajectory contained in an @@ -1559,6 +1566,14 @@ def ces_convergence(original_ensemble, [0. ]] """ + if clustering_method is None: + clustering_method = AffinityPropagationNative( + preference=-1.0, + max_iter=500, + convergence_iter=50, + damping=0.9, + add_noise=True, + ) ensembles = prepare_ensembles_for_convergence_increasing_window( original_ensemble, window_size, select=select) @@ -1584,20 +1599,14 @@ def ces_convergence(original_ensemble, return out -def dres_convergence(original_ensemble, - window_size, - select="name CA", - dimensionality_reduction_method = \ - StochasticProximityEmbeddingNative( - dimension=3, - distance_cutoff=1.5, - min_lam=0.1, - max_lam=2.0, - ncycle=100, - nstep=10000 - ), - nsamples=1000, - ncores=1): +def dres_convergence( + original_ensemble, + window_size, + select="name CA", + dimensionality_reduction_method=None, + nsamples=1000, + ncores=1, +): """ Use the DRES to evaluate the convergence of the ensemble/trajectory. DRES will be calculated between the whole trajectory contained in an @@ -1660,6 +1669,15 @@ def dres_convergence(original_ensemble, much the trajectory keeps on resampling the same ares of the conformational space, and therefore of convergence. """ + if dimensionality_reduction_method is None: + dimensionality_reduction_method = StochasticProximityEmbeddingNative( + dimension=3, + distance_cutoff=1.5, + min_lam=0.1, + max_lam=2.0, + ncycle=100, + nstep=10000, + ) ensembles = prepare_ensembles_for_convergence_increasing_window( original_ensemble, window_size, select=select) diff --git a/package/MDAnalysis/analysis/helix_analysis.py b/package/MDAnalysis/analysis/helix_analysis.py index 9c287fb750..1b1bdf9ce3 100644 --- a/package/MDAnalysis/analysis/helix_analysis.py +++ b/package/MDAnalysis/analysis/helix_analysis.py @@ -186,7 +186,7 @@ def local_screw_angles(global_axis, ref_axis, helix_directions): return np.rad2deg(to_ortho) -def helix_analysis(positions, ref_axis=[0, 0, 1]): +def helix_analysis(positions, ref_axis=(0, 0, 1)): r""" Calculate helix properties from atomic coordinates. @@ -387,11 +387,18 @@ class HELANAL(AnalysisBase): 'local_screw_angles': (-2,), } - def __init__(self, universe, select='name CA', ref_axis=[0, 0, 1], - verbose=False, flatten_single_helix=True, - split_residue_sequences=True): - super(HELANAL, self).__init__(universe.universe.trajectory, - verbose=verbose) + def __init__( + self, + universe, + select="name CA", + ref_axis=(0, 0, 1), + verbose=False, + flatten_single_helix=True, + split_residue_sequences=True, + ): + super(HELANAL, self).__init__( + universe.universe.trajectory, verbose=verbose + ) selections = util.asiterable(select) atomgroups = [universe.select_atoms(s) for s in selections] consecutive = [] diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 7fed2cde8c..504d07c02c 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1474,9 +1474,16 @@ def _fragdict(self): return fragdict @classmethod - def from_smiles(cls, smiles, sanitize=True, addHs=True, - generate_coordinates=True, numConfs=1, - rdkit_kwargs={}, **kwargs): + def from_smiles( + cls, + smiles, + sanitize=True, + addHs=True, + generate_coordinates=True, + numConfs=1, + rdkit_kwargs=None, + **kwargs, + ): """Create a Universe from a SMILES string with rdkit Parameters @@ -1530,6 +1537,8 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True, .. versionadded:: 2.0.0 """ + if rdkit_kwargs is None: + rdkit_kwargs = {} try: from rdkit import Chem from rdkit.Chem import AllChem diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py index 83b43711c8..9968f854df 100644 --- a/package/MDAnalysis/topology/ITPParser.py +++ b/package/MDAnalysis/topology/ITPParser.py @@ -429,8 +429,10 @@ def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0) return atom_order, new_params, molnums, self.moltypes, residx - def add_param(self, line, container, n_funct=2, funct_values=[]): + def add_param(self, line, container, n_funct=2, funct_values=None): """Add defined GROMACS directive lines, only if the funct is in ``funct_values``""" + if funct_values is None: + funct_values = [] values = line.split() funct = int(values[n_funct]) if funct in funct_values: From 5e2c1040da8b75b31d96daaea0ecb7a9f2055fae Mon Sep 17 00:00:00 2001 From: Tanish Yelgoe <23110328@iitgn.ac.in> Date: Fri, 13 Dec 2024 19:47:40 +0530 Subject: [PATCH 2/2] Updated CHANGELOG --- package/CHANGELOG | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 83bff74002..9af08b1a42 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -14,11 +14,13 @@ The rules for this file: ------------------------------------------------------------------------------- -??/??/?? IAlibay, ChiahsinChu, RMeli +??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777 * 2.9.0 Fixes + * Replaced mutable defaults with None and initialized them within + the function to prevent shared state. (Issue #4655) Enhancements * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)