From 78b0cbca475b13a37cbf992e48b74c0ee5c9ceb9 Mon Sep 17 00:00:00 2001
From: Tanish Yelgoe <23110328@iitgn.ac.in>
Date: Fri, 13 Dec 2024 19:44:24 +0530
Subject: [PATCH 1/2] Fix for issue 4655 removing mutable data structures and
 function calls as default arguments in the entire codebase

---
 package/MDAnalysis/analysis/align.py          |   5 +-
 package/MDAnalysis/analysis/base.py           |   4 +-
 .../analysis/encore/clustering/cluster.py     |  19 +--
 .../reduce_dimensionality.py                  |  19 +--
 .../MDAnalysis/analysis/encore/similarity.py  | 126 ++++++++++--------
 package/MDAnalysis/analysis/helix_analysis.py |  19 ++-
 package/MDAnalysis/core/universe.py           |  15 ++-
 package/MDAnalysis/topology/ITPParser.py      |   4 +-
 8 files changed, 129 insertions(+), 82 deletions(-)

diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py
index fd7f15a822..acf87dbf65 100644
--- a/package/MDAnalysis/analysis/align.py
+++ b/package/MDAnalysis/analysis/align.py
@@ -1592,9 +1592,12 @@ def get_matching_atoms(ag1, ag2, tol_mass=0.1, strict=False, match_atoms=True):
         rsize_mismatches = np.absolute(rsize1 - rsize2)
         mismatch_mask = (rsize_mismatches > 0)
         if np.any(mismatch_mask):
-            def get_atoms_byres(g, match_mask=np.logical_not(mismatch_mask)):
+
+            def get_atoms_byres(g, match_mask=None):
                 # not pretty... but need to do things on a per-atom basis in
                 # order to preserve original selection
+                if match_mask is None:
+                    match_mask = np.logical_not(mismatch_mask)
                 ag = g.atoms
                 good = ag.residues.resids[match_mask]  # resid for each residue
                 resids = ag.resids                     # resid for each atom
diff --git a/package/MDAnalysis/analysis/base.py b/package/MDAnalysis/analysis/base.py
index f940af58e8..4e7f58dc0b 100644
--- a/package/MDAnalysis/analysis/base.py
+++ b/package/MDAnalysis/analysis/base.py
@@ -479,7 +479,7 @@ def _conclude(self):
 
     def _compute(self, indexed_frames: np.ndarray,
                  verbose: bool = None,
-                 *, progressbar_kwargs={}) -> "AnalysisBase":
+                 *, progressbar_kwargs=None) -> "AnalysisBase":
         """Perform the calculation on a balanced slice of frames
         that have been setup prior to that using _setup_computation_groups()
 
@@ -500,6 +500,8 @@ def _compute(self, indexed_frames: np.ndarray,
 
         .. versionadded:: 2.8.0
         """
+        if progressbar_kwargs is None:
+            progressbar_kwargs = {}
         logger.info("Choosing frames to analyze")
         # if verbose unchanged, use class default
         verbose = getattr(self, "_verbose", False) if verbose is None else verbose
diff --git a/package/MDAnalysis/analysis/encore/clustering/cluster.py b/package/MDAnalysis/analysis/encore/clustering/cluster.py
index 1c43f2cfd7..3bffa49023 100644
--- a/package/MDAnalysis/analysis/encore/clustering/cluster.py
+++ b/package/MDAnalysis/analysis/encore/clustering/cluster.py
@@ -44,13 +44,15 @@
 from . import ClusteringMethod
 
 
-def cluster(ensembles,
-            method = ClusteringMethod.AffinityPropagationNative(),
-            select="name CA",
-            distance_matrix=None,
-            allow_collapsed_result=True,
-            ncores=1,
-            **kwargs):
+def cluster(
+    ensembles,
+    method=None,
+    select="name CA",
+    distance_matrix=None,
+    allow_collapsed_result=True,
+    ncores=1,
+    **kwargs,
+):
     """Cluster frames from one or more ensembles, using one or more
     clustering methods. The function optionally takes pre-calculated distances
     matrices as an argument. Note that not all clustering procedure can work
@@ -154,7 +156,8 @@ def cluster(ensembles,
         [array([1, 1, 1, 1, 2]), array([1, 1, 1, 1, 1])]
 
     """
-
+    if method is None:
+        method = ClusteringMethod.AffinityPropagationNative()
     # Internally, ensembles are always transformed to a list of lists
     if ensembles is not None:
         if not hasattr(ensembles, '__iter__'):
diff --git a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py
index 281d681203..82e805c91b 100644
--- a/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py
+++ b/package/MDAnalysis/analysis/encore/dimensionality_reduction/reduce_dimensionality.py
@@ -44,13 +44,15 @@
     StochasticProximityEmbeddingNative)
 
 
-def reduce_dimensionality(ensembles,
-                          method=StochasticProximityEmbeddingNative(),
-                          select="name CA",
-                          distance_matrix=None,
-                          allow_collapsed_result=True,
-                          ncores=1,
-                          **kwargs):
+def reduce_dimensionality(
+    ensembles,
+    method=None,
+    select="name CA",
+    distance_matrix=None,
+    allow_collapsed_result=True,
+    ncores=1,
+    **kwargs,
+):
     """
     Reduce dimensions in frames from one or more ensembles, using one or more
     dimensionality reduction methods. The function optionally takes
@@ -152,7 +154,8 @@ def reduce_dimensionality(ensembles,
                              encore.StochasticProximityEmbeddingNative(dimension=2)])
 
     """
-
+    if method is None:
+        method = StochasticProximityEmbeddingNative()
     if ensembles is not None:
         if not hasattr(ensembles, '__iter__'):
             ensembles = [ensembles]
diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py
index 4fe6f0e35a..c9a8ff1486 100644
--- a/package/MDAnalysis/analysis/encore/similarity.py
+++ b/package/MDAnalysis/analysis/encore/similarity.py
@@ -952,20 +952,17 @@ def hes(ensembles,
     return values, details
 
 
-def ces(ensembles,
-        select="name CA",
-        clustering_method=AffinityPropagationNative(
-            preference=-1.0,
-            max_iter=500,
-            convergence_iter=50,
-            damping=0.9,
-            add_noise=True),
-        distance_matrix=None,
-            estimate_error=False,
-        bootstrapping_samples=10,
-        ncores=1,
-        calc_diagonal=False,
-        allow_collapsed_result=True):
+def ces(
+    ensembles,
+    select="name CA",
+    clustering_method=None,
+    distance_matrix=None,
+    estimate_error=False,
+    bootstrapping_samples=10,
+    ncores=1,
+    calc_diagonal=False,
+    allow_collapsed_result=True,
+):
     """
 
     Calculates the Clustering Ensemble Similarity (CES) between ensembles
@@ -1084,6 +1081,14 @@ def ces(ensembles,
          [0.25331629 0.        ]]
 
     """
+    if clustering_method is None:
+        clustering_method = AffinityPropagationNative(
+            preference=-1.0,
+            max_iter=500,
+            convergence_iter=50,
+            damping=0.9,
+            add_noise=True,
+        )
 
     for ensemble in ensembles:
         ensemble.transfer_to_memory()
@@ -1218,22 +1223,18 @@ def ces(ensembles,
     return values, details
 
 
-def dres(ensembles,
-         select="name CA",
-         dimensionality_reduction_method = StochasticProximityEmbeddingNative(
-             dimension=3,
-             distance_cutoff = 1.5,
-             min_lam=0.1,
-             max_lam=2.0,
-             ncycle=100,
-             nstep=10000),
-         distance_matrix=None,
-         nsamples=1000,
-         estimate_error=False,
-         bootstrapping_samples=100,
-         ncores=1,
-         calc_diagonal=False,
-         allow_collapsed_result=True):
+def dres(
+    ensembles,
+    select="name CA",
+    dimensionality_reduction_method=None,
+    distance_matrix=None,
+    nsamples=1000,
+    estimate_error=False,
+    bootstrapping_samples=100,
+    ncores=1,
+    calc_diagonal=False,
+    allow_collapsed_result=True,
+):
     """
 
     Calculates the Dimensional Reduction Ensemble Similarity (DRES) between
@@ -1354,6 +1355,15 @@ def dres(ensembles,
     :mod:`MDAnalysis.analysis.encore.dimensionality_reduction.reduce_dimensionality``
 
     """
+    if dimensionality_reduction_method is None:
+        dimensionality_reduction_method = StochasticProximityEmbeddingNative(
+            dimension=3,
+            distance_cutoff=1.5,
+            min_lam=0.1,
+            max_lam=2.0,
+            ncycle=100,
+            nstep=10000,
+        )
 
     for ensemble in ensembles:
         ensemble.transfer_to_memory()
@@ -1484,16 +1494,13 @@ def dres(ensembles,
     return values, details
 
 
-def ces_convergence(original_ensemble,
-                    window_size,
-                    select="name CA",
-                    clustering_method=AffinityPropagationNative(
-                        preference=-1.0,
-                        max_iter=500,
-                        convergence_iter=50,
-                        damping=0.9,
-                        add_noise=True),
-                    ncores=1):
+def ces_convergence(
+    original_ensemble,
+    window_size,
+    select="name CA",
+    clustering_method=None,
+    ncores=1,
+):
     """
     Use the CES to evaluate the convergence of the ensemble/trajectory.
     CES will be calculated between the whole trajectory contained in an
@@ -1559,6 +1566,14 @@ def ces_convergence(original_ensemble,
          [0.        ]]
 
     """
+    if clustering_method is None:
+        clustering_method = AffinityPropagationNative(
+            preference=-1.0,
+            max_iter=500,
+            convergence_iter=50,
+            damping=0.9,
+            add_noise=True,
+        )
 
     ensembles = prepare_ensembles_for_convergence_increasing_window(
         original_ensemble, window_size, select=select)
@@ -1584,20 +1599,14 @@ def ces_convergence(original_ensemble,
     return out
 
 
-def dres_convergence(original_ensemble,
-                     window_size,
-                     select="name CA",
-                     dimensionality_reduction_method = \
-                            StochasticProximityEmbeddingNative(
-                                dimension=3,
-                                distance_cutoff=1.5,
-                                min_lam=0.1,
-                                max_lam=2.0,
-                                ncycle=100,
-                                nstep=10000
-                            ),
-                     nsamples=1000,
-                     ncores=1):
+def dres_convergence(
+    original_ensemble,
+    window_size,
+    select="name CA",
+    dimensionality_reduction_method=None,
+    nsamples=1000,
+    ncores=1,
+):
     """
     Use the DRES to evaluate the convergence of the ensemble/trajectory.
     DRES will be calculated between the whole trajectory contained in an
@@ -1660,6 +1669,15 @@ def dres_convergence(original_ensemble,
     much the trajectory keeps on resampling the same ares of the conformational
     space, and therefore of convergence.
     """
+    if dimensionality_reduction_method is None:
+        dimensionality_reduction_method = StochasticProximityEmbeddingNative(
+            dimension=3,
+            distance_cutoff=1.5,
+            min_lam=0.1,
+            max_lam=2.0,
+            ncycle=100,
+            nstep=10000,
+        )
 
     ensembles = prepare_ensembles_for_convergence_increasing_window(
         original_ensemble, window_size, select=select)
diff --git a/package/MDAnalysis/analysis/helix_analysis.py b/package/MDAnalysis/analysis/helix_analysis.py
index 9c287fb750..1b1bdf9ce3 100644
--- a/package/MDAnalysis/analysis/helix_analysis.py
+++ b/package/MDAnalysis/analysis/helix_analysis.py
@@ -186,7 +186,7 @@ def local_screw_angles(global_axis, ref_axis, helix_directions):
     return np.rad2deg(to_ortho)
 
 
-def helix_analysis(positions, ref_axis=[0, 0, 1]):
+def helix_analysis(positions, ref_axis=(0, 0, 1)):
     r"""
     Calculate helix properties from atomic coordinates.
 
@@ -387,11 +387,18 @@ class HELANAL(AnalysisBase):
         'local_screw_angles': (-2,),
     }
 
-    def __init__(self, universe, select='name CA', ref_axis=[0, 0, 1],
-                 verbose=False, flatten_single_helix=True,
-                 split_residue_sequences=True):
-        super(HELANAL, self).__init__(universe.universe.trajectory,
-                                      verbose=verbose)
+    def __init__(
+        self,
+        universe,
+        select="name CA",
+        ref_axis=(0, 0, 1),
+        verbose=False,
+        flatten_single_helix=True,
+        split_residue_sequences=True,
+    ):
+        super(HELANAL, self).__init__(
+            universe.universe.trajectory, verbose=verbose
+        )
         selections = util.asiterable(select)
         atomgroups = [universe.select_atoms(s) for s in selections]
         consecutive = []
diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py
index 7fed2cde8c..504d07c02c 100644
--- a/package/MDAnalysis/core/universe.py
+++ b/package/MDAnalysis/core/universe.py
@@ -1474,9 +1474,16 @@ def _fragdict(self):
         return fragdict
 
     @classmethod
-    def from_smiles(cls, smiles, sanitize=True, addHs=True,
-                    generate_coordinates=True, numConfs=1,
-                    rdkit_kwargs={}, **kwargs):
+    def from_smiles(
+        cls,
+        smiles,
+        sanitize=True,
+        addHs=True,
+        generate_coordinates=True,
+        numConfs=1,
+        rdkit_kwargs=None,
+        **kwargs,
+    ):
         """Create a Universe from a SMILES string with rdkit
 
         Parameters
@@ -1530,6 +1537,8 @@ def from_smiles(cls, smiles, sanitize=True, addHs=True,
         .. versionadded:: 2.0.0
 
         """
+        if rdkit_kwargs is None:
+            rdkit_kwargs = {}
         try:
             from rdkit import Chem
             from rdkit.Chem import AllChem
diff --git a/package/MDAnalysis/topology/ITPParser.py b/package/MDAnalysis/topology/ITPParser.py
index 83b43711c8..9968f854df 100644
--- a/package/MDAnalysis/topology/ITPParser.py
+++ b/package/MDAnalysis/topology/ITPParser.py
@@ -429,8 +429,10 @@ def shift_indices(self, atomid=0, resid=0, molnum=0, cgnr=0, n_res=0, n_atoms=0)
 
         return atom_order, new_params, molnums, self.moltypes, residx
 
-    def add_param(self, line, container, n_funct=2, funct_values=[]):
+    def add_param(self, line, container, n_funct=2, funct_values=None):
         """Add defined GROMACS directive lines, only if the funct is in ``funct_values``"""
+        if funct_values is None:
+            funct_values = []
         values = line.split()
         funct = int(values[n_funct])
         if funct in funct_values:

From 5e2c1040da8b75b31d96daaea0ecb7a9f2055fae Mon Sep 17 00:00:00 2001
From: Tanish Yelgoe <23110328@iitgn.ac.in>
Date: Fri, 13 Dec 2024 19:47:40 +0530
Subject: [PATCH 2/2] Updated CHANGELOG

---
 package/CHANGELOG | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/package/CHANGELOG b/package/CHANGELOG
index 83bff74002..9af08b1a42 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -14,11 +14,13 @@ The rules for this file:
 
 
 -------------------------------------------------------------------------------
-??/??/?? IAlibay, ChiahsinChu, RMeli
+??/??/?? IAlibay, ChiahsinChu, RMeli, tanishy7777
 
  * 2.9.0
 
 Fixes
+ * Replaced mutable defaults with None and initialized them within
+   the function to prevent shared state. (Issue #4655)
 
 Enhancements
  * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)