Integrated rpy2 into tests -> fix

Add rpy2 and r-bio3d to pyproject.toml Added correct versions in .toml Revert "Integrated rpy2 into tests -> fix" This reverts commit 8133498. Added rpy2/r-bio3d -> fix Revert "Added correct versions in .toml" This reverts commit 02562ee. Revert "Add rpy2 and r-bio3d to pyproject.toml" This reverts commit ecc420f. Update .gitignore Added rpy2/r-bio3d to GitHub-Workflow Removed mem_eff argument for DCC
biotite-dev · Feb 22, 2024 · e7941d5 · e7941d5
1 parent 21018c4
commit e7941d5
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 70 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -19,10 +19,10 @@ jobs:
           auto-update-conda: true
           python-version: '3.10'
       - name: Installing dependencies
-        run: conda install -c conda-forge poetry prody pytest
+        run: conda install -c conda-forge poetry prody pytest r-bio3d rpy2
       - name: Building distribution
         run: poetry build -f wheel
       - name: Installing distribution
         run: pip install ./dist/*.whl
       - name: Testing code
-        run: pytest
+        run: pytest
diff --git a/.gitignore b/.gitignore
@@ -137,6 +137,7 @@ dmypy.json
 # VS Code
 *.code-workspace
 .vscode/*
+profile.json
 
 # Temporarily disable freezing of dependency versions
 poetry.lock

diff --git a/profile.json b/profile.json
diff --git a/src/springcraft/nma.py b/src/springcraft/nma.py
@@ -228,8 +228,7 @@ def bfactor(enm, mode_subset=None, tem=None,
 
     return b_factors
 
-def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B, 
-            mem_eff=True):
+def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B):
     r"""
     Computes the normalized *dynamic cross-correlation* between 
     nodes of the GNM/ANM.
@@ -256,10 +255,6 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B,
     tem_factors : int, float, optional
         Factors included in temperature weighting 
         (with :math:`k_B` as preset).
-    mem_eff : bool, optional
-        Select between a memory-efficient (True; standard setting)
-        and inefficient, but potentially faster method implementation
-        (False).
         
     Returns
     -------
@@ -323,28 +318,10 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B,
             "Trivial modes are included in the current selection."
             " Please check your input."
             )
-
-    eig_values = eig_values[mode_subset]
-    eig_vectors = eig_vectors[mode_subset]
-
-    # Reshape array of eigenvectors
-    # (k,3n) -> (k,n,3) for ANMs; (k,n) -> (k,n,1) for GNMs
-    modes_reshaped = np.reshape(
-                        eig_vectors, (len(mode_subset), -1, num_dim)
-                        )
-
-    # Memory inefficient: Large NumPy-Arrays
-    if not mem_eff:
-        # Create residue modes matrix (3N -> N for ANMs)
-        modes_mat_n = modes_reshaped[:, :, np.newaxis, :] *\
-                      modes_reshaped[:, np.newaxis, :, :]
-        modes_mat_n = np.sum(modes_mat_n, axis=-1)
-        modes_mat_n = modes_mat_n / eig_values[:, np.newaxis, np.newaxis]
-        dcc = np.sum(modes_mat_n, axis=0)
 
     ## Shortcut if all modes are included in computations
     # GNM -> DCC corresponds to inverted Kirchhoff 
-    elif is_gnm and all_modes:
+    if is_gnm and all_modes:
         dcc = enm.covariance
     # ANM -> ...to the trace of the inverted Hessian's 3x3 superelements
     elif all_modes:
@@ -359,6 +336,14 @@ def dcc(enm, mode_subset=None, norm=True, tem=None, tem_factors=K_B,
         dcc = np.einsum("...ii->...", reshaped)
     # Slower method for custom mode range
     else:
+        eig_values = eig_values[mode_subset]
+        eig_vectors = eig_vectors[mode_subset]
+
+        # Reshape array of eigenvectors
+        # (k,3n) -> (k,n,3) for ANMs; (k,n) -> (k,n,1) for GNMs
+        modes_reshaped = np.reshape(
+                            eig_vectors, (len(mode_subset), -1, num_dim)
+                            )
         dcc = np.zeros((n_nodes, n_nodes))
         for ev, evec in zip(eig_values, modes_reshaped):
             dcc += (evec @ evec.T) / ev

diff --git a/tests/test_forcefield.py b/tests/test_forcefield.py
@@ -11,46 +11,78 @@
 from rpy2.robjects.packages import importr
 from rpy2.robjects.vectors import DataFrame as DataFrame_r
 from rpy2.robjects import numpy2ri
+from rpy2.robjects.conversion import localconverter
 from .util import data_dir
 
 # Load bio3d in R
 bio3d = importr("bio3d")
 
-def coords_to_rnumvec(coords):
-    # Activate automatic Numpy <-> R interconversion
-    numpy2ri.activate()
-    # -> To R matrix
-    r_mat = robjects.r.matrix(robjects.FloatVector(coords.ravel()), 
-                                 nrow=coords.shape[0], 
-                                 ncol=coords.shape[1]
-                                )
-    # Concatenate Matrix to vector -> Flatten
-    r_as_vec = robjects.r["as.vector"]
-    r_vec = r_as_vec(r_mat)
-    numpy2ri.deactivate()
-
-    return r_vec
-
 # Convert AtomArray into bio3d-PDB objects
 def aarray_to_bio3d(aarray):
     coords = aarray.coord
+
+    # Resid names
+    residue_starts = struc.get_residue_starts(aarray)
+    res_names = aarray.res_name[residue_starts]
 
-    # Activate automatic Numpy <-> R interconversion
-    numpy2ri.activate()    
-    xyz_r = robjects.r.matrix(robjects.FloatVector(aarray.coord.ravel()), 
-                             nrow=coords.shape[0], 
-                             ncol=coords.shape[1]
+
+    xyz_r = robjects.r.matrix(robjects.FloatVector(coords.ravel()), 
+                             nrow = 1
                             )
-    res_names_r = robjects.StrVector(aarray.res_name)
-    chain_r = robjects.StrVector(aarray.chain_id)
+
+    assert not any(aarray.hetero)
+    # Local converter for Numpy -> R conversion
+    with localconverter(robjects.default_converter + numpy2ri.converter):
+        # TODO: HETATM should also be included 
+        #       -> AtomArrays should only contain protein atoms here for now
+        type_r = robjects.StrVector(["ATOM"]*len(aarray)) 
+        atom_id_r = robjects.IntVector(np.arange(1, len(aarray) + 1))
+        atom_names_r = robjects.StrVector(aarray.atom_name)
+        alt_r = robjects.StrVector(["NA"]*len(aarray))
+        res_names_r = robjects.StrVector(aarray.res_name)
+        chain_r = robjects.StrVector(aarray.chain_id)
+        resid_r = robjects.IntVector(aarray.res_id)
+        x_r = robjects.IntVector(coords[:,0])
+        y_r = robjects.IntVector(coords[:,1])
+        z_r = robjects.IntVector(coords[:,2])
+        o_r = robjects.IntVector([1]*len(aarray))
+        b_r = robjects.IntVector([0]*len(aarray))
 
     # Create a R dataframe
     # equivalent of res_name is resid in bio3d 
-    atoms_r = DataFrame_r({"resid": res_names_r, "chain": chain_r})
+    atoms_r = DataFrame_r({
+                            "type": type_r,
+                            "eleno": atom_id_r,
+                            "elety": atom_names_r,
+                            "alt": alt_r,
+                            "resid": res_names_r, 
+                            "chain": chain_r,
+                            "resno": resid_r,
+                            "x": x_r,
+                            "y": y_r,
+                            "z": z_r,
+                            "o": o_r,
+                            "b": b_r
+                        })
+
+    # Create bio3d PDB -> ListVector (list in R)
+    pdb_bio3d = robjects.ListVector({"xyz": xyz_r, 
+                                     "atom": atoms_r,
+                                     "calpha": robjects.NULL,
+                                     })
+    # Create S3 R class object
+    pdb_bio3d.rclass = robjects.StrVector(["pdb", "sse"])
+
+    # Get indices of Calpha atoms, as required by bio3d
+    ca = aarray[aarray.atom_name=="CA"]
+    ca_inds = ca.res_id
+    seq_all_atoms = np.arange(aarray.res_id[0], aarray.res_id[-1] + 1)
 
-    # Create PDB as ListVector (bio3d)
-    pdb_bio3d = robjects.ListVector({"xyz": xyz_r, "atom": atoms_r})
-    numpy2ri.deactivate()
+    # Local converter for Numpy -> R conversion
+    with localconverter(robjects.default_converter + numpy2ri.converter):
+        pdb_bio3d.rx2["calpha"] = np.isin(
+                        seq_all_atoms, ca_inds
+                        )
 
     return pdb_bio3d
 
@@ -454,35 +486,25 @@ def test_compare_with_bio3d(atoms_singlechain, ff_name):
     atoms_singlechain
     if ff_name == "Hinsen":
         ff = springcraft.HinsenForceField()
-        ff_bio3d_str = "calpha"
-        #ref_file = "hessian_calpha_bio3d.csv"
+        ff_bio3d_str = 'calpha'
     if ff_name == "sdENM":
         ff = springcraft.TabulatedForceField.sd_enm(atoms_singlechain)
         ff_bio3d_str = "sdenm"
-        #ref_file = "hessian_sdenm_bio3d.csv"
     if ff_name == "pfENM":
-        #ref_file = "hessian_pfenm_bio3d.csv"
         ff = springcraft.ParameterFreeForceField()
-        ff_bio3d_str = "pfenm"
+        ff_bio3d_str = "pfanm"
 
-    coord_vec_r = coords_to_rnumvec(atoms_singlechain.coord)
     ff_bio3d = bio3d.load_enmff(ff=ff_bio3d_str)
     pdb_bio3d = aarray_to_bio3d(atoms_singlechain)
-    print(type(pdb_bio3d))
-    ref_hessian = bio3d.build_hessian(coord_vec_r, 
-                                      pfc_fun=ff_bio3d, 
+    ref_hessian = bio3d.build_hessian(pdb_bio3d.rx2("xyz"), 
+                                      pfc_fun=ff_bio3d,
                                       pdb=pdb_bio3d
                                     )
 
     test_hessian, _ = springcraft.compute_hessian(atoms_singlechain.coord, ff)
 
-    #ref_hessian = np.genfromtxt(
-    #    join(data_dir(), ref_file),
-    #    skip_header=1, delimiter=","
-    #)
-
     # Higher deviation for Hinsen-FF
     if ff_name == "Hinsen":
-        assert np.allclose(test_hessian, ref_hessian, atol=1e-04)
+        assert np.allclose(test_hessian, np.asarray(ref_hessian), atol=1e-04)
     else:
-        assert np.allclose(test_hessian, ref_hessian)
+        assert np.allclose(test_hessian, np.asarray(ref_hessian))