Add pymatgen.io.openff module (#3729)

* Add functionality and tests for pymatgen.io.openff. * Add test files. * Update testing fixture with correct test file paths. * Update docstrings to google format. * Add openff install from source to test.yml * Lint with black. * Small formatting change. * Try changing test.yml * Add manual install of units. * Add import warning. * Try adding install for openff-utilities. * Add openbabel install. * Add openbabel dependency to setup.py * Change openbabel version * fix babel dep. * Try building babel from source. * Remove openbabel from setup.py and requirements-optional.txt. * Attempt new testing build with micromamba. * Remove bader and enumlib * Update testing file * Try chaning testing to install pytest. * Run testing with micromamba. * Fix minor typo. * Try switching to editable install. * Try changing TEST_FILES_DIR * Undo previous change * Add manual builds of packmol, enumlib, and bader. * Switch to openff-toolkit-base * Convert manual builds of enumlib, packmol, bader, and openbabel to conda installs. * Try limiting windows openbabel install. * Skip openff tests if openff-toolkit not installed * Try adding uv and skipping tests. * Fix typo * Add non-failing optional import. * Add importorskip call. * Fix import or skip and reimport bson. * Reinstall pymongo in CI to fix bson error. Remove commented out manual installs. * Fix errors in babel.py * Remove unneeded step. * fix TestMoleculeGraph.test_construction expected error message on using inappropriate strategy for molecules * swap assert_array_equal for assert_allclose in test_outputs.py, maybe fixes TestQCOutput.test_all * cast coords to float in BabelMolAdaptor ob_atom.SetVector * Refactor assert statements in test_outputs.py to use approx() for dicts * Coerce formal charge to int in molgraph_to_openff_mol * rename micromamba to pmg * micromamba activate pmg * remove pymongo install, should not be needed? * micromamba activate pmg in pytest step * set shell: bash -l {0} for mamba env activate * replace hard-to-install bson.BSON (93406b8) with pymatgen-native assert_msonable * replace isinstance with issubclass in assert_msonable * assert_msonable replace obj.to_json() with json.dumps(obj.as_dict(), cls=MontyEncoder) * skip failing TestQCOutput.test_all remove unused self.maxDiff = None * rename mol(''->_)graph_to_openff_mol * missed some --------- Co-authored-by: Janosh Riebesell <janosh.riebesell@gmail.com>
materialsproject · Apr 10, 2024 · 7064c43 · 7064c43
1 parent 55869a1
commit 7064c43
Show file tree

Hide file tree

Showing 28 changed files with 698 additions and 116 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -19,6 +19,9 @@ jobs:
   test:
     # prevent this action from running on forks
     if: github.repository == 'materialsproject/pymatgen'
+    defaults:
+      run:
+        shell: bash -l {0} # enables conda/mamba env activation by reading bash profile
     strategy:
       fail-fast: false
       matrix:
@@ -48,69 +51,41 @@ jobs:
       - name: Check out repo
         uses: actions/checkout@v4
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: pip
-          cache-dependency-path: setup.py
+      - name: Set up micromamba
+        uses: mamba-org/setup-micromamba@main
+
+      - name: Create mamba environment
+        run: |
+          micromamba create -n pmg python=${{ matrix.python-version }} --yes
 
       - name: Install uv
-        run: pip install uv
+        run: micromamba run -n pmg pip install uv
 
       - name: Copy GULP to bin
         if: matrix.os == 'ubuntu-latest'
         run: |
           sudo cp cmd_line/gulp/Linux_64bit/* /usr/local/bin/
 
-      - name: Install Bader
+      - name: Install ubuntu-only conda dependencies
         if: matrix.os == 'ubuntu-latest'
         run: |
-          wget https://theory.cm.utexas.edu/henkelman/code/bader/download/bader_lnx_64.tar.gz
-          tar xvzf bader_lnx_64.tar.gz
-          sudo mv bader /usr/local/bin/
-        continue-on-error: true # This is not critical to succeed.
+          micromamba install -n pmg -c conda-forge enumlib packmol bader openbabel openff-toolkit --yes
 
-      - name: Install Enumlib
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          git clone --recursive https://github.com/msg-byu/enumlib
-          cd enumlib/symlib/src
-          export F90=gfortran
-          make
-          cd ../../src
-          make enum.x
-          sudo mv enum.x /usr/local/bin/
-          cd ..
-          sudo cp aux_src/makeStr.py /usr/local/bin/
-        continue-on-error: true # This is not critical to succeed.
-
-      - name: Install Packmol
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          wget -O packmol.tar.gz https://github.com/m3g/packmol/archive/refs/tags/v20.14.2.tar.gz
-          tar xvzf packmol.tar.gz
-          export F90=gfortran
-          cd packmol-20.14.2
-          ./configure
-          make
-          sudo mv packmol /usr/local/bin/
-          cd ..
-        continue-on-error: true # This is not critical to succeed.
-
-      - name: Install dependencies
+      - name: Install pymatgen and dependencies
         run: |
+          micromamba activate pmg
           # TODO remove temporary fix. added since uv install torch is flaky.
           # track https://github.com/astral-sh/uv/issues/1921 for resolution
           pip install torch
 
-          uv pip install numpy cython --system
+          uv pip install numpy cython
 
-          uv pip install -e '.[dev,optional]' --system
+          uv pip install --editable '.[dev,optional]'
 
           # TODO remove next line installing ase from main branch when FrechetCellFilter is released
-          uv pip install --upgrade 'git+https://gitlab.com/ase/ase' --system
+          uv pip install --upgrade 'git+https://gitlab.com/ase/ase'
 
       - name: pytest split ${{ matrix.split }}
         run: |
+          micromamba activate pmg
           pytest --splits 10 --group ${{ matrix.split }} --durations-path tests/files/.pytest-split-durations tests
diff --git a/pymatgen/analysis/bond_dissociation.py b/pymatgen/analysis/bond_dissociation.py
@@ -246,9 +246,9 @@ def fragment_and_process(self, bonds):
     def search_fragment_entries(self, frag) -> list:
         """
         Search all fragment entries for those isomorphic to the given fragment.
-        We distinguish between entries where both initial and final molgraphs are isomorphic to the
-        given fragment (entries) vs those where only the initial molgraph is isomorphic to the given
-        fragment (initial_entries) vs those where only the final molgraph is isomorphic (final_entries).
+        We distinguish between entries where both initial and final MoleculeGraphs are isomorphic to the
+        given fragment (entries) vs those where only the initial MoleculeGraph is isomorphic to the given
+        fragment (initial_entries) vs those where only the final MoleculeGraph is isomorphic (final_entries).
 
         Args:
             frag: Fragment

diff --git a/pymatgen/analysis/functional_groups.py b/pymatgen/analysis/functional_groups.py
@@ -106,16 +106,16 @@ def get_heteroatoms(self, elements=None):
         Returns:
             set of ints representing node indices
         """
-        heteroatoms = set()
+        hetero_atoms = set()
 
         for node in self.molgraph.graph.nodes():
             if elements is not None:
                 if str(self.species[node]) in elements:
-                    heteroatoms.add(node)
+                    hetero_atoms.add(node)
             elif str(self.species[node]) not in ["C", "H"]:
-                heteroatoms.add(node)
+                hetero_atoms.add(node)
 
-        return heteroatoms
+        return hetero_atoms
 
     def get_special_carbon(self, elements=None):
         """

diff --git a/pymatgen/io/abinit/abiobjects.py b/pymatgen/io/abinit/abiobjects.py
@@ -386,7 +386,7 @@ def as_dict(self):
     @classmethod
     def from_dict(cls, dct: dict) -> Self:
         """Build object from dict."""
-        return cls(**{k: dct[k] for k in dct if k in cls._fields})
+        return cls(**{key: dct[key] for key in dct if key in cls._fields})
 
 
 # An handy Multiton

diff --git a/pymatgen/io/babel.py b/pymatgen/io/babel.py
@@ -70,7 +70,7 @@ def __init__(self, mol: Molecule | openbabel.OBMol | pybel.Molecule) -> None:
                 ob_atom = openbabel.OBAtom()
                 ob_atom.thisown = 0
                 ob_atom.SetAtomicNum(atom_no)
-                ob_atom.SetVector(*coords)
+                ob_atom.SetVector(*map(float, coords))
                 ob_mol.AddAtom(ob_atom)
                 del ob_atom
             ob_mol.ConnectTheDots()
@@ -182,14 +182,14 @@ def rotor_conformer(self, *rotor_args, algo: str = "WeightedRotorSearch", forcef
         else:
             self.add_hydrogen()
 
-        ff = openbabel.OBForceField_FindType(forcefield)
+        ff = openbabel.OBForceField.FindType(forcefield)
         if ff == 0:
             warnings.warn(
                 f"This input {forcefield=} is not supported "
                 "in openbabel. The forcefield will be reset as "
                 "default 'mmff94' for now."
             )
-            ff = openbabel.OBForceField_FindType("mmff94")
+            ff = openbabel.OBForceField.FindType("mmff94")
 
         try:
             rotor_search = getattr(ff, algo)
@@ -264,10 +264,10 @@ def confab_conformers(
         else:
             self.add_hydrogen()
 
-        ff = openbabel.OBForceField_FindType(forcefield)
+        ff = openbabel.OBForceField.FindType(forcefield)
         if ff == 0:
             print(f"Could not find {forcefield=} in openbabel, the forcefield will be reset as default 'mmff94'")
-            ff = openbabel.OBForceField_FindType("mmff94")
+            ff = openbabel.OBForceField.FindType("mmff94")
 
         if freeze_atoms:
             print(f"{len(freeze_atoms)} atoms will be freezed")
@@ -346,8 +346,8 @@ def from_molecule_graph(cls, mol: MoleculeGraph) -> Self:
         """
         return cls(mol.molecule)
 
-    @needs_openbabel
     @classmethod
+    @needs_openbabel
     def from_str(cls, string_data: str, file_format: str = "xyz") -> Self:
         """
         Uses OpenBabel to read a molecule from a string in all supported