Merge branch 'master' into breaking-ptable-rare-earth

Signed-off-by: Shyue Ping Ong <shyuep@users.noreply.github.com>
materialsproject · May 30, 2024 · a9a6c59 · a9a6c59
2 parents 83ba373 + a772ddb
commit a9a6c59
Show file tree

Hide file tree

Showing 11 changed files with 81 additions and 39 deletions.
diff --git a/pymatgen/analysis/magnetism/jahnteller.py b/pymatgen/analysis/magnetism/jahnteller.py
@@ -345,10 +345,10 @@ def _get_number_of_d_electrons(species: Species) -> float:
         # TODO: replace with more generic Hund's rule algorithm?
 
         # taken from get_crystal_field_spin
-        elec = species.full_electronic_structure
+        elec = species.element.full_electronic_structure
         if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d":
             raise AttributeError(f"Invalid element {species.symbol} for crystal field calculation.")
-        n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state)
+        n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state)  # type: ignore
         if n_electrons < 0 or n_electrons > 10:
             raise AttributeError(f"Invalid oxidation state {species.oxi_state} for element {species.symbol}")
 

diff --git a/pymatgen/core/periodic_table.py b/pymatgen/core/periodic_table.py
@@ -1075,6 +1075,23 @@ def spin(self) -> float | None:
         """Spin of Species."""
         return self._spin
 
+    @property
+    def full_electronic_structure(self) -> list[tuple[int, str, int]]:
+        """Full electronic structure as tuple. Not implemented for Species as of now."""
+        raise NotImplementedError
+
+    @property
+    def electronic_structure(self) -> list[tuple[int, str, int]]:
+        """Electronic structure as tuple. Not implemented for Species as of now."""
+        raise NotImplementedError
+
+    @property
+    def valence(self) -> tuple[int | np.nan, int]:
+        """Valence subshell angular moment (L) and number of valence e- (v_e),
+        obtained from full electron config. Not implemented for Species as of now.
+        """
+        raise NotImplementedError
+
     @property
     def ionic_radius(self) -> float | None:
         """Ionic radius of specie. Returns None if data is not present."""
@@ -1224,11 +1241,11 @@ def get_crystal_field_spin(
         if coordination not in ("oct", "tet") or spin_config not in ("high", "low"):
             raise ValueError("Invalid coordination or spin config")
 
-        elec = self.full_electronic_structure
+        elec = self.element.full_electronic_structure
         if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d":
             raise AttributeError(f"Invalid element {self.symbol} for crystal field calculation")
 
-        n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state
+        n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state  # type: ignore
         if n_electrons < 0 or n_electrons > 10:
             raise AttributeError(f"Invalid oxidation state {self.oxi_state} for element {self.symbol}")
 

diff --git a/pymatgen/core/surface.py b/pymatgen/core/surface.py
@@ -946,7 +946,7 @@ def __init__(
                 the c direction is parallel to the third lattice vector
         """
 
-        def reduce_vector(vector: tuple[int, int, int]) -> tuple[int, int, int]:
+        def reduce_vector(vector: MillerIndex) -> MillerIndex:
             """Helper function to reduce vectors."""
             divisor = abs(reduce(gcd, vector))  # type: ignore[arg-type]
             return cast(tuple[int, int, int], tuple(int(idx / divisor) for idx in vector))
@@ -1201,6 +1201,7 @@ def get_slabs(
         max_broken_bonds: int = 0,
         symmetrize: bool = False,
         repair: bool = False,
+        ztol: float = 0,
     ) -> list[Slab]:
         """Generate slabs with shift values calculated from the internal
         calculate_possible_shifts method. If the user decide to avoid breaking
@@ -1222,6 +1223,8 @@ def get_slabs(
             repair (bool): Whether to repair terminations with broken bonds (True)
                 or just omit them (False). Default to False as repairing terminations
                 can lead to many more possible slabs.
+            ztol (float): Fractional tolerance for determine overlapping z-ranges,
+                smaller ztol might result in more possible Slabs.
 
         Returns:
             list[Slab]: All possible Slabs of a particular surface,
@@ -1282,7 +1285,10 @@ def gen_possible_shifts(ftol: float) -> list[float]:
 
             return sorted(shifts)
 
-        def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]) -> list[tuple[float, float]]:
+        def get_z_ranges(
+            bonds: dict[tuple[Species | Element, Species | Element], float],
+            ztol: float,
+        ) -> list[tuple[float, float]]:
             """Collect occupied z ranges where each z_range is a (lower_z, upper_z) tuple.
 
             This method examines all sites in the oriented unit cell (OUC)
@@ -1292,7 +1298,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
 
             Args:
                 bonds (dict): A {(species1, species2): max_bond_dist} dict.
-                tol (float): Fractional tolerance for determine overlapping positions.
+                ztol (float): Fractional tolerance for determine overlapping z-ranges.
             """
             # Sanitize species in dict keys
             bonds = {(get_el_sp(s1), get_el_sp(s2)): dist for (s1, s2), dist in bonds.items()}
@@ -1315,15 +1321,13 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
                                     z_ranges.extend([(0, z_range[1]), (z_range[0] + 1, 1)])
 
                                 # Neglect overlapping positions
-                                elif z_range[0] != z_range[1]:
-                                    # TODO (@DanielYang59): use the following for equality check
-                                    # elif not isclose(z_range[0], z_range[1], abs_tol=tol):
+                                elif not isclose(z_range[0], z_range[1], abs_tol=ztol):
                                     z_ranges.append(z_range)
 
             return z_ranges
 
         # Get occupied z_ranges
-        z_ranges = [] if bonds is None else get_z_ranges(bonds)
+        z_ranges = [] if bonds is None else get_z_ranges(bonds, ztol)
 
         slabs = []
         for shift in gen_possible_shifts(ftol=ftol):
@@ -1350,7 +1354,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
         # Filter out surfaces that might be the same
         matcher = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False)
 
-        final_slabs = []
+        final_slabs: list[Slab] = []
         for group in matcher.group_structures(slabs):
             # For each unique slab, symmetrize the
             # surfaces by removing sites from the bottom
@@ -1365,7 +1369,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
             matcher_sym = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False)
             final_slabs = [group[0] for group in matcher_sym.group_structures(final_slabs)]
 
-        return sorted(final_slabs, key=lambda slab: slab.energy)  # type: ignore[return-value, arg-type]
+        return cast(list[Slab], sorted(final_slabs, key=lambda slab: slab.energy))
 
     def repair_broken_bonds(
         self,
@@ -2127,7 +2131,7 @@ def math_lcm(a: int, b: int) -> int:
     if len([i for i in transf_hkl if i < 0]) > 1:
         transf_hkl *= -1
 
-    return tuple(transf_hkl)  # type: ignore[return-value]
+    return tuple(transf_hkl)
 
 
 def miller_index_from_sites(

diff --git a/pymatgen/io/cif.py b/pymatgen/io/cif.py
@@ -323,9 +323,9 @@ def __init__(
         """
         Args:
             filename (PathLike): CIF file, gzipped or bzipped CIF files are fine too.
-            occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the
-                occupancies will be scaled down to 1.
-            site_tolerance (float): This tolerance is used to determine if two sites are in the same position,
+            occupancy_tolerance (float): If total occupancy of a site is between
+                1 and occupancy_tolerance, it will be scaled down to 1.
+            site_tolerance (float): This tolerance is used to determine if two sites are at the same position,
                 in which case they will be combined to a single disordered site. Defaults to 1e-4.
             frac_tolerance (float): This tolerance is used to determine is a coordinate should be rounded to an ideal
                 value. e.g. 0.6667 is rounded to 2/3. This is desired if symmetry operations are going to be applied.
@@ -1027,11 +1027,11 @@ def get_matching_coord(
 
             # Get occupancy
             try:
-                occu = str2float(data["_atom_site_occupancy"][idx])
+                occu: float = str2float(data["_atom_site_occupancy"][idx])
             except (KeyError, ValueError):
                 occu = 1
 
-            # If check_occu is True or the occupancy is greater than 0, create comp_d
+            # If don't check_occu or the occupancy is greater than 0, create comp_dict
             if not check_occu or occu > 0:
                 # Create site coordinate
                 coord: Vector3D = (
@@ -1073,7 +1073,7 @@ def get_matching_coord(
 
         if any(occu > 1 for occu in _sum_occupancies):
             msg = (
-                f"Some occupancies ({_sum_occupancies}) sum to > 1! If they are within "
+                f"Some occupancies ({filter(lambda x: x<=1, _sum_occupancies)}) sum to > 1! If they are within "
                 "the occupancy_tolerance, they will be rescaled. "
                 f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}"
             )
@@ -1149,7 +1149,10 @@ def get_matching_coord(
             all_species_noedit = all_species.copy()  # save copy before scaling in case of check_occu=False, used below
             for idx, species in enumerate(all_species):
                 total_occu = sum(species.values())
-                if 1 < total_occu <= self._occupancy_tolerance:
+                if check_occu and total_occu > self._occupancy_tolerance:
+                    raise ValueError(f"Occupancy {total_occu} exceeded tolerance.")
+
+                if total_occu > 1:
                     all_species[idx] = species / total_occu
 
         if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms):
@@ -1198,6 +1201,7 @@ def get_matching_coord(
                         all_coords[idx],
                         lattice,
                         properties=site_properties,
+                        label=all_labels[idx],
                         skip_checks=True,
                     )
 
@@ -1278,8 +1282,6 @@ def parse_structures(
                 "in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.",
                 UserWarning,
             )
-        if not check_occu:  # added in https://github.com/materialsproject/pymatgen/pull/2836
-            warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")
 
         if primitive and symmetrized:
             raise ValueError(

diff --git a/pymatgen/io/lammps/data.py b/pymatgen/io/lammps/data.py
@@ -664,17 +664,17 @@ def from_file(cls, filename: str, atom_style: str = "full", sort_id: bool = Fals
         def parse_section(sec_lines) -> tuple[str, pd.DataFrame]:
             title_info = sec_lines[0].split("#", 1)
             kw = title_info[0].strip()
-            sio = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
+            str_io = StringIO("".join(sec_lines[2:]))  # skip the 2nd line
             if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
                 df_list = [
-                    pd.read_csv(StringIO(line), header=None, comment="#", delim_whitespace=True)
+                    pd.read_csv(StringIO(line), header=None, comment="#", sep=r"\s+")
                     for line in sec_lines[2:]
                     if line.strip()
                 ]
                 df = pd.concat(df_list, ignore_index=True)
                 names = ["id"] + [f"coeff{i}" for i in range(1, df.shape[1])]
             else:
-                df = pd.read_csv(sio, header=None, comment="#", delim_whitespace=True)
+                df = pd.read_csv(str_io, header=None, comment="#", sep=r"\s+")
                 if kw == "PairIJ Coeffs":
                     names = ["id1", "id2"] + [f"coeff{i}" for i in range(1, df.shape[1] - 1)]
                     df.index.name = None
@@ -1381,12 +1381,12 @@ def parse_xyz(cls, filename: str | Path) -> pd.DataFrame:
         with zopen(filename, mode="rt") as file:
             lines = file.readlines()
 
-        sio = StringIO("".join(lines[2:]))  # skip the 2nd line
+        str_io = StringIO("".join(lines[2:]))  # skip the 2nd line
         df = pd.read_csv(
-            sio,
+            str_io,
             header=None,
             comment="#",
-            delim_whitespace=True,
+            sep=r"\s+",
             names=["atom", "x", "y", "z"],
         )
         df.index += 1

diff --git a/pymatgen/io/lammps/outputs.py b/pymatgen/io/lammps/outputs.py
@@ -69,7 +69,7 @@ def from_str(cls, string: str) -> Self:
             bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
         box = LammpsBox(bounds, tilt)
         data_head = lines[8].replace("ITEM: ATOMS", "").split()
-        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, delim_whitespace=True)
+        data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
         return cls(time_step, n_atoms, box, data)
 
     @classmethod
@@ -180,7 +180,7 @@ def _parse_thermo(lines: list[str]) -> pd.DataFrame:
             df = df[columns]
         # one line thermo data
         else:
-            df = pd.read_csv(StringIO("".join(lines)), delim_whitespace=True)
+            df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
         return df
 
     runs = []

diff --git a/pymatgen/io/xyz.py b/pymatgen/io/xyz.py
@@ -118,9 +118,9 @@ def as_dataframe(self):
             pandas.DataFrame
         """
         lines = str(self)
-        sio = StringIO(lines)
+        str_io = StringIO(lines)
         df_xyz = pd.read_csv(
-            sio, header=None, skiprows=(0, 1), comment="#", delim_whitespace=True, names=("atom", "x", "y", "z")
+            str_io, header=None, skiprows=(0, 1), comment="#", sep=r"\s+", names=("atom", "x", "y", "z")
         )
         df_xyz.index += 1
         return df_xyz

diff --git a/pymatgen/util/provenance.py b/pymatgen/util/provenance.py
@@ -46,10 +46,10 @@ def is_valid_bibtex(reference: str) -> bool:
     """
     # str is necessary since pybtex seems to have an issue with unicode. The
     # filter expression removes all non-ASCII characters.
-    sio = StringIO(reference.encode("ascii", "ignore").decode("ascii"))
+    str_io = StringIO(reference.encode("ascii", "ignore").decode("ascii"))
     parser = bibtex.Parser()
     errors.set_strict_mode(enable=False)
-    bib_data = parser.parse_stream(sio)
+    bib_data = parser.parse_stream(str_io)
     return len(bib_data.entries) > 0
 
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
 numpy==1.26.0
 sympy==1.12
-requests==2.31.0
-monty>=2024.5.24
+requests==2.32.0
+monty==2024.5.24
 ruamel.yaml==0.18.6
 scipy==1.11.3
 tabulate==0.9.0

diff --git a/tests/core/test_periodic_table.py b/tests/core/test_periodic_table.py
@@ -591,6 +591,14 @@ def test_sort(self):
         )
         assert sp.spin == 5
 
+    def test_not_implemented(self):
+        with pytest.raises(NotImplementedError):
+            _ = Species("Fe", 2).full_electronic_structure
+        with pytest.raises(NotImplementedError):
+            _ = Species("Fe", 2).electronic_structure
+        with pytest.raises(NotImplementedError):
+            _ = Species("Fe", 2).valence
+
 
 def test_get_el_sp():
     assert get_el_sp("Fe2+") == Species("Fe", 2)

diff --git a/tests/io/test_cif.py b/tests/io/test_cif.py
@@ -731,17 +731,28 @@ def test_empty(self):
         cb2 = CifBlock.from_str(str(cb))
         assert cb == cb2
 
-    def test_bad_cif(self):
+    def test_bad_occu(self):
         filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif"
         parser = CifParser(filepath)
         with pytest.raises(
-            ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
+            ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance."
         ):
             parser.parse_structures(on_error="raise")
         parser = CifParser(filepath, occupancy_tolerance=2)
         struct = parser.parse_structures()[0]
         assert struct[0].species["Al3+"] == approx(0.778)
 
+    def test_not_check_occu(self):
+        # Test large occupancy with check_occu turned off
+        with open(f"{TEST_FILES_DIR}/cif/site_type_symbol_test.cif") as cif_file:
+            cif_str = cif_file.read()
+        cif_str = cif_str.replace("Te    Te 1.0000", "Te_label    Te 10.0", 1)
+
+        structs = CifParser.from_str(cif_str).parse_structures(check_occu=False)
+
+        assert len(structs) > 0
+        assert set(structs[0].labels) == {"Te_label", "Ge"}
+
     def test_one_line_symm(self):
         cif_file = f"{TEST_FILES_DIR}/cif/OneLineSymmP1.cif"
         parser = CifParser(cif_file)