diff --git a/pymatgen/analysis/magnetism/jahnteller.py b/pymatgen/analysis/magnetism/jahnteller.py index f5decc005f6..a4b8513b42f 100644 --- a/pymatgen/analysis/magnetism/jahnteller.py +++ b/pymatgen/analysis/magnetism/jahnteller.py @@ -345,10 +345,10 @@ def _get_number_of_d_electrons(species: Species) -> float: # TODO: replace with more generic Hund's rule algorithm? # taken from get_crystal_field_spin - elec = species.full_electronic_structure + elec = species.element.full_electronic_structure if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d": raise AttributeError(f"Invalid element {species.symbol} for crystal field calculation.") - n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state) + n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state) # type: ignore if n_electrons < 0 or n_electrons > 10: raise AttributeError(f"Invalid oxidation state {species.oxi_state} for element {species.symbol}") diff --git a/pymatgen/core/periodic_table.py b/pymatgen/core/periodic_table.py index 054a0005c3c..a70aa046fe5 100644 --- a/pymatgen/core/periodic_table.py +++ b/pymatgen/core/periodic_table.py @@ -1075,6 +1075,23 @@ def spin(self) -> float | None: """Spin of Species.""" return self._spin + @property + def full_electronic_structure(self) -> list[tuple[int, str, int]]: + """Full electronic structure as tuple. Not implemented for Species as of now.""" + raise NotImplementedError + + @property + def electronic_structure(self) -> list[tuple[int, str, int]]: + """Electronic structure as tuple. Not implemented for Species as of now.""" + raise NotImplementedError + + @property + def valence(self) -> tuple[int | np.nan, int]: + """Valence subshell angular moment (L) and number of valence e- (v_e), + obtained from full electron config. Not implemented for Species as of now. + """ + raise NotImplementedError + @property def ionic_radius(self) -> float | None: """Ionic radius of specie. Returns None if data is not present.""" @@ -1224,11 +1241,11 @@ def get_crystal_field_spin( if coordination not in ("oct", "tet") or spin_config not in ("high", "low"): raise ValueError("Invalid coordination or spin config") - elec = self.full_electronic_structure + elec = self.element.full_electronic_structure if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d": raise AttributeError(f"Invalid element {self.symbol} for crystal field calculation") - n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state + n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state # type: ignore if n_electrons < 0 or n_electrons > 10: raise AttributeError(f"Invalid oxidation state {self.oxi_state} for element {self.symbol}") diff --git a/pymatgen/core/surface.py b/pymatgen/core/surface.py index 1d5c08652d3..6f1f293e481 100644 --- a/pymatgen/core/surface.py +++ b/pymatgen/core/surface.py @@ -946,7 +946,7 @@ def __init__( the c direction is parallel to the third lattice vector """ - def reduce_vector(vector: tuple[int, int, int]) -> tuple[int, int, int]: + def reduce_vector(vector: MillerIndex) -> MillerIndex: """Helper function to reduce vectors.""" divisor = abs(reduce(gcd, vector)) # type: ignore[arg-type] return cast(tuple[int, int, int], tuple(int(idx / divisor) for idx in vector)) @@ -1201,6 +1201,7 @@ def get_slabs( max_broken_bonds: int = 0, symmetrize: bool = False, repair: bool = False, + ztol: float = 0, ) -> list[Slab]: """Generate slabs with shift values calculated from the internal calculate_possible_shifts method. If the user decide to avoid breaking @@ -1222,6 +1223,8 @@ def get_slabs( repair (bool): Whether to repair terminations with broken bonds (True) or just omit them (False). Default to False as repairing terminations can lead to many more possible slabs. + ztol (float): Fractional tolerance for determine overlapping z-ranges, + smaller ztol might result in more possible Slabs. Returns: list[Slab]: All possible Slabs of a particular surface, @@ -1282,7 +1285,10 @@ def gen_possible_shifts(ftol: float) -> list[float]: return sorted(shifts) - def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]) -> list[tuple[float, float]]: + def get_z_ranges( + bonds: dict[tuple[Species | Element, Species | Element], float], + ztol: float, + ) -> list[tuple[float, float]]: """Collect occupied z ranges where each z_range is a (lower_z, upper_z) tuple. This method examines all sites in the oriented unit cell (OUC) @@ -1292,7 +1298,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float] Args: bonds (dict): A {(species1, species2): max_bond_dist} dict. - tol (float): Fractional tolerance for determine overlapping positions. + ztol (float): Fractional tolerance for determine overlapping z-ranges. """ # Sanitize species in dict keys bonds = {(get_el_sp(s1), get_el_sp(s2)): dist for (s1, s2), dist in bonds.items()} @@ -1315,15 +1321,13 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float] z_ranges.extend([(0, z_range[1]), (z_range[0] + 1, 1)]) # Neglect overlapping positions - elif z_range[0] != z_range[1]: - # TODO (@DanielYang59): use the following for equality check - # elif not isclose(z_range[0], z_range[1], abs_tol=tol): + elif not isclose(z_range[0], z_range[1], abs_tol=ztol): z_ranges.append(z_range) return z_ranges # Get occupied z_ranges - z_ranges = [] if bonds is None else get_z_ranges(bonds) + z_ranges = [] if bonds is None else get_z_ranges(bonds, ztol) slabs = [] for shift in gen_possible_shifts(ftol=ftol): @@ -1350,7 +1354,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float] # Filter out surfaces that might be the same matcher = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False) - final_slabs = [] + final_slabs: list[Slab] = [] for group in matcher.group_structures(slabs): # For each unique slab, symmetrize the # surfaces by removing sites from the bottom @@ -1365,7 +1369,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float] matcher_sym = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False) final_slabs = [group[0] for group in matcher_sym.group_structures(final_slabs)] - return sorted(final_slabs, key=lambda slab: slab.energy) # type: ignore[return-value, arg-type] + return cast(list[Slab], sorted(final_slabs, key=lambda slab: slab.energy)) def repair_broken_bonds( self, @@ -2127,7 +2131,7 @@ def math_lcm(a: int, b: int) -> int: if len([i for i in transf_hkl if i < 0]) > 1: transf_hkl *= -1 - return tuple(transf_hkl) # type: ignore[return-value] + return tuple(transf_hkl) def miller_index_from_sites( diff --git a/pymatgen/io/cif.py b/pymatgen/io/cif.py index c94c114d6d5..c3b6083d83f 100644 --- a/pymatgen/io/cif.py +++ b/pymatgen/io/cif.py @@ -323,9 +323,9 @@ def __init__( """ Args: filename (PathLike): CIF file, gzipped or bzipped CIF files are fine too. - occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the - occupancies will be scaled down to 1. - site_tolerance (float): This tolerance is used to determine if two sites are in the same position, + occupancy_tolerance (float): If total occupancy of a site is between + 1 and occupancy_tolerance, it will be scaled down to 1. + site_tolerance (float): This tolerance is used to determine if two sites are at the same position, in which case they will be combined to a single disordered site. Defaults to 1e-4. frac_tolerance (float): This tolerance is used to determine is a coordinate should be rounded to an ideal value. e.g. 0.6667 is rounded to 2/3. This is desired if symmetry operations are going to be applied. @@ -1027,11 +1027,11 @@ def get_matching_coord( # Get occupancy try: - occu = str2float(data["_atom_site_occupancy"][idx]) + occu: float = str2float(data["_atom_site_occupancy"][idx]) except (KeyError, ValueError): occu = 1 - # If check_occu is True or the occupancy is greater than 0, create comp_d + # If don't check_occu or the occupancy is greater than 0, create comp_dict if not check_occu or occu > 0: # Create site coordinate coord: Vector3D = ( @@ -1073,7 +1073,7 @@ def get_matching_coord( if any(occu > 1 for occu in _sum_occupancies): msg = ( - f"Some occupancies ({_sum_occupancies}) sum to > 1! If they are within " + f"Some occupancies ({filter(lambda x: x<=1, _sum_occupancies)}) sum to > 1! If they are within " "the occupancy_tolerance, they will be rescaled. " f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}" ) @@ -1149,7 +1149,10 @@ def get_matching_coord( all_species_noedit = all_species.copy() # save copy before scaling in case of check_occu=False, used below for idx, species in enumerate(all_species): total_occu = sum(species.values()) - if 1 < total_occu <= self._occupancy_tolerance: + if check_occu and total_occu > self._occupancy_tolerance: + raise ValueError(f"Occupancy {total_occu} exceeded tolerance.") + + if total_occu > 1: all_species[idx] = species / total_occu if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms): @@ -1198,6 +1201,7 @@ def get_matching_coord( all_coords[idx], lattice, properties=site_properties, + label=all_labels[idx], skip_checks=True, ) @@ -1278,8 +1282,6 @@ def parse_structures( "in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.", UserWarning, ) - if not check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836 - warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.") if primitive and symmetrized: raise ValueError( diff --git a/pymatgen/io/lammps/data.py b/pymatgen/io/lammps/data.py index 3f7e3b88240..0119341be15 100644 --- a/pymatgen/io/lammps/data.py +++ b/pymatgen/io/lammps/data.py @@ -664,17 +664,17 @@ def from_file(cls, filename: str, atom_style: str = "full", sort_id: bool = Fals def parse_section(sec_lines) -> tuple[str, pd.DataFrame]: title_info = sec_lines[0].split("#", 1) kw = title_info[0].strip() - sio = StringIO("".join(sec_lines[2:])) # skip the 2nd line + str_io = StringIO("".join(sec_lines[2:])) # skip the 2nd line if kw.endswith("Coeffs") and not kw.startswith("PairIJ"): df_list = [ - pd.read_csv(StringIO(line), header=None, comment="#", delim_whitespace=True) + pd.read_csv(StringIO(line), header=None, comment="#", sep=r"\s+") for line in sec_lines[2:] if line.strip() ] df = pd.concat(df_list, ignore_index=True) names = ["id"] + [f"coeff{i}" for i in range(1, df.shape[1])] else: - df = pd.read_csv(sio, header=None, comment="#", delim_whitespace=True) + df = pd.read_csv(str_io, header=None, comment="#", sep=r"\s+") if kw == "PairIJ Coeffs": names = ["id1", "id2"] + [f"coeff{i}" for i in range(1, df.shape[1] - 1)] df.index.name = None @@ -1381,12 +1381,12 @@ def parse_xyz(cls, filename: str | Path) -> pd.DataFrame: with zopen(filename, mode="rt") as file: lines = file.readlines() - sio = StringIO("".join(lines[2:])) # skip the 2nd line + str_io = StringIO("".join(lines[2:])) # skip the 2nd line df = pd.read_csv( - sio, + str_io, header=None, comment="#", - delim_whitespace=True, + sep=r"\s+", names=["atom", "x", "y", "z"], ) df.index += 1 diff --git a/pymatgen/io/lammps/outputs.py b/pymatgen/io/lammps/outputs.py index 30757f6614d..033a7d58b4a 100644 --- a/pymatgen/io/lammps/outputs.py +++ b/pymatgen/io/lammps/outputs.py @@ -69,7 +69,7 @@ def from_str(cls, string: str) -> Self: bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]]) box = LammpsBox(bounds, tilt) data_head = lines[8].replace("ITEM: ATOMS", "").split() - data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, delim_whitespace=True) + data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+") return cls(time_step, n_atoms, box, data) @classmethod @@ -180,7 +180,7 @@ def _parse_thermo(lines: list[str]) -> pd.DataFrame: df = df[columns] # one line thermo data else: - df = pd.read_csv(StringIO("".join(lines)), delim_whitespace=True) + df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+") return df runs = [] diff --git a/pymatgen/io/xyz.py b/pymatgen/io/xyz.py index aefaa24f00a..595be4d7b14 100644 --- a/pymatgen/io/xyz.py +++ b/pymatgen/io/xyz.py @@ -118,9 +118,9 @@ def as_dataframe(self): pandas.DataFrame """ lines = str(self) - sio = StringIO(lines) + str_io = StringIO(lines) df_xyz = pd.read_csv( - sio, header=None, skiprows=(0, 1), comment="#", delim_whitespace=True, names=("atom", "x", "y", "z") + str_io, header=None, skiprows=(0, 1), comment="#", sep=r"\s+", names=("atom", "x", "y", "z") ) df_xyz.index += 1 return df_xyz diff --git a/pymatgen/util/provenance.py b/pymatgen/util/provenance.py index f1763eeaa0b..4402653b9b5 100644 --- a/pymatgen/util/provenance.py +++ b/pymatgen/util/provenance.py @@ -46,10 +46,10 @@ def is_valid_bibtex(reference: str) -> bool: """ # str is necessary since pybtex seems to have an issue with unicode. The # filter expression removes all non-ASCII characters. - sio = StringIO(reference.encode("ascii", "ignore").decode("ascii")) + str_io = StringIO(reference.encode("ascii", "ignore").decode("ascii")) parser = bibtex.Parser() errors.set_strict_mode(enable=False) - bib_data = parser.parse_stream(sio) + bib_data = parser.parse_stream(str_io) return len(bib_data.entries) > 0 diff --git a/requirements.txt b/requirements.txt index 6f58f99eda1..787b6b1c803 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy==1.26.0 sympy==1.12 -requests==2.31.0 -monty>=2024.5.24 +requests==2.32.0 +monty==2024.5.24 ruamel.yaml==0.18.6 scipy==1.11.3 tabulate==0.9.0 diff --git a/tests/core/test_periodic_table.py b/tests/core/test_periodic_table.py index 74aa0f8061e..1446665360e 100644 --- a/tests/core/test_periodic_table.py +++ b/tests/core/test_periodic_table.py @@ -591,6 +591,14 @@ def test_sort(self): ) assert sp.spin == 5 + def test_not_implemented(self): + with pytest.raises(NotImplementedError): + _ = Species("Fe", 2).full_electronic_structure + with pytest.raises(NotImplementedError): + _ = Species("Fe", 2).electronic_structure + with pytest.raises(NotImplementedError): + _ = Species("Fe", 2).valence + def test_get_el_sp(): assert get_el_sp("Fe2+") == Species("Fe", 2) diff --git a/tests/io/test_cif.py b/tests/io/test_cif.py index 5f0a897f0bf..87627ee9a7c 100644 --- a/tests/io/test_cif.py +++ b/tests/io/test_cif.py @@ -731,17 +731,28 @@ def test_empty(self): cb2 = CifBlock.from_str(str(cb)) assert cb == cb2 - def test_bad_cif(self): + def test_bad_occu(self): filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif" parser = CifParser(filepath) with pytest.raises( - ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!" + ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance." ): parser.parse_structures(on_error="raise") parser = CifParser(filepath, occupancy_tolerance=2) struct = parser.parse_structures()[0] assert struct[0].species["Al3+"] == approx(0.778) + def test_not_check_occu(self): + # Test large occupancy with check_occu turned off + with open(f"{TEST_FILES_DIR}/cif/site_type_symbol_test.cif") as cif_file: + cif_str = cif_file.read() + cif_str = cif_str.replace("Te Te 1.0000", "Te_label Te 10.0", 1) + + structs = CifParser.from_str(cif_str).parse_structures(check_occu=False) + + assert len(structs) > 0 + assert set(structs[0].labels) == {"Te_label", "Ge"} + def test_one_line_symm(self): cif_file = f"{TEST_FILES_DIR}/cif/OneLineSymmP1.cif" parser = CifParser(cif_file)