Skip to content

Commit

Permalink
Merge branch 'master' into breaking-ptable-rare-earth
Browse files Browse the repository at this point in the history
Signed-off-by: Shyue Ping Ong <shyuep@users.noreply.github.com>
  • Loading branch information
shyuep authored May 30, 2024
2 parents 83ba373 + a772ddb commit a9a6c59
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 39 deletions.
4 changes: 2 additions & 2 deletions pymatgen/analysis/magnetism/jahnteller.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,10 @@ def _get_number_of_d_electrons(species: Species) -> float:
# TODO: replace with more generic Hund's rule algorithm?

# taken from get_crystal_field_spin
elec = species.full_electronic_structure
elec = species.element.full_electronic_structure
if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d":
raise AttributeError(f"Invalid element {species.symbol} for crystal field calculation.")
n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state)
n_electrons = int(elec[-1][2] + elec[-2][2] - species.oxi_state) # type: ignore
if n_electrons < 0 or n_electrons > 10:
raise AttributeError(f"Invalid oxidation state {species.oxi_state} for element {species.symbol}")

Expand Down
21 changes: 19 additions & 2 deletions pymatgen/core/periodic_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,23 @@ def spin(self) -> float | None:
"""Spin of Species."""
return self._spin

@property
def full_electronic_structure(self) -> list[tuple[int, str, int]]:
"""Full electronic structure as tuple. Not implemented for Species as of now."""
raise NotImplementedError

@property
def electronic_structure(self) -> list[tuple[int, str, int]]:
"""Electronic structure as tuple. Not implemented for Species as of now."""
raise NotImplementedError

@property
def valence(self) -> tuple[int | np.nan, int]:
"""Valence subshell angular moment (L) and number of valence e- (v_e),
obtained from full electron config. Not implemented for Species as of now.
"""
raise NotImplementedError

@property
def ionic_radius(self) -> float | None:
"""Ionic radius of specie. Returns None if data is not present."""
Expand Down Expand Up @@ -1224,11 +1241,11 @@ def get_crystal_field_spin(
if coordination not in ("oct", "tet") or spin_config not in ("high", "low"):
raise ValueError("Invalid coordination or spin config")

elec = self.full_electronic_structure
elec = self.element.full_electronic_structure
if len(elec) < 4 or elec[-1][1] != "s" or elec[-2][1] != "d":
raise AttributeError(f"Invalid element {self.symbol} for crystal field calculation")

n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state
n_electrons = elec[-1][2] + elec[-2][2] - self.oxi_state # type: ignore
if n_electrons < 0 or n_electrons > 10:
raise AttributeError(f"Invalid oxidation state {self.oxi_state} for element {self.symbol}")

Expand Down
24 changes: 14 additions & 10 deletions pymatgen/core/surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,7 @@ def __init__(
the c direction is parallel to the third lattice vector
"""

def reduce_vector(vector: tuple[int, int, int]) -> tuple[int, int, int]:
def reduce_vector(vector: MillerIndex) -> MillerIndex:
"""Helper function to reduce vectors."""
divisor = abs(reduce(gcd, vector)) # type: ignore[arg-type]
return cast(tuple[int, int, int], tuple(int(idx / divisor) for idx in vector))
Expand Down Expand Up @@ -1201,6 +1201,7 @@ def get_slabs(
max_broken_bonds: int = 0,
symmetrize: bool = False,
repair: bool = False,
ztol: float = 0,
) -> list[Slab]:
"""Generate slabs with shift values calculated from the internal
calculate_possible_shifts method. If the user decide to avoid breaking
Expand All @@ -1222,6 +1223,8 @@ def get_slabs(
repair (bool): Whether to repair terminations with broken bonds (True)
or just omit them (False). Default to False as repairing terminations
can lead to many more possible slabs.
ztol (float): Fractional tolerance for determine overlapping z-ranges,
smaller ztol might result in more possible Slabs.
Returns:
list[Slab]: All possible Slabs of a particular surface,
Expand Down Expand Up @@ -1282,7 +1285,10 @@ def gen_possible_shifts(ftol: float) -> list[float]:

return sorted(shifts)

def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]) -> list[tuple[float, float]]:
def get_z_ranges(
bonds: dict[tuple[Species | Element, Species | Element], float],
ztol: float,
) -> list[tuple[float, float]]:
"""Collect occupied z ranges where each z_range is a (lower_z, upper_z) tuple.
This method examines all sites in the oriented unit cell (OUC)
Expand All @@ -1292,7 +1298,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
Args:
bonds (dict): A {(species1, species2): max_bond_dist} dict.
tol (float): Fractional tolerance for determine overlapping positions.
ztol (float): Fractional tolerance for determine overlapping z-ranges.
"""
# Sanitize species in dict keys
bonds = {(get_el_sp(s1), get_el_sp(s2)): dist for (s1, s2), dist in bonds.items()}
Expand All @@ -1315,15 +1321,13 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
z_ranges.extend([(0, z_range[1]), (z_range[0] + 1, 1)])

# Neglect overlapping positions
elif z_range[0] != z_range[1]:
# TODO (@DanielYang59): use the following for equality check
# elif not isclose(z_range[0], z_range[1], abs_tol=tol):
elif not isclose(z_range[0], z_range[1], abs_tol=ztol):
z_ranges.append(z_range)

return z_ranges

# Get occupied z_ranges
z_ranges = [] if bonds is None else get_z_ranges(bonds)
z_ranges = [] if bonds is None else get_z_ranges(bonds, ztol)

slabs = []
for shift in gen_possible_shifts(ftol=ftol):
Expand All @@ -1350,7 +1354,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
# Filter out surfaces that might be the same
matcher = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False)

final_slabs = []
final_slabs: list[Slab] = []
for group in matcher.group_structures(slabs):
# For each unique slab, symmetrize the
# surfaces by removing sites from the bottom
Expand All @@ -1365,7 +1369,7 @@ def get_z_ranges(bonds: dict[tuple[Species | Element, Species | Element], float]
matcher_sym = StructureMatcher(ltol=tol, stol=tol, primitive_cell=False, scale=False)
final_slabs = [group[0] for group in matcher_sym.group_structures(final_slabs)]

return sorted(final_slabs, key=lambda slab: slab.energy) # type: ignore[return-value, arg-type]
return cast(list[Slab], sorted(final_slabs, key=lambda slab: slab.energy))

def repair_broken_bonds(
self,
Expand Down Expand Up @@ -2127,7 +2131,7 @@ def math_lcm(a: int, b: int) -> int:
if len([i for i in transf_hkl if i < 0]) > 1:
transf_hkl *= -1

return tuple(transf_hkl) # type: ignore[return-value]
return tuple(transf_hkl)


def miller_index_from_sites(
Expand Down
20 changes: 11 additions & 9 deletions pymatgen/io/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,9 @@ def __init__(
"""
Args:
filename (PathLike): CIF file, gzipped or bzipped CIF files are fine too.
occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the
occupancies will be scaled down to 1.
site_tolerance (float): This tolerance is used to determine if two sites are in the same position,
occupancy_tolerance (float): If total occupancy of a site is between
1 and occupancy_tolerance, it will be scaled down to 1.
site_tolerance (float): This tolerance is used to determine if two sites are at the same position,
in which case they will be combined to a single disordered site. Defaults to 1e-4.
frac_tolerance (float): This tolerance is used to determine is a coordinate should be rounded to an ideal
value. e.g. 0.6667 is rounded to 2/3. This is desired if symmetry operations are going to be applied.
Expand Down Expand Up @@ -1027,11 +1027,11 @@ def get_matching_coord(

# Get occupancy
try:
occu = str2float(data["_atom_site_occupancy"][idx])
occu: float = str2float(data["_atom_site_occupancy"][idx])
except (KeyError, ValueError):
occu = 1

# If check_occu is True or the occupancy is greater than 0, create comp_d
# If don't check_occu or the occupancy is greater than 0, create comp_dict
if not check_occu or occu > 0:
# Create site coordinate
coord: Vector3D = (
Expand Down Expand Up @@ -1073,7 +1073,7 @@ def get_matching_coord(

if any(occu > 1 for occu in _sum_occupancies):
msg = (
f"Some occupancies ({_sum_occupancies}) sum to > 1! If they are within "
f"Some occupancies ({filter(lambda x: x<=1, _sum_occupancies)}) sum to > 1! If they are within "
"the occupancy_tolerance, they will be rescaled. "
f"The current occupancy_tolerance is set to: {self._occupancy_tolerance}"
)
Expand Down Expand Up @@ -1149,7 +1149,10 @@ def get_matching_coord(
all_species_noedit = all_species.copy() # save copy before scaling in case of check_occu=False, used below
for idx, species in enumerate(all_species):
total_occu = sum(species.values())
if 1 < total_occu <= self._occupancy_tolerance:
if check_occu and total_occu > self._occupancy_tolerance:
raise ValueError(f"Occupancy {total_occu} exceeded tolerance.")

if total_occu > 1:
all_species[idx] = species / total_occu

if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms):
Expand Down Expand Up @@ -1198,6 +1201,7 @@ def get_matching_coord(
all_coords[idx],
lattice,
properties=site_properties,
label=all_labels[idx],
skip_checks=True,
)

Expand Down Expand Up @@ -1278,8 +1282,6 @@ def parse_structures(
"in the CIF file as is. If you want the primitive cell, please set primitive=True explicitly.",
UserWarning,
)
if not check_occu: # added in https://github.com/materialsproject/pymatgen/pull/2836
warnings.warn("Structures with unphysical site occupancies are not compatible with many pymatgen features.")

if primitive and symmetrized:
raise ValueError(
Expand Down
12 changes: 6 additions & 6 deletions pymatgen/io/lammps/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,17 +664,17 @@ def from_file(cls, filename: str, atom_style: str = "full", sort_id: bool = Fals
def parse_section(sec_lines) -> tuple[str, pd.DataFrame]:
title_info = sec_lines[0].split("#", 1)
kw = title_info[0].strip()
sio = StringIO("".join(sec_lines[2:])) # skip the 2nd line
str_io = StringIO("".join(sec_lines[2:])) # skip the 2nd line
if kw.endswith("Coeffs") and not kw.startswith("PairIJ"):
df_list = [
pd.read_csv(StringIO(line), header=None, comment="#", delim_whitespace=True)
pd.read_csv(StringIO(line), header=None, comment="#", sep=r"\s+")
for line in sec_lines[2:]
if line.strip()
]
df = pd.concat(df_list, ignore_index=True)
names = ["id"] + [f"coeff{i}" for i in range(1, df.shape[1])]
else:
df = pd.read_csv(sio, header=None, comment="#", delim_whitespace=True)
df = pd.read_csv(str_io, header=None, comment="#", sep=r"\s+")
if kw == "PairIJ Coeffs":
names = ["id1", "id2"] + [f"coeff{i}" for i in range(1, df.shape[1] - 1)]
df.index.name = None
Expand Down Expand Up @@ -1381,12 +1381,12 @@ def parse_xyz(cls, filename: str | Path) -> pd.DataFrame:
with zopen(filename, mode="rt") as file:
lines = file.readlines()

sio = StringIO("".join(lines[2:])) # skip the 2nd line
str_io = StringIO("".join(lines[2:])) # skip the 2nd line
df = pd.read_csv(
sio,
str_io,
header=None,
comment="#",
delim_whitespace=True,
sep=r"\s+",
names=["atom", "x", "y", "z"],
)
df.index += 1
Expand Down
4 changes: 2 additions & 2 deletions pymatgen/io/lammps/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def from_str(cls, string: str) -> Self:
bounds -= np.array([[min(x), max(x)], [min(y), max(y)], [0, 0]])
box = LammpsBox(bounds, tilt)
data_head = lines[8].replace("ITEM: ATOMS", "").split()
data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, delim_whitespace=True)
data = pd.read_csv(StringIO("\n".join(lines[9:])), names=data_head, sep=r"\s+")
return cls(time_step, n_atoms, box, data)

@classmethod
Expand Down Expand Up @@ -180,7 +180,7 @@ def _parse_thermo(lines: list[str]) -> pd.DataFrame:
df = df[columns]
# one line thermo data
else:
df = pd.read_csv(StringIO("".join(lines)), delim_whitespace=True)
df = pd.read_csv(StringIO("".join(lines)), sep=r"\s+")
return df

runs = []
Expand Down
4 changes: 2 additions & 2 deletions pymatgen/io/xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def as_dataframe(self):
pandas.DataFrame
"""
lines = str(self)
sio = StringIO(lines)
str_io = StringIO(lines)
df_xyz = pd.read_csv(
sio, header=None, skiprows=(0, 1), comment="#", delim_whitespace=True, names=("atom", "x", "y", "z")
str_io, header=None, skiprows=(0, 1), comment="#", sep=r"\s+", names=("atom", "x", "y", "z")
)
df_xyz.index += 1
return df_xyz
Expand Down
4 changes: 2 additions & 2 deletions pymatgen/util/provenance.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ def is_valid_bibtex(reference: str) -> bool:
"""
# str is necessary since pybtex seems to have an issue with unicode. The
# filter expression removes all non-ASCII characters.
sio = StringIO(reference.encode("ascii", "ignore").decode("ascii"))
str_io = StringIO(reference.encode("ascii", "ignore").decode("ascii"))
parser = bibtex.Parser()
errors.set_strict_mode(enable=False)
bib_data = parser.parse_stream(sio)
bib_data = parser.parse_stream(str_io)
return len(bib_data.entries) > 0


Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
numpy==1.26.0
sympy==1.12
requests==2.31.0
monty>=2024.5.24
requests==2.32.0
monty==2024.5.24
ruamel.yaml==0.18.6
scipy==1.11.3
tabulate==0.9.0
Expand Down
8 changes: 8 additions & 0 deletions tests/core/test_periodic_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,14 @@ def test_sort(self):
)
assert sp.spin == 5

def test_not_implemented(self):
with pytest.raises(NotImplementedError):
_ = Species("Fe", 2).full_electronic_structure
with pytest.raises(NotImplementedError):
_ = Species("Fe", 2).electronic_structure
with pytest.raises(NotImplementedError):
_ = Species("Fe", 2).valence


def test_get_el_sp():
assert get_el_sp("Fe2+") == Species("Fe", 2)
Expand Down
15 changes: 13 additions & 2 deletions tests/io/test_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,17 +731,28 @@ def test_empty(self):
cb2 = CifBlock.from_str(str(cb))
assert cb == cb2

def test_bad_cif(self):
def test_bad_occu(self):
filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif"
parser = CifParser(filepath)
with pytest.raises(
ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance."
):
parser.parse_structures(on_error="raise")
parser = CifParser(filepath, occupancy_tolerance=2)
struct = parser.parse_structures()[0]
assert struct[0].species["Al3+"] == approx(0.778)

def test_not_check_occu(self):
# Test large occupancy with check_occu turned off
with open(f"{TEST_FILES_DIR}/cif/site_type_symbol_test.cif") as cif_file:
cif_str = cif_file.read()
cif_str = cif_str.replace("Te Te 1.0000", "Te_label Te 10.0", 1)

structs = CifParser.from_str(cif_str).parse_structures(check_occu=False)

assert len(structs) > 0
assert set(structs[0].labels) == {"Te_label", "Ge"}

def test_one_line_symm(self):
cif_file = f"{TEST_FILES_DIR}/cif/OneLineSymmP1.cif"
parser = CifParser(cif_file)
Expand Down

0 comments on commit a9a6c59

Please sign in to comment.