Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of unphysical (greater than 1) occupancies in CifParser and add missing site labels when check_occu is False #3819

Merged
merged 21 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions pymatgen/io/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ class CifParser:
def __init__(
self,
filename: str | StringIO,
occupancy_tolerance: float = 1.0,
occupancy_tolerance: float = 0.0,
DanielYang59 marked this conversation as resolved.
Show resolved Hide resolved
site_tolerance: float = 1e-4,
frac_tolerance: float = 1e-4,
check_cif: bool = True,
Expand All @@ -296,7 +296,7 @@ def __init__(
"""
Args:
filename (str): CIF filename, gzipped or bzipped CIF files are fine too.
occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the
occupancy_tolerance (float): If total occupancy of a site is between 1 and 1 + occupancy_tolerance, the
occupancies will be scaled down to 1.
site_tolerance (float): This tolerance is used to determine if two sites are sitting in the same position,
in which case they will be combined to a single disordered site. Defaults to 1e-4.
Expand Down Expand Up @@ -1082,8 +1082,10 @@ def get_matching_coord(coord):
all_species_noedit = all_species.copy() # save copy before scaling in case of check_occu=False, used below
for idx, species in enumerate(all_species):
total_occu = sum(species.values())
if 1 < total_occu <= self._occupancy_tolerance:
if 1 < total_occu <= 1 + self._occupancy_tolerance:
all_species[idx] = species / total_occu
elif total_occu > 1 + self._occupancy_tolerance:
raise ValueError(f"Occupancy {total_occu} exceeded tolerance.")

if all_species and len(all_species) == len(all_coords) and len(all_species) == len(all_magmoms):
site_properties = {}
Expand Down
6 changes: 3 additions & 3 deletions tests/io/test_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,10 +735,10 @@ def test_bad_cif(self):
filepath = f"{TEST_FILES_DIR}/cif/bad_occu.cif"
parser = CifParser(filepath)
with pytest.raises(
ValueError, match="No structure parsed for section 1 in CIF.\nSpecies occupancies sum to more than 1!"
Copy link
Contributor Author

@DanielYang59 DanielYang59 May 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The previous error message could be misleading: parsing fails here because the total occupancy exceeds the allowed tolerance (1 + occupancy_tolerance), not merely because it is greater than 1.

ValueError, match="No structure parsed for section 1 in CIF.\nOccupancy 1.556 exceeded tolerance."
):
parser.parse_structures(on_error="raise")
parser = CifParser(filepath, occupancy_tolerance=2)
parser = CifParser(filepath, occupancy_tolerance=1.0)
struct = parser.parse_structures()[0]
assert struct[0].species["Al3+"] == approx(0.778)

Expand Down Expand Up @@ -851,7 +851,7 @@ def test_no_check_occu(self):
# should fail without setting custom occupancy tolerance
CifParser.from_str(cif_str).parse_structures()

for tol in (1.5, 10):
for tol in (0.5, 9):
parser = CifParser.from_str(cif_str, occupancy_tolerance=tol)
structs = parser.parse_structures(primitive=False, check_occu=False)[0]
assert structs[0].species.as_dict()["Te"] == 1.5
Expand Down