-
Notifications
You must be signed in to change notification settings - Fork 876
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
""" | ||
This module provides conversion between structure entries following the | ||
OPTIMADE (https://optimade.org) standard and pymatgen Structure objects. | ||
The code is adapted from the `optimade.adapters.structures.pymatgen` module in | ||
optimade-python-tools (https://github.com/Materials-Consortia/optimade-python-tools), | ||
and aims to work without requiring the explicit installation of the `optimade-python-tools`. | ||
""" | ||
|
||
from __future__ import annotations | ||
|
||
import itertools | ||
import json | ||
import math | ||
import re | ||
from functools import reduce | ||
from typing import TYPE_CHECKING | ||
|
||
from pymatgen.core.structure import Lattice, Structure | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Generator | ||
from typing import Any | ||
|
||
|
||
__author__ = "Matthew Evans" | ||
|
||
|
||
def _pymatgen_species( | ||
nsites: int, | ||
species_at_sites: list[str], | ||
) -> list[dict[str, float]]: | ||
"""Create list of {"symbol": "concentration"} per site for constructing pymatgen Species objects. | ||
Removes vacancies, if they are present. | ||
This function is adapted from the `optimade.adapters.structures.pymatgen` module in `optimade-python-tools`, | ||
with some of the generality removed (in terms of partial occupancy). | ||
""" | ||
species = [{"name": _, "concentration": [1.0], "chemical_symbols": [_]} for _ in set(species_at_sites)] | ||
species_dict = {_["name"]: _ for _ in species} | ||
|
||
pymatgen_species = [] | ||
for site_number in range(nsites): | ||
species_name = species_at_sites[site_number] | ||
current_species = species_dict[species_name] | ||
|
||
chemical_symbols = [] | ||
concentration = [] | ||
for index, symbol in enumerate(current_species["chemical_symbols"]): | ||
if symbol == "vacancy": | ||
# Skip. This is how pymatgen handles vacancies; | ||
# to not include them, while keeping the concentration in a site less than 1. | ||
continue | ||
chemical_symbols.append(symbol) | ||
concentration.append(current_species["concentration"][index]) | ||
|
||
pymatgen_species.append(dict(zip(chemical_symbols, concentration))) | ||
|
||
return pymatgen_species | ||
|
||
|
||
def _optimade_anonymous_element_generator() -> Generator[str, None, None]: | ||
"""Generator that yields the next symbol in the A, B, Aa, ... Az OPTIMADE anonymous | ||
element naming scheme. | ||
""" | ||
from string import ascii_lowercase | ||
|
||
for size in itertools.count(1): | ||
for tuple_strings in itertools.product(ascii_lowercase, repeat=size): | ||
list_strings = list(tuple_strings) | ||
list_strings[0] = list_strings[0].upper() | ||
yield "".join(list_strings) | ||
|
||
|
||
def _optimade_reduce_or_anonymize_formula(formula: str, alphabetize: bool = True, anonymize: bool = False) -> str: | ||
"""Takes an input formula, reduces it and either alphabetizes or anonymizes it | ||
following the OPTIMADE standard. | ||
""" | ||
|
||
numbers: list[int] = [int(n.strip() or 1) for n in re.split(r"[A-Z][a-z]*", formula)[1:]] | ||
# Need to remove leading 1 from split and convert to ints | ||
|
||
species: list[str] = re.findall("[A-Z][a-z]*", formula) | ||
|
||
gcd = reduce(math.gcd, numbers) | ||
|
||
if not len(species) == len(numbers): | ||
raise ValueError(f"Something is wrong with the input formula: {formula}") | ||
|
||
numbers = [n // gcd for n in numbers] | ||
|
||
if anonymize: | ||
numbers = sorted(numbers, reverse=True) | ||
species = [s for _, s in zip(numbers, _optimade_anonymous_element_generator())] | ||
|
||
elif alphabetize: | ||
species, numbers = zip(*sorted(zip(species, numbers))) # type: ignore[assignment] | ||
|
||
return "".join(f"{s}{n if n != 1 else ''}" for n, s in zip(numbers, species)) | ||
|
||
|
||
class OptimadeStructureAdapter: | ||
"""Adapter serves as a bridge between OPTIMADE structures and pymatgen objects.""" | ||
|
||
@staticmethod | ||
def get_optimade_structure(structure: Structure, **kwargs) -> dict[str, str | dict[str, Any]]: | ||
"""Get a dictionary in the OPTIMADE Structure format from a pymatgen structure or molecule. | ||
Args: | ||
structure (Structure): pymatgen Structure | ||
**kwargs: passed to the ASE Atoms constructor | ||
Returns: | ||
A dictionary serialization of the structure in the OPTIMADE format. | ||
""" | ||
if not structure.is_ordered: | ||
raise ValueError("OPTIMADE Adapter currently only supports ordered structures") | ||
|
||
attributes: dict[str, Any] = {} | ||
attributes["cartesian_site_positions"] = structure.lattice.get_cartesian_coords(structure.frac_coords).tolist() | ||
attributes["lattice_vectors"] = structure.lattice.matrix.tolist() | ||
attributes["species_at_sites"] = [_.symbol for _ in structure.species] | ||
attributes["species"] = [ | ||
{"name": _.symbol, "chemical_symbols": [_.symbol], "concentration": [1]} | ||
for _ in set(structure.composition.elements) | ||
] | ||
attributes["dimension_types"] = [int(_) for _ in structure.lattice.pbc] | ||
attributes["nperiodic_dimensions"] = sum(attributes["dimension_types"]) | ||
attributes["nelements"] = len(structure.composition.elements) | ||
attributes["chemical_formula_anonymous"] = _optimade_reduce_or_anonymize_formula( | ||
structure.composition.formula, anonymize=True | ||
) | ||
attributes["elements"] = sorted([_.symbol for _ in structure.composition.elements]) | ||
attributes["chemical_formula_reduced"] = _optimade_reduce_or_anonymize_formula( | ||
structure.composition.formula, anonymize=False | ||
) | ||
attributes["chemical_formula_descriptive"] = structure.composition.formula | ||
attributes["elements_ratios"] = [structure.composition.get_atomic_fraction(e) for e in attributes["elements"]] | ||
attributes["nsites"] = len(attributes["species_at_sites"]) | ||
|
||
attributes["last_modified"] = None | ||
attributes["immutable_id"] = None | ||
attributes["structure_features"] = [] | ||
|
||
return {"attributes": attributes} | ||
|
||
@staticmethod | ||
def get_structure(resource: dict, embed_as_properties: bool = True) -> Structure: | ||
"""Get pymatgen structure from an OPTIMADE structure resource. | ||
Args: | ||
resource: OPTIMADE structure resource as a dictionary, JSON string, or the | ||
corresponding attributes dictionary (i.e., `resource["attributes"]`). | ||
embed_as_properties: Whether to embed the entire OPTIMADE resource dictionary | ||
(which may include additional custom fields) as properties in the pymatgen Structure. | ||
Returns: | ||
Structure: Equivalent pymatgen Structure | ||
""" | ||
if isinstance(resource, str): | ||
try: | ||
resource = json.loads(resource) | ||
except json.JSONDecodeError as exc: | ||
raise ValueError(f"Could not decode the input OPTIMADE resource as JSON: {exc}") | ||
|
||
if "attributes" not in resource: | ||
resource = {"attributes": resource} | ||
|
||
id = resource.get("id", None) | ||
attributes = resource["attributes"] | ||
properties: dict[str, Any] = {"optimade_id": id} | ||
if embed_as_properties: | ||
properties["optimade_attributes"] = attributes | ||
|
||
return Structure( | ||
lattice=Lattice( | ||
attributes["lattice_vectors"], | ||
[bool(d) for d in attributes["dimension_types"]], # type: ignore[arg-type] | ||
), | ||
species=_pymatgen_species( | ||
nsites=attributes["nsites"], | ||
species_at_sites=attributes["species_at_sites"], | ||
), | ||
coords=attributes["cartesian_site_positions"], | ||
coords_are_cartesian=True, | ||
properties=properties, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
from __future__ import annotations | ||
|
||
import numpy as np | ||
|
||
from pymatgen.core import Structure | ||
from pymatgen.io.optimade import OptimadeStructureAdapter | ||
from pymatgen.util.testing import TEST_FILES_DIR, VASP_IN_DIR | ||
|
||
STRUCTURE = Structure.from_file(f"{VASP_IN_DIR}/POSCAR") | ||
XYZ_STRUCTURE = f"{TEST_FILES_DIR}/io/xyz/acetylene.xyz" | ||
|
||
|
||
def test_get_optimade_from_structure(): | ||
optimade_structure = OptimadeStructureAdapter.get_optimade_structure(STRUCTURE) | ||
|
||
assert optimade_structure["attributes"]["nsites"] == len(STRUCTURE) | ||
assert optimade_structure["attributes"]["elements"] == ["Fe", "O", "P"] | ||
assert optimade_structure["attributes"]["nelements"] == 3 | ||
assert optimade_structure["attributes"]["chemical_formula_reduced"] == "FeO4P" | ||
assert optimade_structure["attributes"]["species_at_sites"] == 4 * ["Fe"] + 4 * ["P"] + 16 * ["O"] | ||
np.testing.assert_array_almost_equal( | ||
np.abs(optimade_structure["attributes"]["lattice_vectors"]), np.abs(STRUCTURE.lattice.matrix) | ||
) |