From 39fbcf5eb28b3ac3b9cc7a004ae55ff1cb371e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiziano=20M=C3=BCller?= Date: Mon, 11 May 2020 18:06:41 +0200 Subject: [PATCH] refactor structure loading --- aiida_cp2k/cli/data/structure.py | 81 ++------------------------ aiida_cp2k/cli/utils/structure.py | 94 +++++++++++++++++++++++++++++++ aiida_cp2k/cli/workflows/base.py | 24 ++------ 3 files changed, 105 insertions(+), 94 deletions(-) create mode 100644 aiida_cp2k/cli/utils/structure.py diff --git a/aiida_cp2k/cli/data/structure.py b/aiida_cp2k/cli/data/structure.py index 39b52b12..7b4e84d2 100644 --- a/aiida_cp2k/cli/data/structure.py +++ b/aiida_cp2k/cli/data/structure.py @@ -1,15 +1,13 @@ # -*- coding: utf-8 -*- """Command line utilities to create and inspect `StructureData` nodes from CP2K input files.""" -import re - import click -import numpy as np from aiida.cmdline.params import options from aiida.cmdline.utils import decorators, echo from . import cmd_data +from ..utils.structure import structure_from_cp2k_inp @cmd_data.group('structure') @@ -24,79 +22,10 @@ def cmd_structure(): def cmd_import(filename, dry_run): """Import a `StructureData` from a CP2K input file.""" - # pylint: disable=import-outside-toplevel,invalid-name,too-many-locals,too-many-statements,too-many-branches - - from cp2k_input_tools.parser import CP2KInputParser - from aiida.orm.nodes.data.structure import StructureData, Kind, Site, symop_fract_from_ortho - from ase.geometry.cell import cell_to_cellpar, cellpar_to_cell - - # the following was taken from aiida-quantumespresso - VALID_ELEMENTS_REGEX = re.compile( - r""" - ^( - H | He | - Li | Be | B | C | N | O | F | Ne | - Na | Mg | Al | Si | P | S | Cl | Ar | - K | Ca | Sc | Ti | V | Cr | Mn | Fe | Co | Ni | Cu | Zn | Ga | Ge | As | Se | Br | Kr | - Rb | Sr | Y | Zr | Nb | Mo | Tc | Ru | Rh | Pd | Ag | Cd | In | Sn | Sb | Te | I | Xe | - Cs | Ba | Hf | Ta | W | Re | Os | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | - Fr | Ra | Rf | Db | Sg | Bh | Hs | Mt | - La | Ce | Pr | Nd | Pm | Sm | Eu | Gd | Tb | Dy | Ho | Er | Tm | Yb | Lu | # Lanthanides - Ac | Th | Pa | U | Np | Pu | Am | Cm | Bk | Cf | Es | Fm | Md | No | Lr | # Actinides - ) - """, re.VERBOSE | re.IGNORECASE) - - parser = CP2KInputParser() - tree = parser.parse(filename) - force_eval_no = -1 - - for force_eval_no, force_eval in enumerate(tree['+force_eval']): - try: - cell = force_eval['+subsys']['+cell'] - kinds = force_eval['+subsys']['+kind'] - coord = force_eval['+subsys']['+coord'] - break # for now grab the first &COORD found - except KeyError: - continue - else: - echo.echo_critical('no STRUCTURE, CELL, or KIND found in the given input file') - - # CP2K can get its cell information in two ways: - # - A, B, C: cell vectors - # - ABC: scaling of cell vectors, ALPHA_BETA_GAMMA: angles between the cell vectors (optional) - - if 'a' in cell: - unit_cell = np.array([cell['a'], cell['b'], cell['c']]) # unit vectors given - cellpar = cell_to_cellpar(unit_cell) - elif 'abc' in cell: - cellpar = cell['abc'] + cell.get('alpha_beta_gamma', [90., 90., 90.]) - unit_cell = cellpar_to_cell(cellpar) - else: - echo.echo_critical('incomplete &CELL section') - - pbc = [c in cell.get('periodic', 'XYZ') for c in 'XYZ'] - - structure = StructureData(cell=unit_cell, pbc=pbc) - - if coord.get('scaled', False): - tmat = symop_fract_from_ortho(cellpar) - else: - tmat = np.eye(3) - - for kind in kinds: - name = kind['_'] - - try: - # prefer the ELEMENT keyword, fallback to extracting from name - element = kind.get('element', VALID_ELEMENTS_REGEX.search(name)[0]) - except TypeError: - echo.echo_critical('ELEMENT not set and unable to extract from {name}'.format(name=name)) - - structure.append_kind(Kind(name=name, symbols=element)) - - for name, position, _ in parser.coords(force_eval_no): - # positions can be scaled, apply transformation matrix - structure.append_site(Site(kind_name=name, position=tmat @ np.array(position))) + try: + structure = structure_from_cp2k_inp(filename) + except ValueError as exc: + echo.echo_critical(str(exc)) formula = structure.get_formula() diff --git a/aiida_cp2k/cli/utils/structure.py b/aiida_cp2k/cli/utils/structure.py new file mode 100644 index 00000000..b26a8cd5 --- /dev/null +++ b/aiida_cp2k/cli/utils/structure.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +"""Helper functions for building CLI functions dealing with structures""" + +import re + +import numpy as np + + +def structure_from_cp2k_inp(filename): + """Create an AiiDA StructureData from a structure inside a CP2K input file""" + # pylint: disable=import-outside-toplevel,invalid-name,too-many-locals,too-many-statements,too-many-branches + + from cp2k_input_tools.parser import CP2KInputParser + from aiida.orm.nodes.data.structure import StructureData, Kind, Site, symop_fract_from_ortho + from ase.geometry.cell import cell_to_cellpar, cellpar_to_cell + import ase.io + + # the following was taken from aiida-quantumespresso + VALID_ELEMENTS_REGEX = re.compile( + r""" + ^( + H | He | + Li | Be | B | C | N | O | F | Ne | + Na | Mg | Al | Si | P | S | Cl | Ar | + K | Ca | Sc | Ti | V | Cr | Mn | Fe | Co | Ni | Cu | Zn | Ga | Ge | As | Se | Br | Kr | + Rb | Sr | Y | Zr | Nb | Mo | Tc | Ru | Rh | Pd | Ag | Cd | In | Sn | Sb | Te | I | Xe | + Cs | Ba | Hf | Ta | W | Re | Os | Ir | Pt | Au | Hg | Tl | Pb | Bi | Po | At | Rn | + Fr | Ra | Rf | Db | Sg | Bh | Hs | Mt | + La | Ce | Pr | Nd | Pm | Sm | Eu | Gd | Tb | Dy | Ho | Er | Tm | Yb | Lu | # Lanthanides + Ac | Th | Pa | U | Np | Pu | Am | Cm | Bk | Cf | Es | Fm | Md | No | Lr | # Actinides + ) + """, re.VERBOSE | re.IGNORECASE) + + parser = CP2KInputParser() + tree = parser.parse(filename) + force_eval_no = -1 + force_eval = None + + for force_eval_no, force_eval in enumerate(tree['+force_eval']): + try: + cell = force_eval['+subsys']['+cell'] + kinds = force_eval['+subsys']['+kind'] + break # for now grab the first &COORD found + except KeyError: + continue + else: + raise ValueError('no CELL, or KIND found in the given input file') + + # CP2K can get its cell information in two ways: + # - A, B, C: cell vectors + # - ABC: scaling of cell vectors, ALPHA_BETA_GAMMA: angles between the cell vectors (optional) + + if 'a' in cell: + unit_cell = np.array([cell['a'], cell['b'], cell['c']]) # unit vectors given + cellpar = cell_to_cellpar(unit_cell) + elif 'abc' in cell: + cellpar = cell['abc'] + cell.get('alpha_beta_gamma', [90., 90., 90.]) + unit_cell = cellpar_to_cell(cellpar) + else: + raise ValueError('incomplete &CELL section') + + pbc = [c in cell.get('periodic', 'XYZ') for c in 'XYZ'] + + structure = StructureData(cell=unit_cell, pbc=pbc) + + if force_eval['+subsys'].get('+coord', {}).get('scaled', False): + tmat = symop_fract_from_ortho(cellpar) + else: + tmat = np.eye(3) + + for kind in kinds: + name = kind['_'] + + try: + # prefer the ELEMENT keyword, fallback to extracting from name + element = kind.get('element', VALID_ELEMENTS_REGEX.search(name)[0]) + except TypeError: + raise ValueError('ELEMENT not set and unable to extract from {name}'.format(name=name)) + + structure.append_kind(Kind(name=name, symbols=element)) + + try: + structfn = force_eval["+subsys"]["+topology"]["coord_file_name"] + atoms = ase.io.read(structfn) + + for name, position in zip(atoms.symbols, atoms.positions): + structure.append_site(Site(kind_name=name, position=tmat @ np.array(position))) + + except KeyError: + for name, position, _ in parser.coords(force_eval_no): + # positions can be scaled, apply transformation matrix + structure.append_site(Site(kind_name=name, position=tmat @ np.array(position))) + + return structure diff --git a/aiida_cp2k/cli/workflows/base.py b/aiida_cp2k/cli/workflows/base.py index 2dbc12b1..4d5f782a 100644 --- a/aiida_cp2k/cli/workflows/base.py +++ b/aiida_cp2k/cli/workflows/base.py @@ -9,19 +9,7 @@ from . import cmd_launch from ..utils import cp2k_options, launch_process - - -def _import_struct(structfn): - """Import a data structure from a filename using ASE""" - - from aiida.orm import StructureData - import ase.io - - asecell = ase.io.read(structfn) - structure = StructureData(ase=asecell) - structure.store() - - return structure +from ..utils.structure import structure_from_cp2k_inp @cmd_launch.command('base') @@ -60,11 +48,11 @@ def cmd_launch_workflow(code, daemon, cp2k_input_file, label, description, struc else: try: - structfn = tree["FORCE_EVAL"]["SUBSYS"]["TOPOLOGY"].pop("COORD_FILE_NAME") - builder.cp2k.structure = _import_struct(structfn) - echo.echo("Created StructureData<{}> from file {}]\n".format(builder.cp2k.structure.pk, structfn)) - except KeyError: - pass + with open(cp2k_input_file.name, "r") as fhandle: + builder.cp2k.structure = structure_from_cp2k_inp(fhandle) + builder.cp2k.structure.store() + echo.echo("Created StructureData<{}> from file {}]\n".format(builder.cp2k.structure.pk, + cp2k_input_file.name)) except ValueError as err: echo.echo_critical(str(err))