Skip to content

Commit

Permalink
Fix parser for CP2K v2024.3 (#223)
Browse files Browse the repository at this point in the history
* Fix parser for v2024.3.
* Add tests for CP2K v2024.3.
* Add an example run for the advanced parser.
* Improve regexes for getting step info.
* Simplify reading end-of-step info.
* Base workchain: improve output parsing.

---------

Co-authored-by: Aliaksandr Yakutovich <yakutovicha@gmail.com>
  • Loading branch information
cpignedoli and yakutovicha authored Jan 28, 2025
1 parent 1a44be7 commit f5116d8
Show file tree
Hide file tree
Showing 7 changed files with 3,903 additions and 19 deletions.
50 changes: 37 additions & 13 deletions aiida_cp2k/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def parse_cp2k_output_advanced(
edens_rspace = None
scf_converged = True

print_now = False
dump_step_info = False
data = line.split()
# Parse general info
if line.startswith(" CELL|"):
Expand Down Expand Up @@ -208,24 +208,48 @@ def parse_cp2k_output_advanced(
# Parse specific info
if result_dict["run_type"] in ["ENERGY", "ENERGY_FORCE"]:
if energy is not None and not result_dict["motion_step_info"]["step"]:
print_now = True
dump_step_info = True
if result_dict["run_type"] in ["GEO_OPT", "CELL_OPT"]:
# Note: with CELL_OPT/LBFGS there is no "STEP 0", while there is with CELL_OPT/BFGS

# Getting the step number.
if re.search(r"Informations at step", line):
step = int(data[5])
if re.search(r"Max. step size =", line):
elif re.search(
r"OPT\| Step number ", line
): # Fix for new CP2K versions.
step = int(data[-1])

# Getting the maximum step size.
if re.search(
r"OPT\| Maximum step size\s*[-+]?\d*\.?\d+", line
) or re.search(r"Max. step size\s+=", line):
max_step = float(data[-1])
if re.search(r"RMS step size =", line):

# Getting the RMS step size.
if re.search(
r"OPT\| RMS step size\s*[-+]?\d*\.?\d+", line
) or re.search(r"RMS step size\s+=", line):
rms_step = float(data[-1])
if re.search(r"Max. gradient =", line):

# Getting the maximum gradient.
if re.search(
r"OPT\| Maximum gradient\s*[-+]?\d*\.?\d+", line
) or re.search(r"Max. gradient\s+=", line):
max_grad = float(data[-1])
if re.search(r"RMS gradient =", line):

# Getting the RMS gradient.
if re.search(
r"OPT\| RMS gradient\s*[-+]?\d*\.?\d+",
line,
) or re.search(r"RMS gradient\s{3,}=", line):
rms_grad = float(data[-1])

if (
len(data) == 1
and data[0] == "---------------------------------------------------"
):
print_now = True # 51('-')
) or re.search(r"OPT\| Estimated peak process memory", line):
dump_step_info = True # 51('-')
if re.search(
r"Reevaluating energy at the minimum", line
): # not clear why it is doing a last one...
Expand All @@ -239,16 +263,16 @@ def parse_cp2k_output_advanced(
step = int(data[3])
if re.search(r"INITIAL PRESSURE\[bar\]", line):
pressure = float(data[3])
print_now = True
dump_step_info = True
if re.search(r"PRESSURE \[bar\]", line):
pressure = float(data[3])
print_now = True
dump_step_info = True
if result_dict["run_type"] == "MD-NPT_F":
if re.search(r"^ STEP NUMBER", line):
step = int(data[3])
if re.search(r"^ INITIAL PRESSURE\[bar\]", line):
pressure = float(data[3])
print_now = True
dump_step_info = True
if re.search(r"^ PRESSURE \[bar\]", line):
pressure = float(data[3])
if re.search(r"^ VOLUME\[bohr\^3\]", line):
Expand All @@ -261,9 +285,9 @@ def parse_cp2k_output_advanced(
cell_alp = float(data[3])
cell_bet = float(data[4])
cell_gam = float(data[5])
print_now = True
dump_step_info = True

if print_now and energy is not None:
if dump_step_info and energy is not None:
result_dict["motion_step_info"]["step"].append(step)
result_dict["motion_step_info"]["energy_au"].append(energy)
result_dict["motion_step_info"]["dispersion_energy_au"].append(
Expand Down
4 changes: 3 additions & 1 deletion aiida_cp2k/workchains/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Base work chain to run a CP2K calculation."""

import re

from aiida import common, engine, orm, plugins

from .. import utils
Expand Down Expand Up @@ -85,7 +87,7 @@ def restart_incomplete_calculation(self, calc):
content_string = calc.outputs.retrieved.base.repository.get_object_content(calc.base.attributes.get('output_filename'))

# CP2K was updating geometry - continue with that.
restart_geometry_transformation = "Max. gradient =" in content_string or "MD| Step number" in content_string
restart_geometry_transformation = re.search(r"Max. gradient\s+=", content_string) or re.search(r"OPT\| Maximum gradient\s*[-+]?\d*\.?\d+", content_string) or "MD| Step number" in content_string
end_inner_scf_loop = "Total energy: " in content_string
# The message is written in the log file when the CP2K input parameter `LOG_PRINT_KEY` is set to True.
if not (restart_geometry_transformation or end_inner_scf_loop or "Writing RESTART" in content_string):
Expand Down
162 changes: 162 additions & 0 deletions examples/single_calculations/example_geopt_advanced_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
###############################################################################
# Copyright (c), The AiiDA-CP2K authors. #
# SPDX-License-Identifier: MIT #
# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k #
# For further information on the license, see the LICENSE.txt file. #
###############################################################################
"""Run DFT geometry optimization."""

import os
import sys

import ase.io
import click
from aiida.common import NotExistent
from aiida.engine import run
from aiida.orm import Dict, SinglefileData, load_code
from aiida.plugins import DataFactory

StructureData = DataFactory("core.structure")


def example_geopt(cp2k_code):
"""Run DFT geometry optimization."""

print("Testing CP2K GEO_OPT on H2O (DFT)...")

thisdir = os.path.dirname(os.path.realpath(__file__))

# Structure.
structure = StructureData(
ase=ase.io.read(os.path.join(thisdir, "..", "files", "h2.xyz"))
)

# Basis set.
basis_file = SinglefileData(
file=os.path.join(thisdir, "..", "files", "BASIS_MOLOPT")
)

# Pseudopotentials.
pseudo_file = SinglefileData(
file=os.path.join(thisdir, "..", "files", "GTH_POTENTIALS")
)

# Parameters.
parameters = Dict(
{
"GLOBAL": {
"RUN_TYPE": "GEO_OPT",
},
"FORCE_EVAL": {
"METHOD": "Quickstep",
"DFT": {
"BASIS_SET_FILE_NAME": "BASIS_MOLOPT",
"POTENTIAL_FILE_NAME": "GTH_POTENTIALS",
"QS": {
"EPS_DEFAULT": 1.0e-12,
"WF_INTERPOLATION": "ps",
"EXTRAPOLATION_ORDER": 3,
},
"MGRID": {
"NGRIDS": 4,
"CUTOFF": 280,
"REL_CUTOFF": 30,
},
"XC": {
"XC_FUNCTIONAL": {
"_": "PBE",
},
},
"POISSON": {
"PERIODIC": "none",
"PSOLVER": "MT",
},
},
"SUBSYS": {
"KIND": [
{
"_": "O",
"BASIS_SET": "DZVP-MOLOPT-SR-GTH",
"POTENTIAL": "GTH-PBE-q6",
},
{
"_": "H",
"BASIS_SET": "DZVP-MOLOPT-SR-GTH",
"POTENTIAL": "GTH-PBE-q1",
},
],
},
},
}
)

# Construct process builder.
builder = cp2k_code.get_builder()
builder.structure = structure
builder.parameters = parameters
builder.code = cp2k_code
builder.file = {
"basis": basis_file,
"pseudo": pseudo_file,
}
builder.metadata.options.resources = {
"num_machines": 1,
"num_mpiprocs_per_machine": 1,
}
builder.metadata.options.max_wallclock_seconds = 1 * 3 * 60
builder.metadata.options.parser_name = "cp2k_advanced_parser"

print("Submitted calculation...")
calc = run(builder)

# Check walltime not exceeded.
assert calc["output_parameters"]["exceeded_walltime"] is False

# Check energy.
expected_energy = -1.17212345935
if abs(calc["output_parameters"]["energy"] - expected_energy) < 1e-10:
print("OK, energy has the expected value.")
else:
print("ERROR!")
print(f"Expected energy value: {expected_energy}")
print(f"Actual energy value: {calc['output_parameters']['energy']}")
sys.exit(3)

# Check geometry.
expected_dist = 0.732594809575
dist = calc["output_structure"].get_ase().get_distance(0, 1)
if abs(dist - expected_dist) < 1e-7:
print("OK, H-H distance has the expected value.")
else:
print("ERROR!")
print(f"Expected dist value: {expected_dist}")
print(f"Actual dist value: {dist}")
sys.exit(3)

# Check motion step information.
assert calc["output_parameters"]["motion_step_info"]["step"] == [
0,
1,
2,
], "ERROR: motion step info is incorrect"
assert calc["output_parameters"]["motion_step_info"]["cell_a_angs"] == [
4.0,
4.0,
4.0,
], "ERROR: motion step info is incorrect"


@click.command("cli")
@click.argument("codelabel")
def cli(codelabel):
"""Click interface."""
try:
code = load_code(codelabel)
except NotExistent:
print(f"The code '{codelabel}' does not exist.")
sys.exit(1)
example_geopt(code)


if __name__ == "__main__":
cli()
11 changes: 7 additions & 4 deletions examples/workchains/fixme_example_base_md_reftraj_restart.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def example_base(cp2k_code):
"GLOBAL": {
"RUN_TYPE": "MD",
"PRINT_LEVEL": "LOW",
"WALLTIME": 3,
"WALLTIME": 2,
"PROJECT": "aiida",
},
"MOTION": {
Expand Down Expand Up @@ -151,9 +151,12 @@ def example_base(cp2k_code):
"basis": basis_file,
"pseudo": pseudo_file,
}
builder.cp2k.metadata.options.resources = {
"num_machines": 1,
"num_mpiprocs_per_machine": 1,
builder.cp2k.metadata.options = {
"max_wallclock_seconds": 100,
"resources": {
"num_machines": 1,
"num_mpiprocs_per_machine": 1,
},
}

print("Submitted calculation...")
Expand Down
Loading

0 comments on commit f5116d8

Please sign in to comment.