Skip to content

Commit 70b38f6

Browse files
yakutovich authored and bastonero committed
PpCalculation: Make parsing of output files optional (aiidateam#1029)
The `parse_data_files` option is added. When set to `False`, the parser will not parse the output data files but will just keep the raw files. The existing option `keep_plot_file` is deprecated in favor of the renamed `keep_data_files` option, to make its name consistent with the new option.
1 parent 2c564e2 commit 70b38f6

File tree

4 files changed

+128
-47
lines changed

4 files changed

+128
-47
lines changed

src/aiida_quantumespresso/calculations/pp.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# -*- coding: utf-8 -*-
22
"""`CalcJob` implementation for the pp.x code of Quantum ESPRESSO."""
33
import os
4+
import warnings
45

56
from aiida import orm
67
from aiida.common import datastructures, exceptions
8+
from aiida.common.warnings import AiidaDeprecationWarning
79

810
from aiida_quantumespresso.calculations import _lowercase_dict, _uppercase_dict
911
from aiida_quantumespresso.utils.convert import convert_input_to_namelist_entry
@@ -82,7 +84,9 @@ def define(cls, spec):
8284
spec.input('metadata.options.output_filename', valid_type=str, default=cls._DEFAULT_OUTPUT_FILE)
8385
spec.input('metadata.options.parser_name', valid_type=str, default='quantumespresso.pp')
8486
spec.input('metadata.options.withmpi', valid_type=bool, default=True)
85-
spec.input('metadata.options.keep_plot_file', valid_type=bool, default=False)
87+
spec.input('metadata.options.keep_plot_file', valid_type=bool, required=False)
88+
spec.input('metadata.options.keep_data_files', valid_type=bool, default=False)
89+
spec.input('metadata.options.parse_data_files', valid_type=bool, default=True)
8690

8791
spec.output('output_parameters', valid_type=orm.Dict)
8892
spec.output('output_data', valid_type=orm.ArrayData)
@@ -218,10 +222,16 @@ def prepare_for_submission(self, folder): # pylint: disable=too-many-branches,t
218222
# distinguish them from one another. The `fileout` filename will be the full data filename with the `fileout`
219223
# value as a suffix.
220224
retrieve_tuples = [self._FILEOUT, (f'{self._FILPLOT}_*{self._FILEOUT}', '.', 0)]
221-
222-
if self.inputs.metadata.options.keep_plot_file:
225+
if 'keep_plot_file' in self.inputs.metadata.options:
226+
self.inputs.metadata.options.keep_data_files = self.inputs.metadata.options.keep_plot_file
227+
warnings.warn(
228+
"The input parameter 'keep_plot_file' is deprecated and will be removed in version 5.0.0. "
229+
"Please use 'keep_data_files' instead.", AiidaDeprecationWarning
230+
)
231+
if self.inputs.metadata.options.keep_data_files:
223232
calcinfo.retrieve_list.extend(retrieve_tuples)
224-
else:
233+
# If we do not want to parse the retrieved files, temporary retrieval is meaningless
234+
elif self.inputs.metadata.options.parse_data_files:
225235
calcinfo.retrieve_temporary_list.extend(retrieve_tuples)
226236

227237
return calcinfo

src/aiida_quantumespresso/parsers/pp.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -117,35 +117,35 @@ def get_key_from_filename(filename):
117117
matches = re.search(pattern, filename)
118118
return matches.group(1)
119119

120-
for filename in filenames:
121-
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
122-
# of each file is released from memory after parsing, to improve memory usage.
123-
if filename.endswith(filename_suffix):
124-
# Read the file to memory
125-
try:
126-
with file_opener(filename) as handle:
127-
data_raw = handle.read()
128-
except OSError:
129-
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
130-
# Parse the file
131-
try:
132-
key = get_key_from_filename(filename)
133-
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
134-
del data_raw
135-
except Exception as exception: # pylint: disable=broad-except
136-
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)
137-
138-
# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
139-
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
140-
# should be retrieved there really is no way to check this explicitly.
141-
if not data_parsed:
142-
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)
143-
144-
# Create output nodes
145-
if len(data_parsed) == 1:
146-
self.out('output_data', data_parsed[0][1])
147-
else:
148-
self.out('output_data_multiple', dict(data_parsed))
120+
if self.node.base.attributes.get('parse_data_files'):
121+
for filename in filenames:
122+
# Directly parse the retrieved files after reading them to memory (`data_raw`). The raw data
123+
# of each file is released from memory after parsing, to improve memory usage.
124+
if filename.endswith(filename_suffix):
125+
# Read the file to memory
126+
try:
127+
with file_opener(filename) as handle:
128+
data_raw = handle.read()
129+
except OSError:
130+
return self.exit_codes.ERROR_OUTPUT_DATAFILE_READ.format(filename=filename)
131+
# Parse the file
132+
try:
133+
key = get_key_from_filename(filename)
134+
data_parsed.append((key, parsers[iflag](data_raw, self.units_dict[parsed_data['plot_num']])))
135+
del data_raw
136+
except Exception as exception: # pylint: disable=broad-except
137+
return self.exit_codes.ERROR_OUTPUT_DATAFILE_PARSE.format(filename=filename, exception=exception)
138+
139+
# If we don't have any parsed files, we exit. Note that this will not catch the case where there should be more
140+
# than one file, but the engine did not retrieve all of them. Since often we anyway don't know how many files
141+
# should be retrieved there really is no way to check this explicitly.
142+
if not data_parsed:
143+
return self.exit_codes.ERROR_OUTPUT_DATAFILE_MISSING.format(filename=filename_prefix)
144+
145+
if len(data_parsed) == 1:
146+
self.out('output_data', data_parsed[0][1])
147+
else:
148+
self.out('output_data_multiple', dict(data_parsed))
149149

150150
return self.exit(logs=logs)
151151

tests/calculations/test_pp.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ def test_pp_default(fixture_sandbox, generate_calc_job, generate_inputs, file_re
6060
file_regression.check(input_written, encoding='utf-8', extension='.in')
6161

6262

63-
def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
63+
def test_pp_keep_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
6464
"""Test a `PpCalculation` where we want to retrieve the plot file."""
6565
entry_point_name = 'quantumespresso.pp'
6666
inputs = generate_inputs()
67-
inputs.metadata.options.keep_plot_file = True
67+
inputs.metadata.options.keep_data_files = True
6868

6969
calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
7070
retrieve_list = ['aiida.out', 'aiida.fileout', ('aiida.filplot_*aiida.fileout', '.', 0)]
@@ -80,6 +80,26 @@ def test_pp_keep_plot_file(fixture_sandbox, generate_calc_job, generate_inputs):
8080
assert element in calc_info.retrieve_list
8181

8282

83+
def test_pp_parse_data_files(fixture_sandbox, generate_calc_job, generate_inputs):
84+
"""Test a `PpCalculation` where we want to retrieve the plot file."""
85+
entry_point_name = 'quantumespresso.pp'
86+
inputs = generate_inputs()
87+
inputs.metadata.options.parse_data_files = False
88+
89+
calc_info = generate_calc_job(fixture_sandbox, entry_point_name, inputs)
90+
retrieve_list = ['aiida.out']
91+
retrieve_temporary_list = []
92+
local_copy_list = []
93+
94+
# When both `keep_data_files` (default) and `parse_data_files` are set to False, the data files won't be pulled.
95+
assert isinstance(calc_info, datastructures.CalcInfo)
96+
assert sorted(calc_info.local_copy_list) == sorted(local_copy_list)
97+
assert sorted(calc_info.retrieve_temporary_list) == sorted(retrieve_temporary_list)
98+
assert len(calc_info.retrieve_list) == 1
99+
for element in retrieve_list:
100+
assert element in calc_info.retrieve_list
101+
102+
83103
def test_pp_cmdline_setting(fixture_sandbox, generate_calc_job, generate_inputs):
84104
"""Test a `PpCalculation` with user-defined cmdline settings."""
85105
entry_point_name = 'quantumespresso.pp'

tests/parsers/test_pp.py

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,11 @@ def test_pp_default_1d(
125125
entry_point_calc_job = 'quantumespresso.pp'
126126
entry_point_parser = 'quantumespresso.pp'
127127

128-
node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d)
128+
attributes = {'keep_data_files': False, 'parse_data_files': True}
129+
130+
node = generate_calc_job_node(
131+
entry_point_calc_job, fixture_localhost, 'default_1d', generate_inputs_1d, attributes=attributes
132+
)
129133
parser = generate_parser(entry_point_parser)
130134
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
131135

@@ -157,9 +161,13 @@ def test_pp_default_1d_spherical(
157161
"""Test a default `pp.x` calculation producing a 1D data set with spherical averaging."""
158162
entry_point_calc_job = 'quantumespresso.pp'
159163
entry_point_parser = 'quantumespresso.pp'
160-
164+
attributes = {'keep_data_files': False, 'parse_data_files': True}
161165
node = generate_calc_job_node(
162-
entry_point_calc_job, fixture_localhost, 'default_1d_spherical', generate_inputs_1d_spherical
166+
entry_point_calc_job,
167+
fixture_localhost,
168+
'default_1d_spherical',
169+
generate_inputs_1d_spherical,
170+
attributes=attributes
163171
)
164172
parser = generate_parser(entry_point_parser)
165173
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
@@ -200,8 +208,11 @@ def test_pp_default_2d(
200208
"""Test a default `pp.x` calculation producing a 2D data set."""
201209
entry_point_calc_job = 'quantumespresso.pp'
202210
entry_point_parser = 'quantumespresso.pp'
211+
attributes = {'keep_data_files': False, 'parse_data_files': True}
203212

204-
node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d)
213+
node = generate_calc_job_node(
214+
entry_point_calc_job, fixture_localhost, 'default_2d', generate_inputs_2d, attributes=attributes
215+
)
205216
parser = generate_parser(entry_point_parser)
206217
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
207218

@@ -237,8 +248,11 @@ def test_pp_default_polar(
237248
"""Test a default `pp.x` calculation producing a polar coordinates data set."""
238249
entry_point_calc_job = 'quantumespresso.pp'
239250
entry_point_parser = 'quantumespresso.pp'
251+
attributes = {'keep_data_files': False, 'parse_data_files': True}
240252

241-
node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar)
253+
node = generate_calc_job_node(
254+
entry_point_calc_job, fixture_localhost, 'default_polar', generate_inputs_polar, attributes=attributes
255+
)
242256
parser = generate_parser(entry_point_parser)
243257
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
244258

@@ -267,8 +281,11 @@ def test_pp_default_3d(
267281
"""Test a default `pp.x` calculation producing a 3D data set."""
268282
entry_point_calc_job = 'quantumespresso.pp'
269283
entry_point_parser = 'quantumespresso.pp'
284+
attributes = {'keep_data_files': False, 'parse_data_files': True}
270285

271-
node = generate_calc_job_node(entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d)
286+
node = generate_calc_job_node(
287+
entry_point_calc_job, fixture_localhost, 'default_3d', generate_inputs_3d, attributes=attributes
288+
)
272289
parser = generate_parser(entry_point_parser)
273290
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
274291

@@ -297,12 +314,16 @@ def test_pp_default_3d(
297314
})
298315

299316

300-
def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
301-
"""Test a `pp.x` calculation where `keep_plot_file=False` meaning files will be parsed from temporary directory."""
317+
def test_pp_default_3d_keep_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
318+
"""Test a `pp.x` calculation where `keep_data_files=False` meaning files will be parsed from temporary directory."""
302319
entry_point_calc_job = 'quantumespresso.pp'
303320
entry_point_parser = 'quantumespresso.pp'
304321

305-
attributes = {'options': {'keep_plot_file': False}, 'retrieve_temporary_list': ['aiida.fileout']}
322+
attributes = {
323+
'keep_data_files': False,
324+
'parse_data_files': True,
325+
'retrieve_temporary_list': ['aiida.fileout'],
326+
}
306327
node = generate_calc_job_node(
307328
entry_point_calc_job,
308329
test_name='default_3d',
@@ -320,12 +341,36 @@ def test_pp_default_3d_keep_plot_file(generate_calc_job_node, generate_parser, g
320341
assert len(results['output_data'].get_arraynames()) == 4
321342

322343

344+
def test_pp_default_3d_parse_data_files(generate_calc_job_node, generate_parser, generate_inputs_3d, tmpdir):
345+
"""Test a `pp.x` calculation where `parse_data_files=False`, so data files won't be parsed."""
346+
entry_point_calc_job = 'quantumespresso.pp'
347+
entry_point_parser = 'quantumespresso.pp'
348+
349+
attributes = {'keep_data_files': False, 'parse_data_files': False}
350+
node = generate_calc_job_node(
351+
entry_point_calc_job,
352+
test_name='default_3d',
353+
inputs=generate_inputs_3d,
354+
attributes=attributes,
355+
)
356+
parser = generate_parser(entry_point_parser)
357+
results, calcfunction = parser.parse_from_node(node, store_provenance=False, retrieved_temporary_folder=tmpdir)
358+
359+
assert calcfunction.is_finished, calcfunction.exception
360+
assert calcfunction.is_finished_ok, calcfunction.exit_message
361+
assert 'output_parameters' in results
362+
assert 'output_data' not in results
363+
364+
323365
def test_pp_default_3d_multiple(generate_calc_job_node, generate_parser, generate_inputs_3d):
324366
"""Test a default `pp.x` calculation producing multiple files in 3D format."""
325367
entry_point_calc_job = 'quantumespresso.pp'
326368
entry_point_parser = 'quantumespresso.pp'
369+
attributes = {'keep_data_files': False, 'parse_data_files': True}
327370

328-
node = generate_calc_job_node(entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d)
371+
node = generate_calc_job_node(
372+
entry_point_calc_job, test_name='default_3d_multiple', inputs=generate_inputs_3d, attributes=attributes
373+
)
329374
parser = generate_parser(entry_point_parser)
330375
results, calcfunction = parser.parse_from_node(node, store_provenance=False)
331376

@@ -364,9 +409,14 @@ def test_pp_default_3d_failed_missing_data(
364409
"""Test a default `pp.x` calculation where the aiida.fileout file is missing."""
365410
entry_point_calc_job = 'quantumespresso.pp'
366411
entry_point_parser = 'quantumespresso.pp'
412+
attributes = {'keep_data_files': False, 'parse_data_files': True}
367413

368414
node = generate_calc_job_node(
369-
entry_point_calc_job, fixture_localhost, 'default_3d_failed_missing_data', generate_inputs_3d
415+
entry_point_calc_job,
416+
fixture_localhost,
417+
'default_3d_failed_missing_data',
418+
generate_inputs_3d,
419+
attributes=attributes
370420
)
371421
parser = generate_parser(entry_point_parser)
372422
_, calcfunction = parser.parse_from_node(node, store_provenance=False)
@@ -398,9 +448,10 @@ def test_pp_default_3d_failed_format(fixture_localhost, generate_calc_job_node,
398448
"""Test a default `pp.x` calculation where an unsupported output file format is used."""
399449
entry_point_calc_job = 'quantumespresso.pp'
400450
entry_point_parser = 'quantumespresso.pp'
451+
attributes = {'keep_data_files': False, 'parse_data_files': True}
401452

402453
node = generate_calc_job_node(
403-
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d
454+
entry_point_calc_job, fixture_localhost, 'default_3d_failed_format', generate_inputs_3d, attributes=attributes
404455
)
405456
parser = generate_parser(entry_point_parser)
406457
_, calcfunction = parser.parse_from_node(node, store_provenance=False)

0 commit comments

Comments
 (0)