From d0be14caa9c586c4634abb3171d97100732c86c2 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 13 Nov 2024 11:18:46 +0100 Subject: [PATCH 1/6] allow hdf5 files for scienta --- src/pynxtools_xps/reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pynxtools_xps/reader.py b/src/pynxtools_xps/reader.py index 56d1ae71..e0755044 100644 --- a/src/pynxtools_xps/reader.py +++ b/src/pynxtools_xps/reader.py @@ -124,6 +124,8 @@ class XPSReader(MultiFormatReader): ) __prmt_file_ext__ = [ + ".h5", + ".hdf5", ".ibw", ".npl", ".pro", @@ -137,6 +139,8 @@ class XPSReader(MultiFormatReader): ] __vendors__ = ["kratos", "phi", "scienta", "specs", "unkwown"] __prmt_vndr_cls: Dict[str, Dict] = { + ".h5": {"scienta": MapperScienta}, + ".hdf5": {"scienta": MapperScienta}, ".ibw": {"scienta": MapperScienta}, ".npl": {"unkwown": VamasMapper}, ".pro": {"phi": MapperPhi}, From 8471203b71bd5b932a454f440e28a69a7a55b371 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:52:09 +0100 Subject: [PATCH 2/6] enable parsing of scienta hdf5 files --- src/pynxtools_xps/config/config_scienta.json | 42 +-- .../config/config_scienta_hdf5.json | 310 ++++++++++++++++++ src/pynxtools_xps/reader.py | 12 +- src/pynxtools_xps/reader_utils.py | 3 +- src/pynxtools_xps/scienta/scienta_mappings.py | 24 +- src/pynxtools_xps/scienta/scienta_reader.py | 233 +++++++------ 6 files changed, 483 insertions(+), 141 deletions(-) create mode 100644 src/pynxtools_xps/config/config_scienta_hdf5.json diff --git a/src/pynxtools_xps/config/config_scienta.json b/src/pynxtools_xps/config/config_scienta.json index dab784a2..5a2fa5f1 100644 --- a/src/pynxtools_xps/config/config_scienta.json +++ b/src/pynxtools_xps/config/config_scienta.json @@ -8,11 +8,11 @@ "experiment_institution":"@eln", "experiment_facility":"@eln", "experiment_laboratory":"@eln", - "data_file":"@attrs:file_info/data_file", - "sequence_file":"@attrs:file_info/sequence_file" + "data_file":"@attrs:data_file", + "sequence_file":"@attrs:sequence_file" }, "/ENTRY/USER[user]":{ - "name":"@attrs:user/user_name", + "name":"@attrs:user_name", "affiliation":"@eln", "address":"@eln", "email":"@eln", @@ -24,7 +24,7 @@ }, "/ENTRY/INSTRUMENT[instrument]":{ "@default":"electronanalyser", - "name":"@attrs:instrument/instrument_name", + "name":"@attrs:instrument_name", "device_information":{ "vendor":"@eln", "model":"@eln", @@ -37,8 +37,8 @@ "energy_resolution":{ "physical_quantity":"energy", "type":"derived", - "resolution":"@attrs:region/step_size", - "resolution/@units":"@attrs:region/step_size/@units" + "resolution":"@attrs:step_size", + "resolution/@units":"@attrs:step_size/@units" }, "sourceTYPE[source_xray]":{ "type":"@eln", @@ -58,8 +58,8 @@ "beamTYPE[beam_xray]":{ "distance":"@eln", "distance/@units":"@eln", - "incident_energy":"@attrs:beam_xray/excitation_energy", - "incident_energy/@units":"@attrs:beam_xray/excitation_energy/@units", + "incident_energy":"@attrs:excitation_energy", + "incident_energy/@units":"@attrs:excitation_energy/@units", "incident_energy_spread":null, "incident_energy_spread/@units":null, "incident_polarization":null, @@ -87,13 +87,13 @@ "energy_resolution":{ "physical_quantity":"energy", "type":"derived", - "resolution":"@attrs:region/step_size", - "resolution/@units":"@attrs:region/step_size/@units" + "resolution":"@attrs:step_size", + "resolution/@units":"@attrs:step_size/@units" }, "transmission_function":null, "COLLECTIONCOLUMN[collectioncolumn]":{ "scheme":"@eln", - "lens_mode":"@attrs:collectioncolumn/lens_mode", + "lens_mode":"@attrs:lens_mode", "projection":null, "angular_acceptance":null, "spatial_acceptance":null, @@ -112,9 +112,9 @@ }, "ENERGYDISPERSION[energydispersion]":{ "scheme":"@eln", - "pass_energy":"@attrs:energydispersion/pass_energy", - "pass_energy/@units":"@attrs:energydispersion/pass_energy/@units", - "energy_scan_mode":"@attrs:energydispersion/acquisition_mode", + "pass_energy":"@attrs:pass_energy", + "pass_energy/@units":"@attrs:pass_energy/@units", + "energy_scan_mode":"@attrs:acquisition_mode", "APERTURE[entrance_slit]":{ "description":"@eln", "shape":"@eln", @@ -135,8 +135,8 @@ "DETECTOR[detector]":{ "@default":"raw_data", "amplifier_type":"@eln", - "count_time":"@attrs:detector/dwell_time", - "count_time/@units":"@attrs:detector/dwell_time/@units", + "count_time":"@attrs:dwell_time", + "count_time/@units":"@attrs:dwell_time/@units", "detector_type":"@eln", "device_information":{ "vendor":"@eln", @@ -246,7 +246,7 @@ "transmission_correction":null }, "/ENTRY/SAMPLE[sample]":{ - "name":"@attrs:sample/sample_name", + "name":"@attrs:sample_name", "identifier":{ "identifier":"@eln" }, @@ -292,12 +292,12 @@ "@signal":"data", "data":"@data:average", "data_errors":"@data:errors", - "data/@units":"@attrs:region/intensity/@units", + "data/@units":"@attrs:intensity/@units", "DATA[*]":"@data:*.scans", - "DATA[*]/@units":"@attrs:region/intensity/@units", + "DATA[*]/@units":"@attrs:intensity/@units", "energy":"@data:energy", - "energy/@type":"@attrs:region/energy_scale", - "energy/@units":"@attrs:region/energy_axis/@units", + "energy/@type":"@attrs:energy_scale", + "energy/@units":"@attrs:energy_axis/@units", "@energy_indices":0 } } \ No newline at end of file diff --git a/src/pynxtools_xps/config/config_scienta_hdf5.json b/src/pynxtools_xps/config/config_scienta_hdf5.json new file mode 100644 index 00000000..daf754c1 --- /dev/null +++ b/src/pynxtools_xps/config/config_scienta_hdf5.json @@ -0,0 +1,310 @@ +{ + "/ENTRY":{ + "@default":"data", + "title":"@attrs:acquisition/spectrum/name", + "start_time":"@attrs:acquisition/spectrum_log/start_time", + "end_time":"@attrs:acquisition/spectrum_log/stop_time", + "method":"X-ray photoelectron spectroscopy (XPS)", + "experiment_institution":"@eln", + "experiment_facility":"@eln", + "experiment_laboratory":"@eln", + "duration":"@attrs:acquisition/spectrum_log/duration", + "duration/@units":"s", + "sequence_counter":"acquisition/spectrum_log/sequence_counter", + "sequence_name":"acquisition/spectrum_log/sequence_name", + "spectrum_counter":"acquisition/spectrum_log/spectrum_counter", + "storage_name":"acquisition/spectrum_log/storage_name" + }, + "/ENTRY/USER[user]":{ + "name":"@attrs:user/name", + "affiliation":"@eln", + "address":"@eln", + "email":"@eln", + "IDENTIFIER[orcid]":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "/ENTRY/INSTRUMENT[instrument]":{ + "@default":"electronanalyser", + "name":"@attrs:instrument_name", + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "energy_resolution":{ + "physical_quantity":"energy", + "type":"@eln", + "resolution":"@eln", + "resolution/@units":"@eln" + }, + "sourceTYPE[source_xray]":{ + "type":"@eln", + "name":"@attrs:instrument/analyser/excitation_source/preset_name", + "probe":"@attrs:instrument/analyser/excitation_source/source_type", + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "associated_beam":"/entry/instrument/beam_xray" + }, + "beamTYPE[beam_xray]":{ + "distance":"@eln", + "distance/@units":"@eln", + "incident_energy":"@attrs:instrument/analyser/excitation_source/energy", + "incident_energy/@units":"eV", + "incident_energy_spread":null, + "incident_energy_spread/@units":null, + "incident_polarization":null, + "incident_polarization/@units":null, + "extent":null, + "associated_source":"/entry/instrument/source_xray" + }, + "ELECTRONANALYSER[electronanalyser]":{ + "@default":"detector", + "description":"@eln", + "name":"@eln", + "work_function":"@attrs:instrument/analyser/work_function", + "work_function/@units":"eV", + "fast_axes":null, + "slow_axes":"energy", + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "serial_number":"@attrs:instrument/analyser/serial_number", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "energy_resolution":{ + "physical_quantity":"energy", + "type":"@eln", + "resolution":"@eln", + "resolution/@units":"@eln" + }, + "transmission_function":null, + "COLLECTIONCOLUMN[collectioncolumn]":{ + "scheme":"@eln", + "lens_mode":"@attrs:acquisition/spectrum_definition/lens_mode_name", + "projection":null, + "angular_acceptance":null, + "spatial_acceptance":null, + "field_aperture":null, + "contrast_aperture":null, + "iris":null, + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + } + }, + "ENERGYDISPERSION[energydispersion]":{ + "scheme":"@eln", + "pass_energy":"@attrs:acquisition/spectrum_definition/pass_energy", + "pass_energy/@units":"eV", + "radius":"@attrs:/ENTRY[Fix_2D_Trans]/instrument/analyser/radius", + "radius/@units":"mm", + "APERTURE[slit]":{ + "description":"@attrs:instrument/analyser/slit/name", + "shape":"@attrs:instrument/analyser/slit/type", + "length":"@attrs:instrument/analyser/slit/length", + "length/@units":"mm", + "width":"@attrs:instrument/analyser/slit/width", + "width/@units":"mm", + "size":"@attrs:instrument/analyser/slit/size", + "size/@units":"mm", + "is_centered":"@attrs:instrument/analyser/slit/is_centered", + "knob_position":"@attrs:instrument/analyser/slit/knob_position", + "motorized_position":"@attrs:instrument/analyser/slit/motorized_position", + "motorized_position_tolerance":"@attrs:instrument/analyser/slit/motorized_position_tolerance" + }, + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + } + }, + "DETECTOR[detector]":{ + "@default":"raw_data", + "acquisition_mode":"@attrs:acquisition/spectrum_definition/acquisition_mode", + "amplifier_type":"@eln", + "count_time":"@attrs:acquisition/spectrum/dwell_time", + "count_time/@units":"s", + "image_detector":"@attrs:instrument/analyser/image_detector", + "image_source":"@attrs:instrument/analyser/image_source", + "time_per_energy_channel":"@attrs:acquisition/spectrum/time_per_energy_channel", + "time_per_energy_channel/@units":"s", + "device_information":{ + "vendor":"@eln", + "model":"@attrs:instrument/analyser/model", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "raw_data":{ + "@signal":"raw", + "raw":"@data:data/data", + "raw/@units":"counts" + } + } + }, + "MANIPULATOR[manipulator]":{ + "device_information":{ + "vendor":"@eln", + "model":"@eln", + "identifier":{ + "service":"@eln", + "identifier":"@eln", + "is_persistent":"@eln" + } + }, + "temperature_sensor":{ + "name":"@eln", + "measurement":"@eln", + "type":"@eln", + "attached_to":"@eln", + "value":"@eln", + "value/@units":"@eln" + }, + "sample_heater":{ + "name":"@eln", + "physical_quantity":"@eln", + "type":"@eln", + "heater_power":"@eln", + "heater_power/@units":"@eln", + "PID[pid]/setpoint":"@eln", + "PID[pid]/setpoint/@units":"@eln" + }, + "cryostat":null, + "drain_current_amperemeter":{ + "name":"@eln", + "measurement":"@eln", + "type":"@eln", + "value":"@eln", + "value/@units":"@eln" + }, + "sample_bias_voltmeter":{ + "name":"@eln", + "measurement":"@eln", + "type":"@eln", + "value":"@eln", + "value/@units":"@eln" + }, + "sample_bias_potentiostat":{ + "name":"@eln", + "physical_quantity":"voltage", + "type":"@eln", + "PID[pid]/setpoint":"@attrs:sample/bias", + "PID[pid]/setpoint/@units":"V" + } + }, + "pressure_gauge":{ + "name":"@eln", + "measurement":"@eln", + "type":"@eln", + "value":"@eln", + "value/@units":"@eln" + }, + "flood_gun":{ + "name":"@eln", + "physical_quantity":"@eln", + "type":"@eln", + "current":"@eln", + "current/@units":"@eln" + } + }, + "/ENTRY/PROCESS_MPES[process]":{ + "energy_calibration":{ + "calibrated_axis":"@data:data/x_axis", + "calibrated_axis/@units":"eV" + }, + "energy_referencing":null, + "transmission_correction":null + }, + "/ENTRY/SAMPLE[sample]":{ + "name":"@attrs:sample/name", + "description":"@attrs:sample/description", + "identifier":{ + "identifier":"@eln" + }, + "atom_types":"@eln", + "physical_form":"@eln", + "situation":"@eln", + "SUBSTANCE[substance]":{ + "name":"@eln", + "molecular_mass":"@eln", + "molecular_mass/@units":"@eln", + "cas_number":"@eln", + "molecular_formula_hill":"@eln" + }, + "history":{ + "sample_preparation":{ + "start_time":"@eln", + "end_time":"@eln", + "description":"@eln", + "method":"@eln" + } + }, + "temperature_env":{ + "temperature_sensor":"@link:/entry/instrument/manipulator/temperature_sensor", + "sample_heater":"@link:/entry/instrument/manipulator/sample_heater", + "cryostat":null + }, + "gas_pressure_env":{ + "pressure_gauge":"@link:/entry/instrument/pressure_gauge" + }, + "bias_env":{ + "potentiostat":"@link:/entry/instrument/manipulator/sample_bias_potentiostat", + "voltmeter":"@link:/entry/instrument/manipulator/sample_bias_voltmeter" + }, + "drain_current_env":{ + "amperemeter":"@link:/entry/instrument/manipulator/drain_current_amperemeter" + }, + "flood_gun_current_env":{ + "flood_gun":"@link:/entry/instrument/flood_gun" + } + }, + "/ENTRY/data":{ + "@axes":["y_axis", "energy"], + "@signal":"data", + "data":"@data:data/data", + "data/@units":"counts_per_second", + "energy":"@data:data/x_axis", + "energy/@type":"@attrs:acquisition/spectrum_definition/energy_mode", + "energy/@units":"eV", + "AXISNAME[y_axis]":"@data:data/y_axis", + "AXISNAME[y_axis]/@units":"@attrs:y_axis/@units", + "@y_axis_indices": 0, + "@energy_indices": 1 + }, + "/ENTRY/DATA[data_reduced1d]":{ + "@axes":["energy"], + "@signal":"data", + "data":"@data:data_reduced_1d/data", + "data/@units":"counts_per_second", + "energy":"@link:/entry/data/energy" + } +} \ No newline at end of file diff --git a/src/pynxtools_xps/reader.py b/src/pynxtools_xps/reader.py index e0755044..d929f564 100644 --- a/src/pynxtools_xps/reader.py +++ b/src/pynxtools_xps/reader.py @@ -311,9 +311,13 @@ def _check_for_vendors_txt(file_path: str) -> str: parser = XPSReader.__prmt_vndr_cls[file_ext][vendor]() parser.parse_file(file_path, **self.kwargs) - self.config_file = XPSReader.reader_dir.joinpath( - "config", parser.config_file - ) + + config_file = parser.config_file + + if isinstance(config_file, dict): + config_file = config_file.get(file_ext) + + self.config_file = XPSReader.reader_dir.joinpath("config", config_file) data_dict = parser.data_dict except ValueError as val_err: @@ -691,7 +695,7 @@ def get_data(self, key: str, path: str) -> Any: try: return np.array(xr_data.coords[path].values) except KeyError: - pass + return None def set_root_default(self, template): """Set the default for automatic plotting.""" diff --git a/src/pynxtools_xps/reader_utils.py b/src/pynxtools_xps/reader_utils.py index 79f394c6..97e6f49f 100644 --- a/src/pynxtools_xps/reader_utils.py +++ b/src/pynxtools_xps/reader_utils.py @@ -342,6 +342,7 @@ def _re_map_single_value( input_key: str, value: Optional[Union[str, int, float, bool, np.ndarray]], map_functions: Dict[str, Any], + **kwargs, ): """ Map the values returned from the file to the preferred format for @@ -354,7 +355,7 @@ def _re_map_single_value( for key in map_functions: if key in input_key: map_method = map_functions[key] - value = map_method(value) # type: ignore[operator] + value = map_method(value, **kwargs) # type: ignore[operator] return value diff --git a/src/pynxtools_xps/scienta/scienta_mappings.py b/src/pynxtools_xps/scienta/scienta_mappings.py index 604f36dc..c36ccb58 100644 --- a/src/pynxtools_xps/scienta/scienta_mappings.py +++ b/src/pynxtools_xps/scienta/scienta_mappings.py @@ -32,6 +32,7 @@ from pynxtools_xps.value_mappers import ( convert_energy_type, convert_energy_scan_mode, + parse_datetime, ) @@ -153,18 +154,23 @@ def _parse_time(time_string: str) -> datetime.time: "time_per_spectrum_channel": float, "manipulator_r1": float, "manipulator_r2": float, + "start_time": parse_datetime, + "stop_time": parse_datetime, + "preset_type": lambda x: x.lower(), + "source_type": lambda x: x.lower(), + "energy_mode": convert_energy_type, } UNITS: dict = { - "energydispersion/pass_energy": "eV", - "beam_xray/excitation_energy": "eV", - "region/energy_axis": "eV", - "region/center_energy": "eV", - "region/start_energy": "eV", - "region/stop_energy": "eV", - "region/step_size": "eV", - "detector/dwell_time": "eV", - "region/time_per_spectrum_channel": "s", + "pass_energy": "eV", + "excitation_energy": "eV", + "energy_axis": "eV", + "center_energy": "eV", + "start_energy": "eV", + "stop_energy": "eV", + "step_size": "eV", + "dwell_time": "s", + "time_per_spectrum_channel": "s", } diff --git a/src/pynxtools_xps/scienta/scienta_reader.py b/src/pynxtools_xps/scienta/scienta_reader.py index a9f565c9..041389a9 100644 --- a/src/pynxtools_xps/scienta/scienta_reader.py +++ b/src/pynxtools_xps/scienta/scienta_reader.py @@ -26,6 +26,7 @@ from pathlib import Path from typing import Any, Dict, List, Tuple, Union +import h5py import numpy as np import xarray as xr from igor2 import binarywave @@ -36,6 +37,7 @@ _re_map_single_value, construct_data_key, construct_entry_name, + _format_value, ) from pynxtools_xps.scienta.scienta_data_model import ScientaHeader, ScientaRegion from pynxtools_xps.scienta.scienta_mappings import ( @@ -54,11 +56,18 @@ class MapperScienta(XPSMapper): dictionaries. """ - config_file = "config_scienta.json" + config_file = { + ".h5": "config_scienta_hdf5.json", + ".hdf5": "config_scienta_hdf5.json", + ".ibw": "config_scienta.json", + ".txt": "config_scienta.json", + } __prmt_file_ext__ = [ - "ibw", - "txt", + ".h5", + ".hdf5", + ".ibw", + ".txt", ] __file_err_msg__ = ( @@ -81,6 +90,8 @@ def _select_parser(self): return ScientaTxtParser() elif str(self.file).endswith(".ibw"): return ScientaIgorParser() + elif str(self.file).endswith((".h5", ".hdf5")): + return ScientaHdf5Parser() raise ValueError(MapperScienta.__file_err_msg__) def construct_data(self): @@ -90,80 +101,17 @@ def construct_data(self): self._xps_dict["data"]: dict = {} - template_key_map = { - "file_info": ["data_file", "sequence_file"], - "user": [ - "user_name", - ], - "instrument": [ - "instrument_name", - "vendor", - ], - "source_xray": [], - "beam_xray": [ - "excitation_energy", - ], - "electronanalyser": [], - "collectioncolumn": [ - "lens_mode", - ], - "energydispersion": [ - "acquisition_mode", - "pass_energy", - ], - "detector": [ - "detector_first_x_channel", - "detector_first_y_channel", - "detector_last_x_channel", - "detector_last_y_channel", - "detector_mode", - "dwell_time", - "time_per_spectrum_channel", - ], - "manipulator": [ - "manipulator_r1", - "manipulator_r2", - ], - "calibration": [], - "sample": ["sample_name"], - "region": [ - "center_energy", - "energy_axis", - "energy_scale", - "energy_scale_2", - "energy_size", - "no_of_scans", - "region_id", - "spectrum_comment", - "start_energy", - "step_size", - "stop_energy", - "time_stamp", - "intensity/@units", - ], - # 'unused': [ - # 'energy_unit', - # 'number_of_slices', - # 'software_version', - # 'spectrum_comment', - # 'start_date', - # 'start_time', - # 'time_per_spectrum_channel' - # ] - } - for spectrum in spectra: - self._update_xps_dict_with_spectrum(spectrum, template_key_map) + self._update_xps_dict_with_spectrum(spectrum) - def _update_xps_dict_with_spectrum( - self, spectrum: Dict[str, Any], template_key_map: Dict[str, List[str]] - ): + def _update_xps_dict_with_spectrum(self, spectrum: Dict[str, Any]): """ Map one spectrum from raw data to NXmpes-ready dict. """ + entry_parts = [] - for part in ["spectrum_type", "region_name"]: + for part in ["spectrum_type", "region_name", "acquisition/spectrum/name"]: val = spectrum.get(part, None) if val: entry_parts += [val] @@ -171,50 +119,28 @@ def _update_xps_dict_with_spectrum( entry = construct_entry_name(entry_parts) entry_parent = f"/ENTRY[{entry}]" - file_parent = f"{entry_parent}/file_info" - instrument_parent = f"{entry_parent}/instrument" - analyser_parent = f"{instrument_parent}/electronanalyser" - - path_map = { - "file_info": f"{file_parent}", - "user": f"{entry_parent}/user", - "instrument": f"{instrument_parent}", - "source_xray": f"{instrument_parent}/source_xray", - "beam_xray": f"{instrument_parent}/beam_xray", - "electronanalyser": f"{analyser_parent}", - "collectioncolumn": f"{analyser_parent}/collectioncolumn", - "energydispersion": f"{analyser_parent}/energydispersion", - "detector": f"{analyser_parent}/detector", - "manipulator": f"{instrument_parent}/manipulator", - "calibration": f"{instrument_parent}/calibration", - "sample": f"{entry_parent}/sample", - "data": f"{entry_parent}/data", - "region": f"{entry_parent}/region", - } + for key, value in spectrum.items(): + mpes_key = f"{entry_parent}/{key}" + self._xps_dict[mpes_key] = value - for grouping, spectrum_keys in template_key_map.items(): - root = path_map[str(grouping)] + units = get_units_for_key(key, UNITS) + if units is not None: + self._xps_dict[f"{mpes_key}/@units"] = units - for spectrum_key in spectrum_keys: - mpes_key = spectrum_key.rsplit(" ", 1)[0] - try: - self._xps_dict[f"{root}/{mpes_key}"] = spectrum[spectrum_key] - except KeyError: - pass - - unit_key = f"{grouping}/{spectrum_key}" - units = get_units_for_key(unit_key, UNITS) - if units is not None: - self._xps_dict[f"{root}/{mpes_key}/@units"] = units - - # Create key for writing to data - scan_key = construct_data_key(spectrum) + try: + self._fill_with_data_txt_ibw(spectrum, entry) + except (IndexError, KeyError): + self._fill_with_data_hdf5(spectrum, entry) + def _fill_with_data_txt_ibw(self, spectrum: Dict[str, Any], entry: str): # If multiple spectra exist to entry, only create a new # xr.Dataset if the entry occurs for the first time. if entry not in self._xps_dict["data"]: self._xps_dict["data"][entry] = xr.Dataset() + # Create key for writing to data + scan_key = construct_data_key(spectrum) + energy = np.array(spectrum["data"]["energy"]) intensity = spectrum["data"]["intensity"] @@ -250,6 +176,23 @@ def _update_xps_dict_with_spectrum( data=intensity, coords={"energy": energy} ) + def _fill_with_data_hdf5(self, spectrum: Dict[str, Any], entry: str): + self._xps_dict["data"][entry] = {} + + data_keys = [ + "data/data", + "data/sum", + "data/x_axis", + "data/y_axis", + "data_reduced_1d/data", + "data_reduced_1d/x_axis", + ] + + for key in data_keys: + value = spectrum[f"acquisition/spectrum/{key}"] + + self._xps_dict["data"][entry][key] = value + class ScientaTxtParser: """Parser for Scienta TXT exports.""" @@ -614,3 +557,81 @@ def axis_units_for_dim(self, wave_header: Dict[str, Any], dim: int) -> str: unit += elem.decode("utf-8") return unit + + +class ScientaHdf5Parser: + def __init__(self): + self.spectra: List[Dict[str, Any]] = [] + + def parse_file(self, file: Union[str, Path], **kwargs): + """ + Reads the igor binarywave files and returns a list of + dictionary containing the wave data. + + Parameters + ---------- + file : str + Filepath of the TXT file to be read. + + Returns + ------- + self.spectra + Flat list of dictionaries containing one spectrum each. + + """ + + def format_value(key: str, value_str: str) -> Tuple[Any, str]: + """ + Formats a value string (to a corresponding key) according to a series of transformations. + This function: + 1. Formats the numeric part of the value according to its expected type. + 2. Remaps the value to a new format if specified in `VALUE_MAP`. + Args: + key (str): The key associated with the value, which may need mapping and formatting. + value_str (str): The value string to format and separate into numeric value and unit. + Returns: + Tuple[Any, str]: + - The formatted key (converted to snake_case and remapped if needed). + - The formatted value, with numeric value processed and remapped according to `VALUE_MAP`. + """ + kwargs: Dict[str, Any] = {} + + if "_time" in key: + kwargs["possible_date_formats"] = ["%Y-%m-%dT%H:%M:%S"] + + value = _re_map_single_value(key, value_str, VALUE_MAP, **kwargs) + + value = _format_value(value) + + return value + + def recursively_read_group(group, path=""): + result = {} + for key, item in group.items(): + new_path = f"{path}/{key}" if path else key + if isinstance(item, h5py.Group): + # Recursively read subgroups + result.update(recursively_read_group(item, new_path)) + elif isinstance(item, h5py.Dataset): + # Read datasets + data = item[()] + if isinstance(data, bytes): + data = data.decode("utf-8") + data = format_value(key, data) + result[new_path] = data + return result + + # Open the HDF5 file and read its contents + with h5py.File(file, "r") as hdf: + hdf5_data = recursively_read_group(hdf) + + try: + length, width = ( + hdf5_data["instrument/analyser/slit/length"], + hdf5_data["instrument/analyser/slit/width"], + ) + hdf5_data["instrument/analyser/slit/size"] = np.array([length, width]) + except KeyError: + pass + + return [hdf5_data] From 405eda63cae130501a97773c930902c10c547196 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:09:12 +0100 Subject: [PATCH 3/6] small fix for scienta ibw parser --- src/pynxtools_xps/scienta/scienta_data_model.py | 2 +- src/pynxtools_xps/scienta/scienta_mappings.py | 2 +- src/pynxtools_xps/scienta/scienta_reader.py | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/pynxtools_xps/scienta/scienta_data_model.py b/src/pynxtools_xps/scienta/scienta_data_model.py index ee377c84..6cb22a74 100644 --- a/src/pynxtools_xps/scienta/scienta_data_model.py +++ b/src/pynxtools_xps/scienta/scienta_data_model.py @@ -63,7 +63,7 @@ class ScientaRegion(XpsDataclass): sample_name: str = "" spectrum_comment: str = "" start_date: str = "" - start_time: str = "" + time: str = "" time_stamp: str = "" time_per_spectrum_channel: float = 0.0 detector_mode: str = "" diff --git a/src/pynxtools_xps/scienta/scienta_mappings.py b/src/pynxtools_xps/scienta/scienta_mappings.py index c36ccb58..56e612eb 100644 --- a/src/pynxtools_xps/scienta/scienta_mappings.py +++ b/src/pynxtools_xps/scienta/scienta_mappings.py @@ -127,7 +127,6 @@ def _parse_time(time_string: str) -> datetime.time: "sample": "sample_name", "comments": "spectrum_comment", "date": "start_date", - "time": "start_time", } VALUE_MAP = { @@ -204,6 +203,7 @@ def _get_key_value_pair(line: str): if "dimension" in key: key_part = f"dimension_{key.rsplit('_')[-1]}" key = KEY_MAP.get(key_part, key_part) + value = _re_map_single_value(key, value, VALUE_MAP) except ValueError: diff --git a/src/pynxtools_xps/scienta/scienta_reader.py b/src/pynxtools_xps/scienta/scienta_reader.py index 041389a9..74e620b1 100644 --- a/src/pynxtools_xps/scienta/scienta_reader.py +++ b/src/pynxtools_xps/scienta/scienta_reader.py @@ -346,7 +346,7 @@ def _parse_region(self, region_id: int): region.data = {"energy": np.array(energies), "intensity": np.array(intensities)} # Convert date and time to ISO8601 date time. - region.time_stamp = _construct_date_time(region.start_date, region.start_time) + region.time_stamp = _construct_date_time(region.start_date, region.time) region.validate_types() @@ -428,9 +428,7 @@ def parse_file(self, file: Union[str, Path], **kwargs): intensities = data[region_id] # Convert date and time to ISO8601 date time. - region.time_stamp = _construct_date_time( - region.start_date, region.start_time - ) + region.time_stamp = _construct_date_time(region.start_date, region.time) region.energy_size = len(energies) region.energy_axis = energies From 3592a563f8b31d1818e2eb5ca03cc1f0c9920665 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 13 Nov 2024 15:59:19 +0100 Subject: [PATCH 4/6] allow for microseconds in datetime strings --- src/pynxtools_xps/scienta/scienta_reader.py | 5 ++++- src/pynxtools_xps/value_mappers.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pynxtools_xps/scienta/scienta_reader.py b/src/pynxtools_xps/scienta/scienta_reader.py index 74e620b1..9dcbd2c8 100644 --- a/src/pynxtools_xps/scienta/scienta_reader.py +++ b/src/pynxtools_xps/scienta/scienta_reader.py @@ -595,7 +595,10 @@ def format_value(key: str, value_str: str) -> Tuple[Any, str]: kwargs: Dict[str, Any] = {} if "_time" in key: - kwargs["possible_date_formats"] = ["%Y-%m-%dT%H:%M:%S"] + kwargs["possible_date_formats"] = [ + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M:%S.%f%z", + ] value = _re_map_single_value(key, value_str, VALUE_MAP, **kwargs) diff --git a/src/pynxtools_xps/value_mappers.py b/src/pynxtools_xps/value_mappers.py index 965fd433..fa7c6ebf 100644 --- a/src/pynxtools_xps/value_mappers.py +++ b/src/pynxtools_xps/value_mappers.py @@ -181,6 +181,10 @@ def parse_datetime( Datetime in ISO 8601 format. """ for date_fmt in possible_date_formats: + if date_fmt == "%Y-%m-%dT%H:%M:%S.%f%z": + # strptime only supports six digits for microseconds + datetime_string = datetime_string[:-7] + datetime_string[-6:] + try: datetime_obj = datetime.datetime.strptime(datetime_string, date_fmt) From 20df6a9c8860c6c191430943816651d117714637 Mon Sep 17 00:00:00 2001 From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:35:24 +0000 Subject: [PATCH 5/6] giupdate examples and tests for scienta reader --- examples/scienta/ibw/Cu-HHTP.ibw.nxs | Bin 632968 -> 632968 bytes examples/scienta/txt/Cu-HHTP.txt.nxs | Bin 632968 -> 632968 bytes tests/data/scienta_ibw/scienta_ibw_ref.nxs | Bin 254136 -> 254136 bytes tests/data/scienta_txt/scienta_txt_ref.nxs | Bin 254136 -> 254136 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/examples/scienta/ibw/Cu-HHTP.ibw.nxs b/examples/scienta/ibw/Cu-HHTP.ibw.nxs index adc78d4bbefffd62b1d1fd8fbdc92a58d46f3792..e3312df225c4d6497a7e2b95aac5a1f19ec2f155 100644 GIT binary patch delta 239 zcmeCUsMc{&ZG(W#TxZ?ha-HqSjOhoSvh%cu6tDv^$M%o{&PSZWMn+a9MtUa3<|f7#+6D$z1_o@? zC961Pd5|Th7gcfgO}CiNDY0FmnsWp5^p0vy0Y<~=g4LYj+dtHCE`W$^U(m$411ebw Tk!)z=G!OtQ-+o{k=Ltaot=C6} diff --git a/examples/scienta/txt/Cu-HHTP.txt.nxs b/examples/scienta/txt/Cu-HHTP.txt.nxs index 7211b1f26593ed8290e78cceeefead0d599ea89f..c651ea58c66ec0cd261e66abd725637d8f6dc569 100644 GIT binary patch delta 239 zcmeCUsMc{&ZG(W#TxZ?ha-HqSjOhoSvh%cu6tDv^$M%o{&PSZWMn+a9W_m`ZhUONQ+6D$z1_o@? zC961Pd5|Th7gcfgO}CiNDY0FmnsWp5^p0vy0Y<~=g4LYj+dtHCE`W$^U(m$411ebw Tk!)z=G!OtQ-+o{k=Ltaovc^Z0 diff --git a/tests/data/scienta_ibw/scienta_ibw_ref.nxs b/tests/data/scienta_ibw/scienta_ibw_ref.nxs index 68ba0e73ead6b209c1a8b03e9634804c58968f36..03f8a965c1dddf17b023effabf4412851e082c8a 100644 GIT binary patch delta 125 zcmdmSkblQP{tW^)lNapbX_m3sE@Q*kvH-+sUvr0X`84Gb-` z4GgRd4A`c>JjpD}gDf%K=@fGx7b61%6fKG|Sj*m$6}NSped+uermxea#)FsJFs~hE|5=dZtDe<|ank z1_o9J25i$`o@AEgL6(^Abc(redcJjpD}gDf%K=@fGx7b61%6fKG|Sj*m$6}NSped+uermxea#)FsJFs~hE_%fdIpw8X66>! z1_o9J25i$`o@AEgL6(^Abc(redc Date: Wed, 13 Nov 2024 16:15:30 +0100 Subject: [PATCH 6/6] allow nxs-like structure in scienta h5 files --- src/pynxtools_xps/scienta/scienta_reader.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pynxtools_xps/scienta/scienta_reader.py b/src/pynxtools_xps/scienta/scienta_reader.py index 9dcbd2c8..9f96f0ae 100644 --- a/src/pynxtools_xps/scienta/scienta_reader.py +++ b/src/pynxtools_xps/scienta/scienta_reader.py @@ -111,7 +111,12 @@ def _update_xps_dict_with_spectrum(self, spectrum: Dict[str, Any]): """ entry_parts = [] - for part in ["spectrum_type", "region_name", "acquisition/spectrum/name"]: + for part in [ + "spectrum_type", + "region_name", + "acquisition/spectrum/name", + "title", + ]: val = spectrum.get(part, None) if val: entry_parts += [val] @@ -120,6 +125,9 @@ def _update_xps_dict_with_spectrum(self, spectrum: Dict[str, Any]): entry_parent = f"/ENTRY[{entry}]" for key, value in spectrum.items(): + if key.startswith("entry"): + entry_parent = f"/ENTRY[entry]" + key = key.replace("entry/", "", 1) mpes_key = f"{entry_parent}/{key}" self._xps_dict[mpes_key] = value @@ -189,9 +197,9 @@ def _fill_with_data_hdf5(self, spectrum: Dict[str, Any], entry: str): ] for key in data_keys: - value = spectrum[f"acquisition/spectrum/{key}"] - - self._xps_dict["data"][entry][key] = value + value = spectrum.get(f"acquisition/spectrum/{key}") + if value is not None: + self._xps_dict["data"][entry][key] = value class ScientaTxtParser: