diff --git a/pyproject.toml b/pyproject.toml
index 62aeecc..eeae4f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,9 +111,10 @@ package-dir = { "" = "src" }
 where = ["src"]
 
 [project.entry-points.'nomad.plugin']
-parser_entry_point = "nomad_inl_base.parsers:parser_entry_point"
+CV_parser_entry_point = "nomad_inl_base.parsers:CV_parser_entry_point"
+ED_parser_entry_point = "nomad_inl_base.parsers:ED_parser_entry_point"
 schema_package_entry_point = "nomad_inl_base.schema_packages:schema_package_entry_point"
-cyclyc_voltammetry_entry_point = "nomad_inl_base.schema_packages:cyclyc_voltammetry_entry_point"
+cyclic_voltammetry_entry_point = "nomad_inl_base.schema_packages:cyclic_voltammetry_entry_point"
 
 
 
diff --git a/src/nomad_inl_base/parsers/__init__.py b/src/nomad_inl_base/parsers/__init__.py
index 8d69008..219f0c4 100644
--- a/src/nomad_inl_base/parsers/__init__.py
+++ b/src/nomad_inl_base/parsers/__init__.py
@@ -2,17 +2,28 @@
 from pydantic import Field
 
 
-class NewParserEntryPoint(ParserEntryPoint):
-    parameter: int = Field(0, description='Custom configuration parameter')
-
+class CVConfigurationParserEntryPoint(ParserEntryPoint):
     def load(self):
-        from nomad_inl_base.parsers.parser import NewParser
+        from nomad_inl_base.parsers.parser import CVParser
+
+        return CVParser(**self.dict())
 
-        return NewParser(**self.dict())
+CV_parser_entry_point = CVConfigurationParserEntryPoint(
+    name='CVParser',
+    description='New parser for getting the data from a CV.',
+    mainfile_name_re=r'.*mVs\.xlsx',
+    mainfile_mime_re='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+)
+
+class EDConfigurationParserEntryPoint(ParserEntryPoint):
+    def load(self):
+        from nomad_inl_base.parsers.parser import EDParser
+        return EDParser(**self.dict())
 
 
-parser_entry_point = NewParserEntryPoint(
-    name='NewParser',
-    description='New parser entry point configuration.',
-    mainfile_name_re='.*\.newmainfilename',
+ED_parser_entry_point = EDConfigurationParserEntryPoint(
+    name='EDParser',
+    description='New parser for getting the data from a ED.',
+    mainfile_name_re=r'.*ED\.xlsx',
+    mainfile_mime_re='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
 )
diff --git a/src/nomad_inl_base/parsers/parser.py b/src/nomad_inl_base/parsers/parser.py
index 6e3fee6..f1ccbb2 100644
--- a/src/nomad_inl_base/parsers/parser.py
+++ b/src/nomad_inl_base/parsers/parser.py
@@ -13,20 +13,166 @@
 from nomad.config import config
 from nomad.datamodel.metainfo.workflow import Workflow
 from nomad.parsing.parser import MatchingParser
+import pandas as pd
+from nomad.datamodel.data import EntryData
+from nomad.datamodel.datamodel import EntryArchive, EntryMetadata
+from nomad_inl_base.utils import fill_quantity, create_archive, get_hash_ref
+from nomad_inl_base.schema_packages.cyclic_voltammetry import *
+from nomad.units import ureg
+
+class RawFile_(EntryData):
+    """Entry stored for the parsed spreadsheet: a name plus a reference string in `file_`."""
+    m_def = Section(a_eln=None, label='Raw File EPIC')
+    name = Quantity(
+        type=str,
+        a_eln=ELNAnnotation(
+            component='StringEditQuantity',
+        ),
+    )
+    file_ = Quantity(
+        type=str,
+        a_eln=ELNAnnotation(
+            component='FileEditQuantity',
+        ),
+        a_browser={'adaptor': 'RawFileAdaptor'},
+        description='EPIC log file list',
+    )
+
+class EDParser(MatchingParser):
+
+    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
+        """Read `mainfile`, write a ChronoamperometryMeasurement archive, set `archive.data`."""
+        filetype = 'yaml'
+        data_file = mainfile.split('/')[-1].split('.xlsx')[0].replace(' ', '_')
+        xlsx = pd.ExcelFile(mainfile)
+
+        data = pd.read_excel(xlsx)
+        if 'WE(1).Current (A)' in data.columns:
+            data.rename(columns={'Corrected time (s)':'Time',
+                                 'WE(1).Current (A)':'Current'}, inplace=True)
+        else:
+            data.rename(columns={'Column 1':'t',
+                                 'Column 2':'Current',
+                                 'Column 3':'Time',
+                                 'Column 4':'Index',
+                                 'Column 5':'Current range'}, inplace=True)
+
+        #Dummy archive for the data file
+        file_reference = get_hash_ref(archive.m_context.upload_id, data_file)
+
+
+        #create a ED archive
+        ED_measurement = ChronoamperometryMeasurement()
+        ED_measurement.current = CurrentTimeSeries()
+        ED_measurement.current.value = fill_quantity(data, 'Current', 'ampere')
+        ED_measurement.current.time = fill_quantity(data, 'Time', 'seconds')
+
+        #create a ED archive
+        ED_filename = f'{data_file}.ED_measurement.archive.{filetype}'
+
+        if archive.m_context.raw_path_exists(ED_filename):
+            logger.warning(f'Process archive already exists: {ED_filename}')
+        else:
+            ED_archive = EntryArchive(
+                data=ED_measurement,
+                # m_context=archive.m_context,
+                metadata=EntryMetadata(upload_id=archive.m_context.upload_id),
+            )
+
+
+            create_archive(
+                ED_archive.m_to_dict(),
+                archive.m_context,
+                ED_filename,
+                filetype,
+                logger,
+            )
+
+
+        archive.data = RawFile_(
+            name=data_file + '_raw',
+            file_=file_reference,
+        )
+        archive.metadata.entry_name = data_file.replace('.xlsx', '')
+
+
+class CVParser(MatchingParser):
+
+    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
+        """Read `mainfile`, write a PotentiostatMeasurement archive, set `archive.data`."""
+        filetype = 'yaml'
+        data_file = mainfile.split('/')[-1].split('.xlsx')[0].replace(' ', '_')
+        xlsx = pd.ExcelFile(mainfile)
+
+        data = pd.read_excel(xlsx)
+        if 'WE(1).Potential (V)' in data.columns:
+            data.rename(columns={'WE(1).Potential (V)':'Potential',
+                                 'WE(1).Current (A)':'Current'}, inplace=True)
+        else:
+            data.rename(columns={'Column 1':'Potential applied (V)',
+                                 'Column 2':'Time (s)',
+                                 'Column 3':'Current',
+                                 'Column 4':'Potential',
+                                 'Column 5':'Scan',
+                                 'Column 6':'Index',
+                                 'Column 7': 'Q+',
+                                 'Column 8':'Q-'}, inplace=True)
+
+        rate = float(mainfile.split('.xlsx')[0].split('-')[-1].replace(' ','').replace('mVs', ''))
+
+        #create a CV archive
+
+        CV_measurement = PotentiostatMeasurement()
+        CV_measurement.voltage = VoltageTimeSeries()
+        CV_measurement.current = CurrentTimeSeries()
+        CV_measurement.scan = ScanTimeSeries()
+        CV_measurement.rate = ureg.Quantity(
+            rate,
+            ureg('millivolt/second'),
+        )
+
+        #Dummy archive for the data file
+        file_reference = get_hash_ref(archive.m_context.upload_id, data_file)
+
+        #CV_measurement.data_file = file_reference
+
+        CV_measurement.voltage.value = fill_quantity(data, 'Potential', 'volt')
+        CV_measurement.current.value = fill_quantity(data, 'Current', 'ampere')
+        CV_measurement.scan.value = fill_quantity(data, 'Scan')
+        for values in [CV_measurement.voltage, CV_measurement.current, CV_measurement.scan]:
+            values.time = fill_quantity(data, 'Time (s)', 'seconds')
+
+        #create a CV archive
+        CV_filename = f'{data_file}.CV_measurement.archive.{filetype}'
+
+        if archive.m_context.raw_path_exists(CV_filename):
+            logger.warning(f'Process archive already exists: {CV_filename}')
+        else:
+            CV_archive = EntryArchive(
+                data=CV_measurement,
+                # m_context=archive.m_context,
+                metadata=EntryMetadata(upload_id=archive.m_context.upload_id),
+            )
+
+            create_archive(
+                CV_archive.m_to_dict(),
+                archive.m_context,
+                CV_filename,
+                filetype,
+                logger,
+            )
+
+
+
+
+        archive.data = RawFile_(
+            name=data_file + '_raw',
+            file_=file_reference,
+        )
+        archive.metadata.entry_name = data_file.replace('.xlsx', '')
+
+
 
-configuration = config.get_plugin_entry_point(
-    'nomad_inl_base.parsers:parser_entry_point'
-)
 
 
-class NewParser(MatchingParser):
-    def parse(
-        self,
-        mainfile: str,
-        archive: 'EntryArchive',
-        logger: 'BoundLogger',
-        child_archives: dict[str, 'EntryArchive'] = None,
-    ) -> None:
-        logger.info('NewParser.parse', parameter=configuration.parameter)
 
-        archive.workflow2 = Workflow(name='test')
diff --git a/src/nomad_inl_base/schema_packages/__init__.py b/src/nomad_inl_base/schema_packages/__init__.py
index ee0e87e..407d2b2 100644
--- a/src/nomad_inl_base/schema_packages/__init__.py
+++ b/src/nomad_inl_base/schema_packages/__init__.py
@@ -25,7 +25,8 @@ def load(self):
         return m_package
 
 
-schema_package_entry_point = CyclicVoltammetryPackageEntryPoint(
+cyclic_voltammetry_entry_point = CyclicVoltammetryPackageEntryPoint(
     name='CyclicVoltammetry',
     description='CyclicVoltammetry entry point configuration.',
 )
+
diff --git a/src/nomad_inl_base/schema_packages/cyclic_voltammetry.py b/src/nomad_inl_base/schema_packages/cyclic_voltammetry.py
index 619fae1..01b1330 100644
--- a/src/nomad_inl_base/schema_packages/cyclic_voltammetry.py
+++ b/src/nomad_inl_base/schema_packages/cyclic_voltammetry.py
@@ -12,59 +12,281 @@
 
 from nomad.config import config
 from nomad.datamodel.data import Schema
-from nomad.metainfo import Quantity, SchemaPackage
+from nomad_material_processing.solution.general import *
+from nomad.metainfo import Quantity, SchemaPackage, Section, SubSection
+
+from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum
+from nomad.datamodel.metainfo.basesections import Measurement
+from nomad_material_processing.general import TimeSeries
+from nomad_material_processing.vapor_deposition.general import SampleParameters
+import numpy as np
+
+from nomad.datamodel.metainfo.plot import PlotSection, PlotlyFigure
+from nomad.datamodel.data import EntryData
+import plotly.express as px
+import plotly.graph_objs as go
+from plotly.subplots import make_subplots
 
 configuration = config.get_plugin_entry_point(
-    'nomad_inl_base.schema_packages:schema_package_entry_point'
+    'nomad_inl_base.schema_packages:cyclic_voltammetry_entry_point'
 )
 
 m_package = SchemaPackage()
 
 
+class CurrentTimeSeries(TimeSeries):
+    m_def = Section(label_quantity='set_value', a_eln={'hide': ['set_value','set_time',]})
+
+    value = Quantity(
+        type=np.float64,
+        description='The observed current as a function of time.',
+        shape=['*'],
+        unit='ampere',
+    )
+
+class CurrentDensityTimeSeries(TimeSeries):
+    m_def = Section(label_quantity='set_value', a_eln={'hide': ['set_value','set_time',]})
+
+    value = Quantity(
+        type=np.float64,
+        description='The observed current density as a function of time.',
+        shape=['*'],
+        unit='ampere/meter**2',
+    )
+
+class ScanTimeSeries(TimeSeries):
+    m_def = Section(label_quantity='set_value', a_eln={'hide': ['set_value','set_time',]})
+
+    value = Quantity(
+        type=np.float64,
+        description='The observed scan as a function of time.',
+        shape=['*'],
+    )
+
+
+class VoltageTimeSeries(TimeSeries):
+    m_def = Section(label_quantity='set_value', a_eln={'hide': ['set_value','set_time',]})
+
+    value = Quantity(
+        type=np.float64,
+        description='The observed voltage as a function of time.',
+        shape=['*'],
+        unit='volt',
+    )
+
-class PotentiostatMeasurement(Schema):
+class ChronoamperometryMeasurement(PlotSection, Measurement, Schema):
     m_def = Section(
         links=['https://w3id.org/nfdi4cat/voc4cat_0007206'],
     )
 
-    data_file = Quantity(
-        type=str,
-        a_eln=dict(component='FileEditQuantity'),
-        a_browser=dict(adaptor='RawFileAdaptor'),
+    area_electrode = Quantity(
+        type=np.float64,
+        description='Area of the electrode ',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='centimeter**2',
+        ),
+        unit='meter**2',
+    )
+
+    Voltage_applied = Quantity(
+        type=np.float64,
+        description='Voltage applied to the electrode',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity
+        ),
+        unit='V',
     )
 
-    station = Quantity(type=str, a_eln=dict(component='StringEditQuantity'))
+    current = SubSection(section_def=CurrentTimeSeries)
+
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        """Build the current-versus-time figure stored in `self.figures`."""
+        super(ChronoamperometryMeasurement, self).normalize(archive, logger)
+
+        self.figures = []
+
+        if self.current is None or self.current.value is None:
+            # Nothing to plot without a current trace.
+            return
+
+        y_current = np.array(self.current.value) * 1000
+        x_time = self.current.time
+
+        y_label = 'Current (mA)'
+        if self.area_electrode is not None:
+            y_current = y_current / self.area_electrode.to('centimeter**2').magnitude
+            y_label = 'Current density (mA cm' + r'$^{-2}$' + ')'
 
-    function = Quantity(type=str, a_eln=dict(component='StringEditQuantity'))
+        first_line = px.scatter(x=x_time, y=y_current)
+        figure1 = make_subplots(rows=1, cols=1)
+        figure1.add_trace(first_line.data[0], row=1, col=1)
+        figure1.update_layout(template='plotly_white',
+                              height=400, width=716,
+                              xaxis_title="Time (s)",
+                              yaxis_title=y_label,
+                              title_text='ED curve')
 
-    environment = Quantity(
-        links=['https://w3id.org/nfdi4cat/voc4cat_0007223'],
-        type=Reference(Environment.m_def),
-        a_eln=dict(component='ReferenceEditQuantity'),
+        self.figures.append(PlotlyFigure(label='figure 1', figure=figure1.to_plotly_json()))
+
+        logger.info('NewSchema.normalize', parameter=configuration.parameter)
+        #self.message = f'Hello {self.name}!'
+
+class PotentiostatMeasurement(PlotSection, Measurement, Schema):
+    m_def = Section(
+        links=['https://w3id.org/nfdi4cat/voc4cat_0007206'],
     )
 
-    setup = Quantity(
-        links=['https://w3id.org/nfdi4cat/voc4cat_0007230'],
-        type=Reference(ElectroChemicalSetup.m_def),
-        a_eln=dict(component='ReferenceEditQuantity'),
+
+    #data_file = Quantity(
+    #    type=str,
+    #    a_eln=dict(component='FileEditQuantity'),
+    #    a_browser=dict(adaptor='RawFileAdaptor'),
+    #)
+
+    area_electrode = Quantity(
+        type=np.float64,
+        description='Area of the electrode ',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='centimeter**2',
+        ),
+        unit='meter**2',
     )
 
-    connected_experiments = Quantity(
-        type=Reference(SectionProxy('PotentiostatMeasurement')),
-        shape=['*'],
-        a_eln=dict(component='ReferenceEditQuantity'),
+    rate = Quantity(
+        type=np.float64,
+        description='Rate of the CV measurement',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='millivolt/second',
+        ),
+        unit='volt/second',
     )
 
-    pretreatment = SubSection(section_def=VoltammetryCycle)
+    current = SubSection(section_def=CurrentTimeSeries)
+    voltage = SubSection(section_def=VoltageTimeSeries)
+    scan = SubSection(section_def=ScanTimeSeries)
+
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        """Build the current-versus-voltage figure for scan 3, or scan 2 if scan 3 is empty/NaN."""
+        super(PotentiostatMeasurement, self).normalize(archive, logger)
+
+        self.figures = []
 
-    setup_parameters = SubSection(section_def=PotentiostatSetup)
+        series = (self.scan, self.voltage, self.current)
+        if any(s is None or s.value is None for s in series):
+            # Nothing to plot until scan, voltage and current are all present.
+            return
+
+        scan_plotted = 3.
+
+        scan = np.array(self.scan.value)
+        voltage = np.array(self.voltage.value)
+        current = np.array(self.current.value) * 1000
+
+        x_voltage = voltage[scan == scan_plotted]
+        y_current = current[scan == scan_plotted]
+
+        # Pick the scan first, so the area normalisation below applies to whichever scan is plotted.
+        if np.isnan(y_current).all():
+            scan_plotted = 2.
+            x_voltage = voltage[scan == scan_plotted]
+            y_current = current[scan == scan_plotted]
+
+        y_label = 'Current (mA)'
+        if self.area_electrode is not None:
+            y_current = y_current / self.area_electrode.to('centimeter**2').magnitude
+            y_label = 'Current density (mA cm' + r'$^{-2}$' + ')'
+
+        first_line = px.scatter(x=x_voltage, y=y_current)
+        figure1 = make_subplots(rows=1, cols=1)
+        figure1.add_trace(first_line.data[0], row=1, col=1)
+        figure1.update_layout(template='plotly_white',
+                              height=400, width=716,
+                              xaxis_title="Voltage (V)",
+                              yaxis_title=y_label,
+                              title_text='CV curve, scan ' + str(int(scan_plotted)))
+
+        self.figures.append(PlotlyFigure(label='figure 1', figure=figure1.to_plotly_json()))
+
+        logger.info('NewSchema.normalize', parameter=configuration.parameter)
+        #self.message = f'Hello {self.name}!'
+
+
+
+class ElectrolyteSolution(Solution):
+    m_def = Section(
+        links=['https://w3id.org/nfdi4cat/voc4cat_0007206'],
+    )
+
+    molar_concentration = Quantity(
+        type=np.float64,
+        description='Concentration of the electrolyte',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='mole/liter',
+        ),
+        unit='mole/m**3',
+    )
+
+    molal_concentration = Quantity(
+        type=np.float64,
+        description='Concentration of the electrolyte',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='mole/kg',
+        ),
+        unit='mole/kg',
+    )
 
-    properties = SubSection(section_def=PotentiostatProperties)
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
-        super().normalize(archive, logger)
+        super(ElectrolyteSolution, self).normalize(archive, logger)
 
         logger.info('NewSchema.normalize', parameter=configuration.parameter)
-        self.message = f'Hello {self.name}!'
+        #self.message = f'Hello {self.name}!'
+
+
+class WorkingElectrode(SampleParameters, Schema):
+    m_def = Section(
+        links=['https://w3id.org/nfdi4cat/voc4cat_0007206'],
+    )
+
+    area_electrode = Quantity(
+        type=np.float64,
+        description='Area of the electrode ',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.NumberEditQuantity,
+            defaultDisplayUnit='centimeter**2',
+        ),
+        unit='meter**2',
+    )
+
+    substrate = Quantity(
+        type=str,
+        description='Substrate of the electrode',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.StringEditQuantity,
+        ),
+        default='SLG',
+    )
+
+    layer = Quantity(
+        type=str,
+        description='Layer of the electrode',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.StringEditQuantity,
+        ),
+        default='Mo',
+    )
+
+    def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
+        super(WorkingElectrode, self).normalize(archive, logger)
+
+        logger.info('NewSchema.normalize', parameter=configuration.parameter)
+        #self.message = f'Hello {self.name}!'
+
 
 
 m_package.__init_metainfo__()
diff --git a/src/nomad_inl_base/utils.py b/src/nomad_inl_base/utils.py
new file mode 100644
index 0000000..e03a99b
--- /dev/null
+++ b/src/nomad_inl_base/utils.py
@@ -0,0 +1,123 @@
+import pandas as pd
+import json
+import math
+import yaml
+from nomad.datamodel.context import ClientContext
+from nomad.datamodel.metainfo.basesections import (
+    ExperimentStep,
+)
+from nomad.units import ureg
+
+
+def get_reference(upload_id, entry_id):
+    """Return the relative archive path of entry `entry_id` in upload `upload_id`."""
+    return f'../uploads/{upload_id}/archive/{entry_id}'
+
+
+def get_entry_id(upload_id, filename):
+    """Return `nomad.utils.hash(upload_id, filename)`."""
+    from nomad.utils import hash
+
+    return hash(upload_id, filename)
+
+
+def get_hash_ref(upload_id, filename):
+    """Return a reference string to the `data` section of the entry for `filename`."""
+    return f'{get_reference(upload_id, get_entry_id(upload_id, filename))}#data'
+
+
+def dict_nan_equal(dict1, dict2):
+    """
+    Compare two dictionaries, treating NaN values as equal to each other.
+    """
+    if set(dict1.keys()) != set(dict2.keys()):
+        return False
+    for key in dict1:
+        if not nan_equal(dict1[key], dict2[key]):
+            return False
+    return True
+
+
+def nan_equal(a, b):
+    """
+    Compare two values, treating NaN values as equal to each other.
+    """
+    if isinstance(a, float) and isinstance(b, float):
+        return a == b or (math.isnan(a) and math.isnan(b))
+    elif isinstance(a, dict) and isinstance(b, dict):
+        return dict_nan_equal(a, b)
+    elif isinstance(a, list) and isinstance(b, list):
+        return list_nan_equal(a, b)
+    else:
+        return a == b
+
+
+def list_nan_equal(list1, list2):
+    """
+    Compare two lists, treating NaN values as equal to each other.
+    """
+    if len(list1) != len(list2):
+        return False
+    for a, b in zip(list1, list2):
+        if not nan_equal(a, b):
+            return False
+    return True
+
+
+def create_archive(
+    entry_dict, context, filename, file_type, logger, *, overwrite: bool = False
+):
+    """
+    Write `entry_dict` to the raw file `filename` and return a reference to it.
+
+    Nothing is written (and None is returned) for a ClientContext. An existing
+    file is only rewritten when `overwrite` is set or its content already
+    equals `entry_dict`; otherwise an error is logged and the file is kept.
+    """
+    if isinstance(context, ClientContext):
+        return None
+    file_exists = context.raw_path_exists(filename)
+    dicts_are_equal = None
+    if file_exists:
+        with context.raw_file(filename, 'r') as file:
+            existing_dict = yaml.safe_load(file)
+        # nan_equal also copes with an empty file, for which safe_load gives None.
+        dicts_are_equal = nan_equal(existing_dict, entry_dict)
+    if not file_exists or overwrite or dicts_are_equal:
+        with context.raw_file(filename, 'w') as newfile:
+            if file_type == 'json':
+                json.dump(entry_dict, newfile)
+            elif file_type == 'yaml':
+                yaml.dump(entry_dict, newfile)
+        context.upload.process_updated_raw_file(filename, allow_modify=True)
+    elif file_exists and not overwrite and not dicts_are_equal:
+        logger.error(
+            f'{filename} archive file already exists. '
+            f'You are trying to overwrite it with a different content. '
+            f'To do so, remove the existing archive and click reprocess again.'
+        )
+    return get_hash_ref(context.upload_id, filename)
+
+
+def fill_quantity(dataframe, column_header, read_unit=None):
+    """
+    Fetch a column from a DataFrame and optionally attach a unit to it.
+
+    Returns the column as a pandas Series when `read_unit` is None and as a
+    pint Quantity built from the column values otherwise. Returns None when
+    the column is missing or empty, or when the values cannot be combined
+    with `read_unit`.
+    """
+    try:
+        value = dataframe[column_header]
+    except (KeyError, IndexError):
+        return None
+    if value.empty:
+        return None
+    if read_unit is None:
+        return value
+    try:
+        return ureg.Quantity(value.to_numpy(), ureg(read_unit))
+    except ValueError:
+        # Values that cannot be turned into a quantity: report the column as absent.
+        return None