From 574a4f23c9135e076ff198fece3cc6079f074626 Mon Sep 17 00:00:00 2001 From: Timur Bazhirov Date: Tue, 12 Aug 2025 01:44:22 -0700 Subject: [PATCH] chore: remove exabyteml --- express/parsers/exabyteml.py | 22 ---- express/parsers/mixins/exabyteml.py | 30 ----- express/properties/workflow.py | 169 +--------------------------- express/settings.py | 2 - 4 files changed, 2 insertions(+), 221 deletions(-) delete mode 100644 express/parsers/exabyteml.py delete mode 100644 express/parsers/mixins/exabyteml.py diff --git a/express/parsers/exabyteml.py b/express/parsers/exabyteml.py deleted file mode 100644 index 31374c35..00000000 --- a/express/parsers/exabyteml.py +++ /dev/null @@ -1,22 +0,0 @@ -from express.parsers import BaseParser -from express.parsers.mixins.exabyteml import ExabyteMLDataMixin - - -class ExabyteMLParser(BaseParser, ExabyteMLDataMixin): - """ - Exabyte ML parser class. - """ - - def __init__(self, *args, **kwargs): - super(ExabyteMLParser, self).__init__(*args, **kwargs) - self.model = kwargs.get("model") - self.targets = kwargs.get("targets") - self.features = kwargs.get("features") - self.predicted_properties = self.kwargs.get("predicted_properties") - self.scaling_params_per_feature = kwargs.get("scaling_params_per_feature") - - def band_gaps_direct(self): - return next((i["value"] for i in self.predicted_properties if i["name"] == "band_gaps:direct")) - - def band_gaps_indirect(self): - return next((i["value"] for i in self.predicted_properties if i["name"] == "band_gaps:indirect")) diff --git a/express/parsers/mixins/exabyteml.py b/express/parsers/mixins/exabyteml.py deleted file mode 100644 index 7d529112..00000000 --- a/express/parsers/mixins/exabyteml.py +++ /dev/null @@ -1,30 +0,0 @@ -from abc import abstractmethod - - -class ExabyteMLDataMixin(object): - """ - Defines Exabyte ML interfaces. - - Note: - THE FORMAT OF DATA STRUCTURE RETURNED MUST BE PRESERVED IN IMPLEMENTATION. - """ - - @abstractmethod - def data_per_property(self): - pass - - @abstractmethod - def precision_per_property(self): - pass - - @abstractmethod - def scaling_params_per_feature(self): - pass - - @abstractmethod - def band_gaps_direct(self): - pass - - @abstractmethod - def band_gaps_indirect(self): - pass diff --git a/express/properties/workflow.py b/express/properties/workflow.py index 85f4f84d..6fb2e7da 100644 --- a/express/properties/workflow.py +++ b/express/properties/workflow.py @@ -51,7 +51,7 @@ def _serialize(self) -> dict: class PyMLTrainAndPredictWorkflow(WorkflowProperty): """ - Next generation of ExabyteML. We expect workflows to have a format as follows: + We expect workflows to have a format as follows: Workflow_Head_Subworkflow - Contains various units which prepare an ML job. For example, we may have the following units present. @@ -203,7 +203,7 @@ def is_using_dataset(self): @property def workflow_specific_config(self) -> dict: """ - Generates the specific config for the new implementation of ExabyteML. The remainder of the config is + Generates the specific config for the workflow. The remainder of the config is generated inside of the parent Workflow class. Returns: @@ -224,168 +224,3 @@ def workflow_specific_config(self) -> dict: } return specific_config - - -class ExabyteMLPredictWorkflow(WorkflowProperty): - """ - Legacy implementation of Exabyte ML's predict Workflow property class. - """ - - def __init__(self, name, parser, *args, **kwargs): - super().__init__(name, parser, *args, **kwargs) - - self.model = self.parser.model - self.targets = self.parser.targets - self.features = self.parser.features - self.scaling_params_per_feature = self.parser.scaling_params_per_feature - - @property - def workflow_specific_config(self) -> dict: - """ - Generates the specific config for a legacy ExabyteML workflow. The remainder of the config is generated - inside of the parent Worfklow class. - - Returns: - dict - """ - specific_config = { - "units": [ - { - "_id": "LCthJ6E2QabYCZqf4", - "name": "ml_predict_subworkflow", - "type": "subworkflow", - "flowchartId": "subworkflow", - "head": True, - } - ], - "subworkflows": [ - { - "name": "ml_predict_subworkflow", - "isDraft": True, - "application": { - "version": "0.2.0", - "summary": "Exabyte Machine Learning Engine", - "name": "exabyteml", - "shortName": "ml", - "build": "Default", - }, - "units": [ - { - "status": "idle", - "statusTrack": [], - "head": True, - "flowchartId": "io", - "name": "input", - "application": { - "version": "0.2.0", - "summary": "Exabyte Machine Learning Engine", - "name": "exabyteml", - "shortName": "ml", - "build": "Default", - }, - "results": [], - "next": "data_transformation_manipulation", - "source": "api", - "postProcessors": [], - "preProcessors": [], - "subtype": "dataFrame", - "input": [ - { - "endpoint": "dataframe", - "endpoint_options": { - "headers": {}, - "data": {"features": self.features, "ids": [], "targets": self.targets}, - "method": "POST", - "params": {}, - "jobId": "", - }, - } - ], - "type": "io", - "monitors": [], - }, - { - "status": "idle", - "statusTrack": [], - "head": False, - "flowchartId": "data_transformation_manipulation", - "name": "clean data", - "monitors": [], - "results": [], - "next": "data_transformation_scale_and_reduce", - "application": { - "version": "0.2.0", - "summary": "Exabyte Machine Learning Engine", - "name": "exabyteml", - "shortName": "ml", - "build": "Default", - }, - "postProcessors": [], - "preProcessors": [], - "operationType": "manipulation", - "operation": "data_transformation", - "type": "processing", - "inputData": { - "cleanMissingData": True, - "replaceNoneValuesWith": 0, - "removeDuplicateRows": True, - }, - }, - { - "status": "idle", - "statusTrack": [], - "head": False, - "flowchartId": "data_transformation_scale_and_reduce", - "name": "scale and reduce", - "monitors": [], - "results": [], - "next": "score", - "application": { - "version": "0.2.0", - "build": "Default", - "name": "exabyteml", - "shortName": "ml", - "summary": "Exabyte Machine Learning Engine", - }, - "postProcessors": [], - "preProcessors": [], - "operationType": "scale_and_reduce", - "operation": "data_transformation", - "type": "processing", - "inputData": { - "scaler": "standard_scaler", - "perFeature": self.scaling_params_per_feature, - }, - }, - { - "status": "idle", - "statusTrack": [], - "executable": {"name": "score"}, - "flowchartId": "score", - "name": "score", - "head": False, - "results": [{"name": "predicted_properties"}], - "application": { - "version": "0.2.0", - "build": "Default", - "name": "exabyteml", - "shortName": "ml", - "summary": "Exabyte Machine Learning Engine", - }, - "postProcessors": [], - "preProcessors": [], - "context": {}, - "input": [], - "flavor": {"name": "score"}, - "type": "execution", - "monitors": [{"name": "standard_output"}], - }, - ], - "model": self.model, - "_id": "LCthJ6E2QabYCZqf4", - "properties": self.targets, - } - ], - "properties": self.targets, - } - return specific_config diff --git a/express/settings.py b/express/settings.py index e8e20204..6912ecfa 100644 --- a/express/settings.py +++ b/express/settings.py @@ -29,7 +29,6 @@ }, "material": {"reference": "express.properties.material.Material"}, "symmetry": {"reference": "express.properties.non_scalar.symmetry.Symmetry"}, - "workflow:ml_predict": {"reference": "express.properties.workflow.ExabyteMLPredictWorkflow"}, "workflow:pyml_predict": {"reference": "express.properties.workflow.PyMLTrainAndPredictWorkflow"}, "phonon_dos": {"reference": "express.properties.non_scalar.two_dimensional_plot.phonon_dos.PhononDOS"}, "phonon_dispersions": { @@ -70,7 +69,6 @@ "vasp": "express.parsers.apps.vasp.parser.VaspParser", "nwchem": "express.parsers.apps.nwchem.parser.NwchemParser", "structure": "express.parsers.structure.StructureParser", - "exabyteml": "express.parsers.exabyteml.ExabyteMLParser", } # Used to round to zero by default