From 574a4f23c9135e076ff198fece3cc6079f074626 Mon Sep 17 00:00:00 2001
From: Timur Bazhirov <timur@exabyte.io>
Date: Tue, 12 Aug 2025 01:44:22 -0700
Subject: [PATCH] chore: remove exabyteml

---
 express/parsers/exabyteml.py        |  22 ----
 express/parsers/mixins/exabyteml.py |  30 -----
 express/properties/workflow.py      | 169 +---------------------------
 express/settings.py                 |   2 -
 4 files changed, 2 insertions(+), 221 deletions(-)
 delete mode 100644 express/parsers/exabyteml.py
 delete mode 100644 express/parsers/mixins/exabyteml.py

diff --git a/express/parsers/exabyteml.py b/express/parsers/exabyteml.py
deleted file mode 100644
index 31374c35..00000000
--- a/express/parsers/exabyteml.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from express.parsers import BaseParser
-from express.parsers.mixins.exabyteml import ExabyteMLDataMixin
-
-
-class ExabyteMLParser(BaseParser, ExabyteMLDataMixin):
-    """
-    Exabyte ML parser class.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super(ExabyteMLParser, self).__init__(*args, **kwargs)
-        self.model = kwargs.get("model")
-        self.targets = kwargs.get("targets")
-        self.features = kwargs.get("features")
-        self.predicted_properties = self.kwargs.get("predicted_properties")
-        self.scaling_params_per_feature = kwargs.get("scaling_params_per_feature")
-
-    def band_gaps_direct(self):
-        return next((i["value"] for i in self.predicted_properties if i["name"] == "band_gaps:direct"))
-
-    def band_gaps_indirect(self):
-        return next((i["value"] for i in self.predicted_properties if i["name"] == "band_gaps:indirect"))
diff --git a/express/parsers/mixins/exabyteml.py b/express/parsers/mixins/exabyteml.py
deleted file mode 100644
index 7d529112..00000000
--- a/express/parsers/mixins/exabyteml.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from abc import abstractmethod
-
-
-class ExabyteMLDataMixin(object):
-    """
-    Defines Exabyte ML interfaces.
-
-    Note:
-        THE FORMAT OF DATA STRUCTURE RETURNED MUST BE PRESERVED IN IMPLEMENTATION.
-    """
-
-    @abstractmethod
-    def data_per_property(self):
-        pass
-
-    @abstractmethod
-    def precision_per_property(self):
-        pass
-
-    @abstractmethod
-    def scaling_params_per_feature(self):
-        pass
-
-    @abstractmethod
-    def band_gaps_direct(self):
-        pass
-
-    @abstractmethod
-    def band_gaps_indirect(self):
-        pass
diff --git a/express/properties/workflow.py b/express/properties/workflow.py
index 85f4f84d..6fb2e7da 100644
--- a/express/properties/workflow.py
+++ b/express/properties/workflow.py
@@ -51,7 +51,7 @@ def _serialize(self) -> dict:
 
 class PyMLTrainAndPredictWorkflow(WorkflowProperty):
     """
-    Next generation of ExabyteML. We expect workflows to have a format as follows:
+    PyML train-and-predict workflow property. We expect workflows to have a format as follows:
 
     Workflow_Head_Subworkflow - Contains various units which prepare an ML job. For example, we may have the following
     units present.
@@ -203,7 +203,7 @@ def is_using_dataset(self):
     @property
     def workflow_specific_config(self) -> dict:
         """
-        Generates the specific config for the new implementation of ExabyteML. The remainder of the config is
+        Generates the specific config for the workflow. The remainder of the config is
         generated inside of the parent Workflow class.
 
         Returns:
@@ -224,168 +224,3 @@ def workflow_specific_config(self) -> dict:
         }
 
         return specific_config
-
-
-class ExabyteMLPredictWorkflow(WorkflowProperty):
-    """
-    Legacy implementation of Exabyte ML's predict Workflow property class.
-    """
-
-    def __init__(self, name, parser, *args, **kwargs):
-        super().__init__(name, parser, *args, **kwargs)
-
-        self.model = self.parser.model
-        self.targets = self.parser.targets
-        self.features = self.parser.features
-        self.scaling_params_per_feature = self.parser.scaling_params_per_feature
-
-    @property
-    def workflow_specific_config(self) -> dict:
-        """
-        Generates the specific config for a legacy ExabyteML workflow. The remainder of the config is generated
-        inside of the parent Worfklow class.
-
-        Returns:
-             dict
-        """
-        specific_config = {
-            "units": [
-                {
-                    "_id": "LCthJ6E2QabYCZqf4",
-                    "name": "ml_predict_subworkflow",
-                    "type": "subworkflow",
-                    "flowchartId": "subworkflow",
-                    "head": True,
-                }
-            ],
-            "subworkflows": [
-                {
-                    "name": "ml_predict_subworkflow",
-                    "isDraft": True,
-                    "application": {
-                        "version": "0.2.0",
-                        "summary": "Exabyte Machine Learning Engine",
-                        "name": "exabyteml",
-                        "shortName": "ml",
-                        "build": "Default",
-                    },
-                    "units": [
-                        {
-                            "status": "idle",
-                            "statusTrack": [],
-                            "head": True,
-                            "flowchartId": "io",
-                            "name": "input",
-                            "application": {
-                                "version": "0.2.0",
-                                "summary": "Exabyte Machine Learning Engine",
-                                "name": "exabyteml",
-                                "shortName": "ml",
-                                "build": "Default",
-                            },
-                            "results": [],
-                            "next": "data_transformation_manipulation",
-                            "source": "api",
-                            "postProcessors": [],
-                            "preProcessors": [],
-                            "subtype": "dataFrame",
-                            "input": [
-                                {
-                                    "endpoint": "dataframe",
-                                    "endpoint_options": {
-                                        "headers": {},
-                                        "data": {"features": self.features, "ids": [], "targets": self.targets},
-                                        "method": "POST",
-                                        "params": {},
-                                        "jobId": "",
-                                    },
-                                }
-                            ],
-                            "type": "io",
-                            "monitors": [],
-                        },
-                        {
-                            "status": "idle",
-                            "statusTrack": [],
-                            "head": False,
-                            "flowchartId": "data_transformation_manipulation",
-                            "name": "clean data",
-                            "monitors": [],
-                            "results": [],
-                            "next": "data_transformation_scale_and_reduce",
-                            "application": {
-                                "version": "0.2.0",
-                                "summary": "Exabyte Machine Learning Engine",
-                                "name": "exabyteml",
-                                "shortName": "ml",
-                                "build": "Default",
-                            },
-                            "postProcessors": [],
-                            "preProcessors": [],
-                            "operationType": "manipulation",
-                            "operation": "data_transformation",
-                            "type": "processing",
-                            "inputData": {
-                                "cleanMissingData": True,
-                                "replaceNoneValuesWith": 0,
-                                "removeDuplicateRows": True,
-                            },
-                        },
-                        {
-                            "status": "idle",
-                            "statusTrack": [],
-                            "head": False,
-                            "flowchartId": "data_transformation_scale_and_reduce",
-                            "name": "scale and reduce",
-                            "monitors": [],
-                            "results": [],
-                            "next": "score",
-                            "application": {
-                                "version": "0.2.0",
-                                "build": "Default",
-                                "name": "exabyteml",
-                                "shortName": "ml",
-                                "summary": "Exabyte Machine Learning Engine",
-                            },
-                            "postProcessors": [],
-                            "preProcessors": [],
-                            "operationType": "scale_and_reduce",
-                            "operation": "data_transformation",
-                            "type": "processing",
-                            "inputData": {
-                                "scaler": "standard_scaler",
-                                "perFeature": self.scaling_params_per_feature,
-                            },
-                        },
-                        {
-                            "status": "idle",
-                            "statusTrack": [],
-                            "executable": {"name": "score"},
-                            "flowchartId": "score",
-                            "name": "score",
-                            "head": False,
-                            "results": [{"name": "predicted_properties"}],
-                            "application": {
-                                "version": "0.2.0",
-                                "build": "Default",
-                                "name": "exabyteml",
-                                "shortName": "ml",
-                                "summary": "Exabyte Machine Learning Engine",
-                            },
-                            "postProcessors": [],
-                            "preProcessors": [],
-                            "context": {},
-                            "input": [],
-                            "flavor": {"name": "score"},
-                            "type": "execution",
-                            "monitors": [{"name": "standard_output"}],
-                        },
-                    ],
-                    "model": self.model,
-                    "_id": "LCthJ6E2QabYCZqf4",
-                    "properties": self.targets,
-                }
-            ],
-            "properties": self.targets,
-        }
-        return specific_config
diff --git a/express/settings.py b/express/settings.py
index e8e20204..6912ecfa 100644
--- a/express/settings.py
+++ b/express/settings.py
@@ -29,7 +29,6 @@
     },
     "material": {"reference": "express.properties.material.Material"},
     "symmetry": {"reference": "express.properties.non_scalar.symmetry.Symmetry"},
-    "workflow:ml_predict": {"reference": "express.properties.workflow.ExabyteMLPredictWorkflow"},
     "workflow:pyml_predict": {"reference": "express.properties.workflow.PyMLTrainAndPredictWorkflow"},
     "phonon_dos": {"reference": "express.properties.non_scalar.two_dimensional_plot.phonon_dos.PhononDOS"},
     "phonon_dispersions": {
@@ -70,7 +69,6 @@
     "vasp": "express.parsers.apps.vasp.parser.VaspParser",
     "nwchem": "express.parsers.apps.nwchem.parser.NwchemParser",
     "structure": "express.parsers.structure.StructureParser",
-    "exabyteml": "express.parsers.exabyteml.ExabyteMLParser",
 }
 
 # Used to round to zero by default