feat: implement prediction service #276

Open
wants to merge 27 commits into base: master
Commits (27)
b73b28b
feat: Add initial prediction service data pipeline to learner - imple…
lrahmani Feb 9, 2023
b90bae7
Adding draft of generic prediction service. (#278)
hanwag Apr 3, 2023
9534624
Bug-Fix: linting.
hanwag Apr 3, 2023
b65df16
Bug-Fix: linting.
hanwag Apr 3, 2023
d794d4e
Bug-Fix: failing pytests.
hanwag Apr 3, 2023
f89a3a8
Bug-Fix: data compatibilities.
hanwag Apr 3, 2023
e0fab4a
Bug-Fix: typo pred compatibilities.
hanwag Apr 3, 2023
3765c5c
Bug-Fix: failing tests and linting.
hanwag Apr 4, 2023
c617ee2
tmp passing failing test.
hanwag Apr 5, 2023
7ec9711
Bug-Fix: Prediction optional.
hanwag Apr 5, 2023
021752a
Bug-Fix: linting and filepath.
hanwag Apr 5, 2023
9eb9710
Bug-Fix: path update.
hanwag Apr 5, 2023
4e3a371
bug-fix: folder location and linting error.
hanwag Apr 5, 2023
b3004e1
bug-fix: trailing white space.
hanwag Apr 5, 2023
377c21e
Bug-Fix: type checking errors.
hanwag Apr 5, 2023
0e9fe4c
Bug-Fix: style checks.
hanwag Apr 5, 2023
299c303
Merge branch 'master' into feature/prediction
lrahmani Apr 5, 2023
8ea23ac
Bug-Fix: increase time out and failing env1 test.
hanwag Apr 6, 2023
27dba67
Bug-Fix: pred data loader.
hanwag Apr 6, 2023
3ae322e
Added debug messages.
hanwag Apr 6, 2023
bd94ba9
Bug-Fix: failing scania test.
hanwag Apr 6, 2023
fb01378
Bug-Fix: timeout, env17 removed, fixed.
hanwag Apr 6, 2023
a8ec8e4
Bug-Fix: keras fraud test.
hanwag Apr 6, 2023
6587465
Bug-Fix: test type annotations.
hanwag Apr 6, 2023
07143c5
Bug-Fix: added type hints.
hanwag Apr 6, 2023
ce38a66
Implement Prediction dataloader for Scania learner (#280)
hanwag Apr 18, 2023
0179864
Multiple metrics (#283)
hanwag May 23, 2023
6 changes: 3 additions & 3 deletions .github/workflows/python-app.yml
@@ -7,7 +7,7 @@ on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
branches: [ master, feature/prediction ]

jobs:
code_quality_checks:
@@ -40,11 +40,11 @@ jobs:

continue-on-error: False
runs-on: self-hosted
timeout-minutes: 30
timeout-minutes: 60

strategy:
matrix:
python-version: [3.7, 3.8]
python-version: [3.7]
env:
GITHUB_ACTION: true

2 changes: 1 addition & 1 deletion .pylintrc
@@ -68,7 +68,7 @@ ENABLED:

[IMPORTS]
ignored-modules=click,google,grpc,matplotlib,numpy,opacus,onnx,onnxmltools,pandas,PIL,prometheus_client,pydantic,pytest,
tensorflow,tensorflow_core,tensorflow_datasets,tensorflow_privacy,torch,torchsummary,torchvision,typing_extensions,
tensorflow,tensorflow_addons,tensorflow_core,tensorflow_datasets,tensorflow_privacy,torch,torchsummary,torchvision,typing_extensions,
scipy,sklearn,xgboost

[TYPECHECK]
52 changes: 25 additions & 27 deletions colearn/ml_interface.py
@@ -20,32 +20,7 @@
from typing import Any, Optional

import onnx
import onnxmltools
import sklearn
import tensorflow as tf
import torch
from pydantic import BaseModel
from tensorflow import keras

model_classes_keras = (tf.keras.Model, keras.Model, tf.estimator.Estimator)
model_classes_scipy = (torch.nn.Module)
model_classes_sklearn = (sklearn.base.ClassifierMixin)


def convert_model_to_onnx(model: Any):
"""
Helper function to convert a ML model to onnx format
"""
if isinstance(model, model_classes_keras):
return onnxmltools.convert_keras(model)
if isinstance(model, model_classes_sklearn):
return onnxmltools.convert_sklearn(model)
if 'xgboost' in model.__repr__():
return onnxmltools.convert_sklearn(model)
if isinstance(model, model_classes_scipy):
raise Exception("Pytorch models not yet supported to onnx")
else:
raise Exception("Attempt to convert unsupported model to onnx: {model}")


class DiffPrivBudget(BaseModel):
@@ -78,8 +78,9 @@ class DiffPrivConfig(BaseModel):

class ProposedWeights(BaseModel):
weights: Weights
vote_score: float
test_score: float
vote_score: dict
test_score: dict
criterion: str
vote: Optional[bool]


@@ -94,6 +70,17 @@ class ColearnModel(BaseModel):
model: Optional[Any]


class PredictionRequest(BaseModel):
name: str
input_data: Any
pred_dataloader_key: Optional[Any]


class Prediction(BaseModel):
name: str
prediction_data: Any


def deser_model(model: Any) -> onnx.ModelProto:
"""
Helper function to recover a onnx model from its deserialized form
@@ -136,3 +123,14 @@ def mli_get_current_model(self) -> ColearnModel:
Returns the current model
"""
pass

@abc.abstractmethod
def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
"""
Make prediction using the current model.
Does not change the current weights of the model.

:param request: data to get the prediction for
:returns: the prediction
"""
pass
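
The new `PredictionRequest`/`Prediction` models and the `mli_make_prediction` abstract method only define the prediction API; each learner supplies its own implementation. A minimal sketch of how a Keras-backed learner might satisfy it — the class, its `model` attribute, and the numpy conversion are assumptions for illustration, not part of this diff:

```python
import numpy as np

from colearn.ml_interface import Prediction, PredictionRequest


class ExampleKerasLearner:  # hypothetical learner; only the prediction path is sketched
    def __init__(self, model):
        self.model = model  # assumed to be a compiled tf.keras.Model

    def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
        # Run the current model on the supplied data without changing its weights.
        inputs = np.asarray(request.input_data)
        return Prediction(name=request.name, prediction_data=self.model.predict(inputs))
```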
44 changes: 44 additions & 0 deletions colearn/onnxutils.py
@@ -0,0 +1,44 @@
# ------------------------------------------------------------------------------
#
# Copyright 2021 Fetch.AI Limited
#
# Licensed under the Creative Commons Attribution-NonCommercial International
# License, Version 4.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://creativecommons.org/licenses/by-nc/4.0/legalcode
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ------------------------------------------------------------------------------
from typing import Any

import onnxmltools
import sklearn
import tensorflow as tf
import torch
from tensorflow import keras

model_classes_keras = (tf.keras.Model, keras.Model, tf.estimator.Estimator)
model_classes_scipy = (torch.nn.Module)
model_classes_sklearn = (sklearn.base.ClassifierMixin)


def convert_model_to_onnx(model: Any):
"""
Helper function to convert a ML model to onnx format
"""
if isinstance(model, model_classes_keras):
return onnxmltools.convert_keras(model)
if isinstance(model, model_classes_sklearn):
return onnxmltools.convert_sklearn(model)
if 'xgboost' in model.__repr__():
return onnxmltools.convert_sklearn(model)
if isinstance(model, model_classes_scipy):
raise Exception("Pytorch models not yet supported to onnx")
else:
raise Exception("Attempt to convert unsupported model to onnx: {model}")
13 changes: 7 additions & 6 deletions colearn/training.py
@@ -33,8 +33,8 @@ def initial_result(learners: Sequence[MachineLearningInterface]):
result = Result()
for learner in learners:
proposed_weights = learner.mli_test_weights(learner.mli_get_current_weights()) # type: ProposedWeights
result.test_scores.append(proposed_weights.test_score)
result.vote_scores.append(proposed_weights.vote_score)
result.test_scores.append(proposed_weights.test_score[proposed_weights.criterion])
result.vote_scores.append(proposed_weights.vote_score[proposed_weights.criterion])
result.votes.append(True)
return result

@@ -48,9 +48,10 @@ def collective_learning_round(learners: Sequence[MachineLearningInterface], vote
vote_threshold)
result.vote = vote
result.votes = [pw.vote for pw in proposed_weights_list]
result.vote_scores = [pw.vote_score for pw in
# TODO does this make sense?
result.vote_scores = [pw.vote_score[pw.criterion] for pw in
proposed_weights_list]
result.test_scores = [pw.test_score for pw in proposed_weights_list]
result.test_scores = [pw.test_score[pw.criterion] for pw in proposed_weights_list]
result.training_summaries = [
l.mli_get_current_weights().training_summary
for l in learners
@@ -73,7 +74,7 @@ def individual_training_round(learners: Sequence[MachineLearningInterface], roun
learner.mli_accept_weights(weights)

result.votes.append(True)
result.vote_scores.append(proposed_weights.vote_score)
result.test_scores.append(proposed_weights.test_score)
result.vote_scores.append(proposed_weights.vote_score[proposed_weights.criterion])
result.test_scores.append(proposed_weights.test_score[proposed_weights.criterion])

return result
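
With this change `vote_score` and `test_score` are per-metric dicts rather than single floats, and the new `criterion` field names the metric the training loop records. Illustrative values only (the numbers below are invented):

```python
# What the training loop now extracts from each ProposedWeights:
vote_score = {"loss": 0.41, "accuracy": 0.88}   # pw.vote_score
test_score = {"loss": 0.39, "accuracy": 0.90}   # pw.test_score
criterion = "loss"                              # pw.criterion

result_vote_score = vote_score[criterion]  # 0.41 -> appended to result.vote_scores
result_test_score = test_score[criterion]  # 0.39 -> appended to result.test_scores
```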
11 changes: 9 additions & 2 deletions colearn_examples/grpc/mlifactory_grpc_mnist.py
@@ -140,11 +140,18 @@ def get_models(self) -> Dict[str, Dict[str, Any]]:
vote_batches=10,
learning_rate=0.001)}

def get_compatibilities(self) -> Dict[str, Set[str]]:
def get_data_compatibilities(self) -> Dict[str, Set[str]]:
return {model_tag: {dataloader_tag}}

def get_prediction_dataloaders(self) -> Dict[str, Dict[str, Any]]:
raise NotImplementedError

def get_pred_compatibilities(self) -> Dict[str, Set[str]]:
raise NotImplementedError

def get_mli(self, model_name: str, model_params: str, dataloader_name: str,
dataset_params: str) -> MachineLearningInterface:
dataset_params: str, prediction_dataloader_name: str = None,
prediction_dataset_params: str = None) -> MachineLearningInterface:
dataloader_kwargs = json.loads(dataset_params)
data_loaders = prepare_data_loaders(**dataloader_kwargs)

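
This example opts out of prediction, so both new factory methods raise `NotImplementedError`. A factory that does support prediction would presumably return tag-keyed dicts in the same style as `get_models`; a sketch with invented tag names and kwargs, not taken from this PR:

```python
from typing import Any, Dict, Set


class PredictionAwareFactory:  # hypothetical factory for illustration only
    def get_prediction_dataloaders(self) -> Dict[str, Dict[str, Any]]:
        # One prediction dataloader tag mapped to its default construction kwargs.
        return {"KERAS_MNIST_PRED_DATALOADER": {"location": "/tmp/mnist_pred"}}

    def get_pred_compatibilities(self) -> Dict[str, Set[str]]:
        # Which prediction dataloaders each model tag can consume.
        return {"KERAS_MNIST_MODEL": {"KERAS_MNIST_PRED_DATALOADER"}}
```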
21 changes: 14 additions & 7 deletions colearn_examples/grpc/mnist_grpc.py
@@ -37,6 +37,7 @@
from colearn_keras.keras_mnist import split_to_folders # pylint: disable=C0413 # noqa: F401
from tensorflow.python.data.ops.dataset_ops import PrefetchDataset # pylint: disable=C0413 # noqa: F401
import tensorflow as tf # pylint: disable=C0413 # noqa: F401
import tensorflow_addons as tfa # pylint: disable=C0413 # noqa: F401

dataloader_tag = "KERAS_MNIST_EXAMPLE_DATALOADER"

@@ -63,6 +64,9 @@ def prepare_data_loaders(location: str,
images = pickle.load(open(Path(data_folder) / image_fl, "rb"))
labels = pickle.load(open(Path(data_folder) / label_fl, "rb"))

# OHE for broader metric usage
labels = tf.keras.utils.to_categorical(labels, 10)

n_cases = int(train_ratio * len(images))
n_vote_cases = int(vote_ratio * len(images))

@@ -87,7 +91,7 @@
@FactoryRegistry.register_model_architecture(model_tag, [dataloader_tag])
def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset],
steps_per_epoch: int = 100,
vote_batches: int = 10,
vote_batches: int = 1, # needs to stay one for correct test calculation
learning_rate: float = 0.001
) -> KerasLearner:
"""
@@ -100,8 +104,11 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
"""

# 2D Convolutional model for image recognition
loss = "sparse_categorical_crossentropy"
loss = "categorical_crossentropy"
optimizer = tf.keras.optimizers.Adam
n_classes = 10
metric_list = ["accuracy", tf.keras.metrics.AUC(),
tfa.metrics.F1Score(average="macro", num_classes=n_classes)]

input_img = tf.keras.Input(shape=(28, 28, 1), name="Input")
x = tf.keras.layers.Conv2D(32, (5, 5), activation="relu", padding="same", name="Conv1_1")(input_img)
@@ -112,19 +119,19 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
x = tf.keras.layers.MaxPooling2D((2, 2), name="pool3")(x)
x = tf.keras.layers.Flatten(name="flatten")(x)
x = tf.keras.layers.Dense(64, activation="relu", name="fc1")(x)
x = tf.keras.layers.Dense(10, activation="softmax", name="fc2")(x)
x = tf.keras.layers.Dense(n_classes, activation="softmax", name="fc2")(x)
model = tf.keras.Model(inputs=input_img, outputs=x)

opt = optimizer(lr=learning_rate)
model.compile(loss=loss, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], optimizer=opt)
model.compile(loss=loss, metrics=metric_list, optimizer=opt)

learner = KerasLearner(
model=model,
train_loader=data_loaders[0],
vote_loader=data_loaders[1],
test_loader=data_loaders[2],
criterion="sparse_categorical_accuracy",
minimise_criterion=False,
criterion="loss",
minimise_criterion=True,
model_fit_kwargs={"steps_per_epoch": steps_per_epoch},
model_evaluate_kwargs={"steps": vote_batches},
)
@@ -167,7 +174,7 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
results = Results()
results.data.append(initial_result(all_learner_models))

plot = ColearnPlot(score_name="accuracy")
plot = ColearnPlot(score_name="loss")

testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing
n_rounds = 10 if not testing_mode else 1
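
The labels are one-hot encoded above because `categorical_crossentropy`, `AUC`, and the macro `F1Score` from tensorflow_addons all score `(batch, n_classes)` probabilities against `(batch, n_classes)` targets, whereas the previous `sparse_categorical_crossentropy` setup used integer labels. A self-contained sketch of the metric set, with the connection to the dict-valued scores stated as an assumption about `KerasLearner` wiring:

```python
import tensorflow as tf
import tensorflow_addons as tfa  # assumed installed; provides the F1Score metric

n_classes = 10
metric_list = [
    "accuracy",
    tf.keras.metrics.AUC(),
    tfa.metrics.F1Score(average="macro", num_classes=n_classes),
]
# With this metric list, evaluating the compiled model is expected to yield a dict such as
# {"loss": ..., "accuracy": ..., "auc": ..., "f1_score": ...}, which maps onto the
# dict-valued vote/test scores introduced in ml_interface.py; criterion="loss" picks the
# single entry that drives voting and plotting.
```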
2 changes: 0 additions & 2 deletions colearn_examples/ml_interface/keras_fraud.py
@@ -31,10 +31,8 @@

"""
Fraud training example using Tensorflow Keras

Used dataset:
- Fraud, download from kaggle: https://www.kaggle.com/c/ieee-fraud-detection

What script does:
- Sets up the Keras model and some configuration parameters
- Randomly splits the dataset between multiple learners
16 changes: 11 additions & 5 deletions colearn_examples/ml_interface/mli_fraud.py
@@ -24,7 +24,8 @@
import sklearn
from sklearn.linear_model import SGDClassifier

from colearn.ml_interface import MachineLearningInterface, Weights, ProposedWeights, ColearnModel, ModelFormat, convert_model_to_onnx
from colearn.ml_interface import MachineLearningInterface, Prediction, PredictionRequest, Weights, ProposedWeights, ColearnModel, ModelFormat
from colearn.onnxutils import convert_model_to_onnx
from colearn.training import initial_result, collective_learning_round, set_equal_weights
from colearn.utils.plot import ColearnPlot
from colearn.utils.results import Results, print_results
@@ -87,18 +88,20 @@ def mli_propose_weights(self) -> Weights:
def mli_test_weights(self, weights: Weights) -> ProposedWeights:
current_weights = self.mli_get_current_weights()
self.set_weights(weights)
criterion = "mean_accuracy"

vote_score = self.test(self.vote_data, self.vote_labels)

test_score = self.test(self.test_data, self.test_labels)

vote = self.vote_score <= vote_score
vote = self.vote_score[criterion] <= vote_score[criterion]

self.set_weights(current_weights)
return ProposedWeights(weights=weights,
vote_score=vote_score,
test_score=test_score,
vote=vote
vote=vote,
criterion=criterion
)

def mli_accept_weights(self, weights: Weights):
@@ -126,9 +129,12 @@ def set_weights(self, weights: Weights):

def test(self, data, labels):
try:
return self.model.score(data, labels)
return {"mean_accuracy": self.model.score(data, labels)}
except sklearn.exceptions.NotFittedError:
return 0
return {"mean_accuracy": 0}

def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
raise NotImplementedError()


if __name__ == "__main__":
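
This learner leaves `mli_make_prediction` unimplemented. For completeness, a hedged sketch of what a prediction path for the SGDClassifier-backed learner in this file could look like — a hypothetical method body, not part of this PR:

```python
import numpy as np

from colearn.ml_interface import Prediction, PredictionRequest


# Hypothetical method body for the SGDClassifier-backed learner defined in this file;
# `self.model` is the fitted sklearn classifier held by the learner.
def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
    data = np.asarray(request.input_data)
    return Prediction(name=request.name, prediction_data=self.model.predict(data))
```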
15 changes: 11 additions & 4 deletions colearn_examples/ml_interface/mli_random_forest_iris.py
@@ -22,7 +22,8 @@
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

from colearn.ml_interface import MachineLearningInterface, Weights, ProposedWeights, ColearnModel, ModelFormat, convert_model_to_onnx
from colearn.ml_interface import MachineLearningInterface, Prediction, PredictionRequest, Weights, ProposedWeights, ColearnModel, ModelFormat
from colearn.onnxutils import convert_model_to_onnx
from colearn.training import initial_result, collective_learning_round
from colearn.utils.plot import ColearnPlot
from colearn.utils.results import Results, print_results
@@ -76,18 +77,20 @@ def mli_test_weights(self, weights: Weights) -> ProposedWeights:
def mli_test_weights(self, weights: Weights) -> ProposedWeights:
current_weights = self.mli_get_current_weights()
self.set_weights(weights)
criterion = "mean_accuracy"

vote_score = self.test(self.vote_data, self.vote_labels)

test_score = self.test(self.test_data, self.test_labels)

vote = self.vote_score <= vote_score
vote = self.vote_score[criterion] <= vote_score[criterion]

self.set_weights(current_weights)
return ProposedWeights(weights=weights,
vote_score=vote_score,
test_score=test_score,
vote=vote
vote=vote,
criterion=criterion
)

def mli_accept_weights(self, weights: Weights):
@@ -112,7 +115,11 @@ def set_weights(self, weights: Weights):
self.model = pickle.loads(weights.weights)

def test(self, data_array, labels_array):
return self.model.score(data_array, labels_array)
score = {"mean_accuracy": self.model.score(data_array, labels_array)}
return score

def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
raise NotImplementedError()


train_fraction = 0.9
2 changes: 1 addition & 1 deletion colearn_examples/ml_interface/run_demo.py
@@ -72,7 +72,7 @@
args = parser.parse_args()

model_name = args.model
dataloader_set = mli_fac.get_compatibilities()[model_name]
dataloader_set = mli_fac.get_data_compatibilities()[model_name]
dataloader_name = next(iter(dataloader_set)) # use the first dataloader

n_learners = args.n_learners