feat: implement prediction service #276

Open
wants to merge 27 commits into base: master
Commits (27)
b73b28b
feat: Add initial prediction service data pipeline to learner - imple…
lrahmani Feb 9, 2023
b90bae7
Adding draft of generic prediction service. (#278)
hanwag Apr 3, 2023
9534624
Bug-Fix: linting.
hanwag Apr 3, 2023
b65df16
Bug-Fix: linting.
hanwag Apr 3, 2023
d794d4e
Bug-Fix: failing pytests.
hanwag Apr 3, 2023
f89a3a8
Bug-Fix: data compatibilities.
hanwag Apr 3, 2023
e0fab4a
Bug-Fix: typo pred compatibilities.
hanwag Apr 3, 2023
3765c5c
Bug-Fix: failing tests and linting.
hanwag Apr 4, 2023
c617ee2
tmp passing failing test.
hanwag Apr 5, 2023
7ec9711
Bug-Fix: Prediction optional.
hanwag Apr 5, 2023
021752a
Bug-Fix: linting and filepath.
hanwag Apr 5, 2023
9eb9710
Bug-Fix: path update.
hanwag Apr 5, 2023
4e3a371
bug-fix: folder location and linting error.
hanwag Apr 5, 2023
b3004e1
bug-fix: trailing white space.
hanwag Apr 5, 2023
377c21e
Bug-Fix: type checking errors.
hanwag Apr 5, 2023
0e9fe4c
Bug-Fix: style checks.
hanwag Apr 5, 2023
299c303
Merge branch 'master' into feature/prediction
lrahmani Apr 5, 2023
8ea23ac
Bug-Fix: increase time out and failing env1 test.
hanwag Apr 6, 2023
27dba67
Bug-Fix: pred data loader.
hanwag Apr 6, 2023
3ae322e
Added debug messages.
hanwag Apr 6, 2023
bd94ba9
Bug-Fix: failing scania test.
hanwag Apr 6, 2023
fb01378
Bug-Fix: timeout, env17 removed, fixed.
hanwag Apr 6, 2023
a8ec8e4
Bug-Fix: keras fraud test.
hanwag Apr 6, 2023
6587465
Bug-Fix: test type annotations.
hanwag Apr 6, 2023
07143c5
Bug-Fix: added type hints.
hanwag Apr 6, 2023
ce38a66
Implement Prediction dataloader for Scania learner (#280)
hanwag Apr 18, 2023
0179864
Multiple metrics (#283)
hanwag May 23, 2023
6 changes: 3 additions & 3 deletions .github/workflows/python-app.yml
@@ -7,7 +7,7 @@ on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
branches: [ master, feature/prediction ]

jobs:
code_quality_checks:
@@ -40,11 +40,11 @@ jobs:

continue-on-error: False
runs-on: self-hosted
timeout-minutes: 30
timeout-minutes: 60

strategy:
matrix:
python-version: [3.7, 3.8]
python-version: [3.7]
env:
GITHUB_ACTION: true

2 changes: 1 addition & 1 deletion .pylintrc
@@ -68,7 +68,7 @@ ENABLED:

[IMPORTS]
ignored-modules=click,google,grpc,matplotlib,numpy,opacus,onnx,onnxmltools,pandas,PIL,prometheus_client,pydantic,pytest,
tensorflow,tensorflow_core,tensorflow_datasets,tensorflow_privacy,torch,torchsummary,torchvision,typing_extensions,
tensorflow,tensorflow_addons,tensorflow_core,tensorflow_datasets,tensorflow_privacy,torch,torchsummary,torchvision,typing_extensions,
scipy,sklearn,xgboost

[TYPECHECK]
52 changes: 25 additions & 27 deletions colearn/ml_interface.py
@@ -20,32 +20,7 @@
from typing import Any, Optional

import onnx
import onnxmltools
import sklearn
import tensorflow as tf
import torch
from pydantic import BaseModel
from tensorflow import keras

model_classes_keras = (tf.keras.Model, keras.Model, tf.estimator.Estimator)
model_classes_scipy = (torch.nn.Module)
model_classes_sklearn = (sklearn.base.ClassifierMixin)


def convert_model_to_onnx(model: Any):
"""
Helper function to convert a ML model to onnx format
"""
if isinstance(model, model_classes_keras):
return onnxmltools.convert_keras(model)
if isinstance(model, model_classes_sklearn):
return onnxmltools.convert_sklearn(model)
if 'xgboost' in model.__repr__():
return onnxmltools.convert_sklearn(model)
if isinstance(model, model_classes_scipy):
raise Exception("Pytorch models not yet supported to onnx")
else:
raise Exception("Attempt to convert unsupported model to onnx: {model}")


class DiffPrivBudget(BaseModel):
@@ -78,8 +78,9 @@ class DiffPrivConfig(BaseModel):

class ProposedWeights(BaseModel):
weights: Weights
vote_score: float
test_score: float
vote_score: dict
test_score: dict
criterion: str
vote: Optional[bool]


@@ -94,6 +70,17 @@ class ColearnModel(BaseModel):
model: Optional[Any]


class PredictionRequest(BaseModel):
name: str
input_data: Any
pred_dataloader_key: Optional[Any]


class Prediction(BaseModel):
name: str
prediction_data: Any


def deser_model(model: Any) -> onnx.ModelProto:
"""
Helper function to recover a onnx model from its deserialized form
@@ -136,3 +123,14 @@ def mli_get_current_model(self) -> ColearnModel:
Returns the current model
"""
pass

@abc.abstractmethod
def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
"""
Make prediction using the current model.
Does not change the current weights of the model.

:param request: data to get the prediction for
:returns: the prediction
"""
pass
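
The new `PredictionRequest`/`Prediction` models and the `mli_make_prediction` abstract method only define the prediction API; each learner supplies its own implementation. A minimal sketch of how a Keras-backed learner might satisfy it — the class, its `model` attribute, and the numpy conversion are assumptions for illustration, not part of this diff:

```python
import numpy as np

from colearn.ml_interface import Prediction, PredictionRequest


class ExampleKerasLearner:  # hypothetical learner; only the prediction path is sketched
    def __init__(self, model):
        self.model = model  # assumed to be a compiled tf.keras.Model

    def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
        # Run the current model on the supplied data without changing its weights.
        inputs = np.asarray(request.input_data)
        return Prediction(name=request.name, prediction_data=self.model.predict(inputs))
```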
44 changes: 44 additions & 0 deletions colearn/onnxutils.py
@@ -0,0 +1,44 @@
# ------------------------------------------------------------------------------
#
# Copyright 2021 Fetch.AI Limited
#
# Licensed under the Creative Commons Attribution-NonCommercial International
# License, Version 4.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://creativecommons.org/licenses/by-nc/4.0/legalcode
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ------------------------------------------------------------------------------
from typing import Any

import onnxmltools
import sklearn
import tensorflow as tf
import torch
from tensorflow import keras

model_classes_keras = (tf.keras.Model, keras.Model, tf.estimator.Estimator)
model_classes_scipy = (torch.nn.Module)
model_classes_sklearn = (sklearn.base.ClassifierMixin)


def convert_model_to_onnx(model: Any):
"""
Helper function to convert a ML model to onnx format
"""
if isinstance(model, model_classes_keras):
return onnxmltools.convert_keras(model)
if isinstance(model, model_classes_sklearn):
return onnxmltools.convert_sklearn(model)
if 'xgboost' in model.__repr__():
return onnxmltools.convert_sklearn(model)
if isinstance(model, model_classes_scipy):
raise Exception("Pytorch models not yet supported to onnx")
else:
raise Exception("Attempt to convert unsupported model to onnx: {model}")
13 changes: 7 additions & 6 deletions colearn/training.py
@@ -33,8 +33,8 @@ def initial_result(learners: Sequence[MachineLearningInterface]):
result = Result()
for learner in learners:
proposed_weights = learner.mli_test_weights(learner.mli_get_current_weights()) # type: ProposedWeights
result.test_scores.append(proposed_weights.test_score)
result.vote_scores.append(proposed_weights.vote_score)
result.test_scores.append(proposed_weights.test_score[proposed_weights.criterion])
result.vote_scores.append(proposed_weights.vote_score[proposed_weights.criterion])
result.votes.append(True)
return result

@@ -48,9 +48,10 @@ def collective_learning_round(learners: Sequence[MachineLearningInterface], vote
vote_threshold)
result.vote = vote
result.votes = [pw.vote for pw in proposed_weights_list]
result.vote_scores = [pw.vote_score for pw in
# TODO does this make sense?
result.vote_scores = [pw.vote_score[pw.criterion] for pw in
proposed_weights_list]
result.test_scores = [pw.test_score for pw in proposed_weights_list]
result.test_scores = [pw.test_score[pw.criterion] for pw in proposed_weights_list]
result.training_summaries = [
l.mli_get_current_weights().training_summary
for l in learners
@@ -73,7 +74,7 @@ def individual_training_round(learners: Sequence[MachineLearningInterface], roun
learner.mli_accept_weights(weights)

result.votes.append(True)
result.vote_scores.append(proposed_weights.vote_score)
result.test_scores.append(proposed_weights.test_score)
result.vote_scores.append(proposed_weights.vote_score[proposed_weights.criterion])
result.test_scores.append(proposed_weights.test_score[proposed_weights.criterion])

return result
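
With this change `vote_score` and `test_score` are per-metric dicts rather than single floats, and the new `criterion` field names the metric the training loop records. Illustrative values only (the numbers below are invented):

```python
# What the training loop now extracts from each ProposedWeights:
vote_score = {"loss": 0.41, "accuracy": 0.88}   # pw.vote_score
test_score = {"loss": 0.39, "accuracy": 0.90}   # pw.test_score
criterion = "loss"                              # pw.criterion

result_vote_score = vote_score[criterion]  # 0.41 -> appended to result.vote_scores
result_test_score = test_score[criterion]  # 0.39 -> appended to result.test_scores
```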
11 changes: 9 additions & 2 deletions colearn_examples/grpc/mlifactory_grpc_mnist.py
@@ -140,11 +140,18 @@ def get_models(self) -> Dict[str, Dict[str, Any]]:
vote_batches=10,
learning_rate=0.001)}

def get_compatibilities(self) -> Dict[str, Set[str]]:
def get_data_compatibilities(self) -> Dict[str, Set[str]]:
return {model_tag: {dataloader_tag}}

def get_prediction_dataloaders(self) -> Dict[str, Dict[str, Any]]:
raise NotImplementedError

def get_pred_compatibilities(self) -> Dict[str, Set[str]]:
raise NotImplementedError

def get_mli(self, model_name: str, model_params: str, dataloader_name: str,
dataset_params: str) -> MachineLearningInterface:
dataset_params: str, prediction_dataloader_name: str = None,
prediction_dataset_params: str = None) -> MachineLearningInterface:
dataloader_kwargs = json.loads(dataset_params)
data_loaders = prepare_data_loaders(**dataloader_kwargs)

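
This example opts out of prediction, so both new factory methods raise `NotImplementedError`. A factory that does support prediction would presumably return tag-keyed dicts in the same style as `get_models`; a sketch with invented tag names and kwargs, not taken from this PR:

```python
from typing import Any, Dict, Set


class PredictionAwareFactory:  # hypothetical factory for illustration only
    def get_prediction_dataloaders(self) -> Dict[str, Dict[str, Any]]:
        # One prediction dataloader tag mapped to its default construction kwargs.
        return {"KERAS_MNIST_PRED_DATALOADER": {"location": "/tmp/mnist_pred"}}

    def get_pred_compatibilities(self) -> Dict[str, Set[str]]:
        # Which prediction dataloaders each model tag can consume.
        return {"KERAS_MNIST_MODEL": {"KERAS_MNIST_PRED_DATALOADER"}}
```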
21 changes: 14 additions & 7 deletions colearn_examples/grpc/mnist_grpc.py
@@ -37,6 +37,7 @@
from colearn_keras.keras_mnist import split_to_folders # pylint: disable=C0413 # noqa: F401
from tensorflow.python.data.ops.dataset_ops import PrefetchDataset # pylint: disable=C0413 # noqa: F401
import tensorflow as tf # pylint: disable=C0413 # noqa: F401
import tensorflow_addons as tfa # pylint: disable=C0413 # noqa: F401

dataloader_tag = "KERAS_MNIST_EXAMPLE_DATALOADER"

@@ -63,6 +64,9 @@ def prepare_data_loaders(location: str,
images = pickle.load(open(Path(data_folder) / image_fl, "rb"))
labels = pickle.load(open(Path(data_folder) / label_fl, "rb"))

# OHE for broader metric usage
labels = tf.keras.utils.to_categorical(labels, 10)

n_cases = int(train_ratio * len(images))
n_vote_cases = int(vote_ratio * len(images))

@@ -87,7 +91,7 @@
@FactoryRegistry.register_model_architecture(model_tag, [dataloader_tag])
def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset],
steps_per_epoch: int = 100,
vote_batches: int = 10,
vote_batches: int = 1, # needs to stay one for correct test calculation
learning_rate: float = 0.001
) -> KerasLearner:
"""
@@ -100,8 +104,11 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
"""

# 2D Convolutional model for image recognition
loss = "sparse_categorical_crossentropy"
loss = "categorical_crossentropy"
optimizer = tf.keras.optimizers.Adam
n_classes = 10
metric_list = ["accuracy", tf.keras.metrics.AUC(),
tfa.metrics.F1Score(average="macro", num_classes=n_classes)]

input_img = tf.keras.Input(shape=(28, 28, 1), name="Input")
x = tf.keras.layers.Conv2D(32, (5, 5), activation="relu", padding="same", name="Conv1_1")(input_img)
@@ -112,19 +119,19 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
x = tf.keras.layers.MaxPooling2D((2, 2), name="pool3")(x)
x = tf.keras.layers.Flatten(name="flatten")(x)
x = tf.keras.layers.Dense(64, activation="relu", name="fc1")(x)
x = tf.keras.layers.Dense(10, activation="softmax", name="fc2")(x)
x = tf.keras.layers.Dense(n_classes, activation="softmax", name="fc2")(x)
model = tf.keras.Model(inputs=input_img, outputs=x)

opt = optimizer(lr=learning_rate)
model.compile(loss=loss, metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], optimizer=opt)
model.compile(loss=loss, metrics=metric_list, optimizer=opt)

learner = KerasLearner(
model=model,
train_loader=data_loaders[0],
vote_loader=data_loaders[1],
test_loader=data_loaders[2],
criterion="sparse_categorical_accuracy",
minimise_criterion=False,
criterion="loss",
minimise_criterion=True,
model_fit_kwargs={"steps_per_epoch": steps_per_epoch},
model_evaluate_kwargs={"steps": vote_batches},
)
@@ -167,7 +174,7 @@ def prepare_learner(data_loaders: Tuple[PrefetchDataset, PrefetchDataset, Prefet
results = Results()
results.data.append(initial_result(all_learner_models))

plot = ColearnPlot(score_name="accuracy")
plot = ColearnPlot(score_name="loss")

testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing
n_rounds = 10 if not testing_mode else 1
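
The labels are one-hot encoded above because `categorical_crossentropy`, `AUC`, and the macro `F1Score` from tensorflow_addons all score `(batch, n_classes)` probabilities against `(batch, n_classes)` targets, whereas the previous `sparse_categorical_crossentropy` setup used integer labels. A self-contained sketch of the metric set, with the connection to the dict-valued scores stated as an assumption about `KerasLearner` wiring:

```python
import tensorflow as tf
import tensorflow_addons as tfa  # assumed installed; provides the F1Score metric

n_classes = 10
metric_list = [
    "accuracy",
    tf.keras.metrics.AUC(),
    tfa.metrics.F1Score(average="macro", num_classes=n_classes),
]
# With this metric list, evaluating the compiled model is expected to yield a dict such as
# {"loss": ..., "accuracy": ..., "auc": ..., "f1_score": ...}, which maps onto the
# dict-valued vote/test scores introduced in ml_interface.py; criterion="loss" picks the
# single entry that drives voting and plotting.
```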
2 changes: 0 additions & 2 deletions colearn_examples/ml_interface/keras_fraud.py
@@ -31,10 +31,8 @@

"""
Fraud training example using Tensorflow Keras

Used dataset:
- Fraud, download from kaggle: https://www.kaggle.com/c/ieee-fraud-detection

What script does:
- Sets up the Keras model and some configuration parameters
- Randomly splits the dataset between multiple learners
16 changes: 11 additions & 5 deletions colearn_examples/ml_interface/mli_fraud.py
@@ -24,7 +24,8 @@
import sklearn
from sklearn.linear_model import SGDClassifier

from colearn.ml_interface import MachineLearningInterface, Weights, ProposedWeights, ColearnModel, ModelFormat, convert_model_to_onnx
from colearn.ml_interface import MachineLearningInterface, Prediction, PredictionRequest, Weights, ProposedWeights, ColearnModel, ModelFormat
from colearn.onnxutils import convert_model_to_onnx
from colearn.training import initial_result, collective_learning_round, set_equal_weights
from colearn.utils.plot import ColearnPlot
from colearn.utils.results import Results, print_results
@@ -87,18 +88,20 @@ def mli_propose_weights(self) -> Weights:
def mli_test_weights(self, weights: Weights) -> ProposedWeights:
current_weights = self.mli_get_current_weights()
self.set_weights(weights)
criterion = "mean_accuracy"

vote_score = self.test(self.vote_data, self.vote_labels)

test_score = self.test(self.test_data, self.test_labels)

vote = self.vote_score <= vote_score
vote = self.vote_score[criterion] <= vote_score[criterion]

self.set_weights(current_weights)
return ProposedWeights(weights=weights,
vote_score=vote_score,
test_score=test_score,
vote=vote
vote=vote,
criterion=criterion
)

def mli_accept_weights(self, weights: Weights):
@@ -126,9 +129,12 @@ def set_weights(self, weights: Weights):

def test(self, data, labels):
try:
return self.model.score(data, labels)
return {"mean_accuracy": self.model.score(data, labels)}
except sklearn.exceptions.NotFittedError:
return 0
return {"mean_accuracy": 0}

def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
raise NotImplementedError()


if __name__ == "__main__":
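
This learner leaves `mli_make_prediction` unimplemented. For completeness, a hedged sketch of what a prediction path for the SGDClassifier-backed learner in this file could look like — a hypothetical method body, not part of this PR:

```python
import numpy as np

from colearn.ml_interface import Prediction, PredictionRequest


# Hypothetical method body for the SGDClassifier-backed learner defined in this file;
# `self.model` is the fitted sklearn classifier held by the learner.
def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
    data = np.asarray(request.input_data)
    return Prediction(name=request.name, prediction_data=self.model.predict(data))
```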
15 changes: 11 additions & 4 deletions colearn_examples/ml_interface/mli_random_forest_iris.py
@@ -22,7 +22,8 @@
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

from colearn.ml_interface import MachineLearningInterface, Weights, ProposedWeights, ColearnModel, ModelFormat, convert_model_to_onnx
from colearn.ml_interface import MachineLearningInterface, Prediction, PredictionRequest, Weights, ProposedWeights, ColearnModel, ModelFormat
from colearn.onnxutils import convert_model_to_onnx
from colearn.training import initial_result, collective_learning_round
from colearn.utils.plot import ColearnPlot
from colearn.utils.results import Results, print_results
@@ -76,18 +77,20 @@ def mli_test_weights(self, weights: Weights) -> ProposedWeights:
def mli_test_weights(self, weights: Weights) -> ProposedWeights:
current_weights = self.mli_get_current_weights()
self.set_weights(weights)
criterion = "mean_accuracy"

vote_score = self.test(self.vote_data, self.vote_labels)

test_score = self.test(self.test_data, self.test_labels)

vote = self.vote_score <= vote_score
vote = self.vote_score[criterion] <= vote_score[criterion]

self.set_weights(current_weights)
return ProposedWeights(weights=weights,
vote_score=vote_score,
test_score=test_score,
vote=vote
vote=vote,
criterion=criterion
)

def mli_accept_weights(self, weights: Weights):
@@ -112,7 +115,11 @@ def set_weights(self, weights: Weights):
self.model = pickle.loads(weights.weights)

def test(self, data_array, labels_array):
return self.model.score(data_array, labels_array)
score = {"mean_accuracy": self.model.score(data_array, labels_array)}
return score

def mli_make_prediction(self, request: PredictionRequest) -> Prediction:
raise NotImplementedError()


train_fraction = 0.9
2 changes: 1 addition & 1 deletion colearn_examples/ml_interface/run_demo.py
@@ -72,7 +72,7 @@
args = parser.parse_args()

model_name = args.model
dataloader_set = mli_fac.get_compatibilities()[model_name]
dataloader_set = mli_fac.get_data_compatibilities()[model_name]
dataloader_name = next(iter(dataloader_set)) # use the first dataloader

n_learners = args.n_learners