Skip to content
This repository has been archived by the owner on Jan 5, 2024. It is now read-only.

Onnx wrapper #59

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions model_tools/activations/onnx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import numpy as np
import onnxruntime
import onnxoptimizer

SUBMODULE_SEPARATOR = '.'

from collections import OrderedDict
from model_tools.activations.core import ActivationsExtractorHelper
from skl2onnx.helpers.onnx_helper import enumerate_model_node_outputs
from skl2onnx.helpers.onnx_helper import select_model_inputs_outputs
from skl2onnx.helpers.onnx_helper import save_onnx_model
import onnx
import torch
from torch.autograd import Variable


class OnnxWrapper:
def __init__(self, model, preprocessing, identifier=None, *args, **kwargs):
"""
:param model: an ONNX model; it is validated with `onnx.checker.check_model` in `get_activations`
:param preprocessing: forwarded to `ActivationsExtractorHelper` as its `preprocessing` argument
Comment on lines +20 to +21
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this comment accurate? looks like it pertains to keras rather than onnx?

"""
self._model = model
identifier = identifier or model.name
self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self._extractor = ActivationsExtractorHelper(
identifier=identifier, get_activations=self.get_activations, preprocessing=preprocessing,
*args, **kwargs)
self._extractor.insert_attrs(self)

@property
def identifier(self):
    """Return the identifier held by the wrapped activations extractor."""
    return self._extractor.identifier

@identifier.setter
def identifier(self, value):
    """Store `value` as the identifier on the wrapped activations extractor."""
    self._extractor.identifier = value

def __call__(self, *args, **kwargs):  # cannot assign __call__ as attribute due to Python convention
    """Forward the call, with all positional and keyword arguments, to the wrapped extractor."""
    return self._extractor(*args, **kwargs)

def get_activations(self, images, layer_names):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

empty newline after function definition, remove

# create directory to store ONNX models
import os
if not os.path.exists("ONNX Partial Models"):
os.makedirs("ONNX Partial Models")
Comment on lines +44 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is very ad-hoc, I would:

  1. move the definition of this directory onto the file level
  2. allow this to be changed from environment variables

i.e. ONNX_MODEL_DIRECTORY = os.getenv('ONNX_MODEL_DIRECTORY', '~/.onnx_models')


# check to make sure model is legitimate
onnx_model = self._model
model_name = self.identifier
Comment on lines +50 to +51
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these re-assignments are unnecessary and imo more confusing than helpful, just use self._model and self.identifier in-place

onnx.checker.check_model(onnx_model)

# get the layer names and last layer
output_names = []
for out in enumerate_model_node_outputs(onnx_model):
output_names.append(out)
last_layer = output_names[-1]

# init activations dict for return:
new_dict = {}

# loop through each layer:
for layer in layer_names:

# handle logits case - get last layer activations
if layer_names[0] == 'logits':
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be if layer == 'logits':?

onnx_layer_output = select_model_inputs_outputs(onnx_model, f'{last_layer}')
else:
onnx_layer_output = select_model_inputs_outputs(onnx_model, f'{layer}')

# optimize and save the ONNX model:
passes = ["extract_constant_to_initializer", "eliminate_unused_initializer"]
optimized_model = onnxoptimizer.optimize(onnx_layer_output, passes)

# some layer names contain "/", which would break the file path used for saving and loading.
if "/" in layer:
parsed_layer = layer.replace("/", "-")
else:
parsed_layer = layer

# save the ONNX model layer
save_onnx_model(optimized_model,
f"ONNX Partial Models/{model_name}_layer_{parsed_layer}_output_optimized.onnx")

# start up ONNX Runtime
sess = onnxruntime.InferenceSession(f"ONNX Partial Models/{model_name}_layer_{parsed_layer}_output_optimized.onnx")

# prepare the input
def to_numpy(tensor):
    """Return `tensor` as a numpy array on the CPU.

    A tensor that tracks gradients is detached first, because `.numpy()`
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()

# process images:
torch_images = [torch.from_numpy(image) for image in images]
var_images = Variable(torch.stack(torch_images))
images_device = var_images.to(self._device)

# compute ONNX Runtime output prediction
ort_inputs = {sess.get_inputs()[0].name: to_numpy(images_device)}
ort_outs = sess.run(None, ort_inputs)
activations = ort_outs[0]
Comment on lines +93 to +101
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do I understand this right that for every single layer, the images are passed through the model up to that layer? Is there not a more efficient way where the images are passed through the model only once and all layer activations captured in this single pass?


# add the layer and its activations
new_dict[layer] = activations

final_result = OrderedDict(new_dict)
return final_result

def __repr__(self):
    """Represent the wrapper by the repr of the wrapped model."""
    return repr(self._model)

# def graph(self):
# import networkx as nx
# g = nx.DiGraph()
# for layer in self._model.layers:
# g.add_node(layer.name, object=layer, type=type(layer))
# for outbound_node in layer._outbound_nodes:
# g.add_edge(layer.name, outbound_node.outbound_layer.name)
# return g
Comment on lines +112 to +119
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dead code



# takes a model in any supported framework and converts it to ONNX
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> takes a model in any supported framework and converts it to ONNX

def to_onnx(batch_size, in_channel, image_size, model, model_name):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove empty line

# generate dummy input
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this necessary?

x = torch.randn(batch_size, in_channel, image_size, image_size, requires_grad=True)
torch_out = model(x)

# Export the model to onnx
torch.onnx.export(model, # model being run
x, # model input (or a tuple for multiple inputs)
f"{model_name}.onnx", # where to save the model (can be a file or file-like object)
export_params=True, # store the trained parameter weights inside the model file
opset_version=10, # the ONNX version to export the model to
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a rationale behind this? Why not use the default, i.e. don't pass this argument?

do_constant_folding=True, # whether to execute constant folding for optimization
input_names=['input'], # the model's input names
output_names=['output'], # the model's output names
dynamic_axes={'input': {0: 'batch_size'}, # variable length axes
'output': {0: 'batch_size'}})

onnx_model = onnx.load( f"{model_name}.onnx")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete space after (

onnx.checker.check_model(onnx_model)
ort_session = onnxruntime.InferenceSession( f"{model_name}.onnx")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete space after (


def to_numpy(tensor):
    """Return `tensor` as a numpy array on the CPU.

    A tensor that tracks gradients is detached first, because `.numpy()`
    cannot be called on a tensor that requires grad.
    """
    if tensor.requires_grad:
        tensor = tensor.detach()
    return tensor.cpu().numpy()
Comment on lines +145 to +146
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redundantly defined with above


# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh I see -- this, together with the above run, should probably be its own method


print("Exported model has been tested with ONNXRuntime, and the result looks good!")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use logging rather than print

return onnx_model


def get_layers(onnx_model):
    """Collect everything `enumerate_model_node_outputs` yields for `onnx_model` into a list.

    :param onnx_model: the model handed to `enumerate_model_node_outputs`
    :return: a list of the yielded items, in the order they were yielded
    """
    return list(enumerate_model_node_outputs(onnx_model))


def get_final_model(framework, batch_size, in_channels, image_size, model, model_name):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete empty line(s)

# print(batch_size, in_channels, image_size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dead code


# if model is pytorch, convert to ONNX automatically
if framework == "pytorch":
model.eval()
onnx_model = to_onnx(batch_size, in_channels, image_size, model, model_name)
layers = get_layers(onnx_model)
print("Pytorch to ONNX Conversion successful.")
return onnx_model, layers

# if model is already onnx, return that.
elif framework == "onnx":
onnx_model = model
layers = get_layers(onnx_model)
return onnx_model, layers

# unknown model format. In the future, I hope to add automatic conversion to ONNX for other platforms
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that future should be now for this PR to be merged, otherwise we're not winning anything

else:
raise RuntimeError(f"Given framework {framework} not implemented yet. Please convert your "
f"{framework} model to ONNX format. You can view how to do this "
f"here: https://github.com/onnx/tutorials")
26 changes: 18 additions & 8 deletions model_tools/check_submission/check_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,42 @@ def check_brain_model_processing(model):
def check_base_models(module):
module = __import__(module)
for model in module.get_model_list():
layers = module.get_layers(model)
wrapper, layers = module.process_model()
assert layers is not None
assert isinstance(layers, list)
assert len(layers) > 0
assert module.get_model(model) is not None
check_processing(model, module)
check_processing(model, wrapper, layers)
print('Test successful, you are ready to submit!')


def check_processing(model_identifier, module):
def check_processing(model_identifier, wrapper, layers):
os.environ['RESULTCACHING_DISABLE'] = '1'
model_instance = module.get_model(model_identifier)
layers = module.get_layers(model_identifier)
model_instance = wrapper
benchmark = _MockBenchmark()

print("\nStep 1/4: Combining ANN model and preprocessing...")
layer_selection = LayerSelection(model_identifier=model_identifier,
activations_model=model_instance, layers=layers,
visual_degrees=8)
print("Done.\n")

print("Step 2/4: Mapping ANN layers to Brain Regions...")
region_layer_map = RegionLayerMap(layer_selection=layer_selection,
region_benchmarks={'IT': benchmark})
print("Done.\n")

print("Step 3/4: Transforming ANN into Brain Model...")
brain_model = ModelCommitment(identifier=model_identifier, activations_model=model_instance,
layers=layers, region_layer_map=region_layer_map)
print("Done.\n")

print("Step 4/4: Scoring Brain Model on region: IT ...")
score = benchmark(brain_model, do_behavior=True)
print("Done.")
print("\nModel layers", layers, "successfully scored on region IT mock benchmarks.")
print(score)
assert score is not None
assert score.sel(aggregation='center')
# assert score.sel(aggregation='center')


class _MockBenchmark(BenchmarkBase):
Expand All @@ -68,7 +79,6 @@ def __init__(self):
self.assembly = average_repetition(assembly_repetition)
self._assembly = self.assembly
self.timebins = timebins_from_assembly(self.assembly)

self._similarity_metric = CrossRegressedCorrelation(
regression=pls_regression(), correlation=pearsonr_correlation(),
crossvalidation_kwargs=dict(stratification_coord=Split.Defaults.stratification_coord
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"Pillow",
"numpy",
"tqdm",
"torch",
"torchvision",
"torch<=1.8.1",
"torchvision<=0.9.1",
"tensorflow==1.15",
"keras==2.3.1",
"scikit-learn",
Expand Down