Skip to content

Commit

Permalink
Merge pull request caikit#710 from HonakerM/add_abstractions_for_runt…
Browse files Browse the repository at this point in the history
…ime_models

Add Abstractions for Runtime ModelManagement classes
  • Loading branch information
gabe-l-hart authored Jun 3, 2024
2 parents 176dee3 + 711dfc8 commit 00c792a
Show file tree
Hide file tree
Showing 13 changed files with 435 additions and 189 deletions.
8 changes: 8 additions & 0 deletions caikit/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ model_management:
# List of module backend configurations in priority order
backend_priority:
- type: LOCAL
# Named model loader configurations. Each entry has a "type" and a "config" blob.
# NOTE(review): "type" presumably matches the `name` of a class registered
# with model_loader_factory (e.g. CORE) -- confirm against the factory.
loaders:
default:
type: CORE
config: {}
# Named model sizer configurations, same shape as the loaders above.
# NOTE(review): "type" presumably matches the `name` of a class registered
# with model_sizer_factory (e.g. MODEL_MESH, DIRECTORY) -- confirm.
sizers:
default:
type: MODEL_MESH
config: {}

log:
# Default level for all python loggers
Expand Down
62 changes: 62 additions & 0 deletions caikit/runtime/model_management/core_model_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Standard
from typing import Optional, Union

# Third Party
from prometheus_client import Summary

# First Party
import alog

# Local
from caikit.core import MODEL_MANAGER, ModuleBase
from caikit.core.model_management import ModelFinderBase, ModelInitializerBase
from caikit.runtime.model_management.model_loader_base import ModelLoaderBase

# Logger for this module, bound to the MODEL-LOADER channel
log = alog.use_channel("MODEL-LOADER")

# Prometheus Summary used to time MODEL_MANAGER.load calls in this module.
# Observations carry a single "model_type" label.
CAIKIT_CORE_LOAD_DURATION_SUMMARY = Summary(
"caikit_core_load_model_duration_seconds",
"Summary of the duration (in seconds) of caikit.core.load(model)",
["model_type"],
)


class CoreModelLoader(ModelLoaderBase):
    """Model loader that delegates the actual load to the caikit.core
    MODEL_MANAGER"""

    name = "CORE"

    def load_module_instance(
        self,
        model_path: str,
        model_id: str,
        model_type: str,
        finder: Optional[Union[str, ModelFinderBase]] = None,
        initializer: Optional[Union[str, ModelInitializerBase]] = None,
    ) -> ModuleBase:
        """Load the model found at model_path via caikit.core and return it

        Args:
            model_path (str): Path handed to MODEL_MANAGER.load
            model_id (str): Model identifier, only used for logging here
            model_type (str): Value of the "model_type" label on the load
                duration metric
            finder (Optional[Union[str, ModelFinderBase]]): Forwarded to
                MODEL_MANAGER.load only when it has a truthy value
            initializer (Optional[Union[str, ModelInitializerBase]]):
                Forwarded to MODEL_MANAGER.load only when it has a truthy
                value

        Returns:
            ModuleBase: Whatever MODEL_MANAGER.load returns for model_path
        """
        log.info("<RUN89711114I>", "Loading model '%s'", model_id)

        # Keep only the overrides that were actually supplied. Omitting a key
        # (rather than passing None) lets MODEL_MANAGER.load use its defaults.
        overrides = {
            key: value
            for key, value in (("finder", finder), ("initializer", initializer))
            if value
        }

        # Record the duration of the core load, labelled by model type
        load_timer = CAIKIT_CORE_LOAD_DURATION_SUMMARY.labels(model_type=model_type)
        with load_timer.time():
            return MODEL_MANAGER.load(model_path, **overrides)
88 changes: 88 additions & 0 deletions caikit/runtime/model_management/directory_model_sizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Standard
from pathlib import Path
from typing import Dict
import os

# Third Party
import grpc

# First Party
import aconfig
import alog

# Local
from caikit.runtime.model_management.model_sizer_base import ModelSizerBase
from caikit.runtime.types.caikit_runtime_exception import CaikitRuntimeException

# Logger for this module, bound to the DIRECTORY-SIZER channel
log = alog.use_channel("DIRECTORY-SIZER")


class DirectoryModelSizer(ModelSizerBase):
    """DirectoryModelSizer. This class calculates a model's size based on the
    size of the files in the model directory

    ! Note: It caches the size of a path after first sizing it, so later
    changes on disk are not reflected and parallel callers may each compute
    the size before the cache is populated.
    """

    name = "DIRECTORY"

    def __init__(self, config: aconfig.Config, instance_name: str):
        """Construct the sizer with an empty size cache

        Args:
            config (aconfig.Config): Passed through to the base class
            instance_name (str): Passed through to the base class
        """
        super().__init__(config, instance_name)
        # Cache of sizes: local model path -> size on disk in bytes
        self.model_directory_size: Dict[str, int] = {}

    def get_model_size(self, model_id, local_model_path, model_type) -> int:
        """
        Returns the size on disk of a model, used as its size estimate

        Args:
            model_id: The model identifier, used for informative logging
            local_model_path: The local path to the model directory (or to a
                single model file)
            model_type: The type of model. Unused by this sizer.
        Returns:
            The total size in bytes of the files at local_model_path
        Raises:
            CaikitRuntimeException: With status NOT_FOUND if the path cannot
                be found while sizing it
        """
        # Return the cached model size if one exists. Compare against None so
        # that a legitimately cached size of 0 is still treated as a hit
        # instead of triggering a fresh directory walk on every call.
        cached_size = self.model_directory_size.get(local_model_path)
        if cached_size is not None:
            return cached_size

        # Calculate the model size and add it to the cache. This uses last in
        # methodology so that the most recent size is used during parallel access
        dir_size = self.__get_directory_size(model_id, local_model_path)
        self.model_directory_size[local_model_path] = dir_size
        return dir_size

    def __get_directory_size(self, model_id, local_model_path) -> int:
        """Get the size in bytes of a directory tree or of a single file

        Args:
            model_id: The model identifier, used in the error message
            local_model_path: Directory or file to measure
        Returns:
            Sum of the sizes of all regular files under the directory, or the
            size of the file itself when the path is not a directory
        Raises:
            CaikitRuntimeException: With status NOT_FOUND when a
                FileNotFoundError is hit while sizing
        """
        try:
            if os.path.isdir(local_model_path):
                # Walk the directory recursively to size all files
                return sum(
                    file.stat().st_size
                    for file in Path(local_model_path).rglob("*")
                    if file.is_file()
                )

            # Probably just an archive file
            return os.path.getsize(local_model_path)
        except FileNotFoundError as ex:
            # The trailing space keeps the two adjacent f-strings from running
            # together after the comma
            message = (
                f"Failed to estimate size of model '{model_id}', "
                f"file '{local_model_path}' not found"
            )
            log.error("<RUN62168924E>", message)
            raise CaikitRuntimeException(grpc.StatusCode.NOT_FOUND, message) from ex
33 changes: 33 additions & 0 deletions caikit/runtime/model_management/factories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Global factories for model management
"""

# Local
from caikit.core.toolkit.factory import ImportableFactory
from caikit.runtime.model_management.core_model_loader import CoreModelLoader
from caikit.runtime.model_management.directory_model_sizer import DirectoryModelSizer
from caikit.runtime.model_management.mm_model_sizer import ModelMeshModelSizer

# Model Loader factory. A loader is responsible for constructing
# a LoadedModel instance
# CoreModelLoader is the only loader registered in this module.
model_loader_factory = ImportableFactory("ModelLoader")
model_loader_factory.register(CoreModelLoader)

# Model Sizer factory. A sizer is responsible for estimating
# the size of a model
# Two sizers are registered in this module: DirectoryModelSizer and
# ModelMeshModelSizer.
model_sizer_factory = ImportableFactory("ModelSizer")
model_sizer_factory.register(DirectoryModelSizer)
model_sizer_factory.register(ModelMeshModelSizer)
71 changes: 71 additions & 0 deletions caikit/runtime/model_management/mm_model_sizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# First Party
import alog

# Local
from caikit import get_config
from caikit.runtime.model_management.directory_model_sizer import DirectoryModelSizer

# Logger for this module, bound to the MM-SIZER channel
log = alog.use_channel("MM-SIZER")


class ModelMeshModelSizer(DirectoryModelSizer):
    """ModelMeshModelSizer. This class estimates a model's size based on
    the contents of the directory multiplied by a model specific
    constant"""

    name = "MODEL_MESH"

    def get_model_size(self, model_id, local_model_path, model_type) -> int:
        """
        Returns the estimated memory footprint of a model

        Args:
            model_id: The model identifier, used for informative logging
            local_model_path: The local path to the model, sized by the
                parent DirectoryModelSizer
            model_type: The type of model, used to pick the size multiplier
        Returns:
            The size reported by the parent sizer multiplied by the configured
            multiplier for model_type (or the default multiplier), truncated
            to an int
        """
        # Resolve the model mesh config section once instead of calling
        # get_config() separately for every lookup
        mm_config = get_config().inference_plugin.model_mesh
        multipliers = mm_config.model_size_multipliers

        if model_type in multipliers:
            multiplier = multipliers[model_type]
            log.debug(
                "Using size multiplier '%f' for model '%s' to estimate model size",
                multiplier,
                model_id,
            )
        else:
            # No type specific entry, fall back to the global default
            multiplier = mm_config.default_model_size_multiplier
            log.info(
                "<RUN62161564I>",
                "No configured model size multiplier found for model type '%s' for model '%s'. "
                "Using default multiplier '%f'",
                model_type,
                model_id,
                multiplier,
            )

        # Scale the on-disk size by the multiplier; int() truncates any fraction
        return int(
            super().get_model_size(model_id, local_model_path, model_type) * multiplier
        )
Loading

0 comments on commit 00c792a

Please sign in to comment.