Skip to content

Commit 2e95bc0

Browse files
authored
fix: Sync Nova hosting configs with AGISageMakerInference (#5664)
Align _NOVA_HOSTING_CONFIGS CONTEXT_LENGTH and MAX_CONCURRENCY values with ALLOWLISTED_CONFIGURATIONS from AGISageMakerInference constants.py. Key changes: - micro: correct context/concurrency for g5, g6 instances; add g6e types - lite: add g6.12xlarge, g6.24xlarge; fix p5 to 128000 context - pro: remove unsupported g6.48xlarge; fix p5 to 24000/1 - lite-v2: add g6.48xlarge; fix p5 to 128000 context
1 parent 02e864d commit 2e95bc0

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

sagemaker-serve/src/sagemaker/serve/model_builder.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,25 +1015,36 @@ def _fetch_and_cache_recipe_config(self):
10151015
}
10161016

10171017
# Nova hosting configs per model (from Rhinestone modelDeployment.ts)
1018+
# NOTE: The nova-inference container (:SM-Inference-latest) enforces per-tier
1019+
# MAX_CONCURRENCY limits based on CONTEXT_LENGTH. These values were updated
1020+
# ~2026-03-23 to stay in sync with AGISageMakerInference ALLOWLISTED_CONFIGURATIONS.
1021+
# For each instance type, uses the highest tier's CONTEXT_LENGTH and that tier's MAX_CONCURRENCY.
1022+
# If deployments fail with "MAX_CONCURRENCY N exceeds tier limit M", the
1023+
# container has likely tightened limits — check CloudWatch logs for the cap.
10181024
_NOVA_HOSTING_CONFIGS = {
10191025
"nova-textgeneration-micro": [
1020-
{"InstanceType": "ml.g5.12xlarge", "Environment": {"CONTEXT_LENGTH": "4096", "MAX_CONCURRENCY": "16"}},
1021-
{"InstanceType": "ml.g5.24xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "8192", "MAX_CONCURRENCY": "16"}},
1022-
{"InstanceType": "ml.g6.12xlarge", "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"}},
1023-
{"InstanceType": "ml.g6.24xlarge", "Environment": {"CONTEXT_LENGTH": "10000", "MAX_CONCURRENCY": "16"}},
1024-
{"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
1025-
{"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
1026+
{"InstanceType": "ml.g5.12xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "6"}},
1027+
{"InstanceType": "ml.g5.24xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "8"}},
1028+
{"InstanceType": "ml.g6.12xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "6"}},
1029+
{"InstanceType": "ml.g6.24xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "8"}},
1030+
{"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "12"}},
1031+
{"InstanceType": "ml.g6e.xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "2"}},
1032+
{"InstanceType": "ml.g6e.2xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "2"}},
1033+
{"InstanceType": "ml.g6e.4xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "4"}},
1034+
{"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "128000", "MAX_CONCURRENCY": "8"}},
10261035
],
10271036
"nova-textgeneration-lite": [
1028-
{"InstanceType": "ml.g6.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "20000", "MAX_CONCURRENCY": "16"}},
1029-
{"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
1037+
{"InstanceType": "ml.g6.12xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "2"}},
1038+
{"InstanceType": "ml.g6.24xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "4"}},
1039+
{"InstanceType": "ml.g6.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "8"}},
1040+
{"InstanceType": "ml.p5.48xlarge", "Environment": {"CONTEXT_LENGTH": "128000", "MAX_CONCURRENCY": "8"}},
10301041
],
10311042
"nova-textgeneration-pro": [
1032-
{"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "12000", "MAX_CONCURRENCY": "16"}},
1033-
{"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"}},
1043+
{"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "24000", "MAX_CONCURRENCY": "1"}},
10341044
],
10351045
"nova-textgeneration-lite-v2": [
1036-
{"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "50000", "MAX_CONCURRENCY": "16"}},
1046+
{"InstanceType": "ml.g6.48xlarge", "Environment": {"CONTEXT_LENGTH": "8000", "MAX_CONCURRENCY": "8"}},
1047+
{"InstanceType": "ml.p5.48xlarge", "Profile": "Default", "Environment": {"CONTEXT_LENGTH": "128000", "MAX_CONCURRENCY": "8"}},
10371048
],
10381049
}
10391050

0 commit comments

Comments
 (0)