@@ -1015,25 +1015,36 @@ def _fetch_and_cache_recipe_config(self):
10151015 }
10161016
10171017 # Nova hosting configs per model (from Rhinestone modelDeployment.ts)
1018+ # NOTE: The nova-inference container (:SM-Inference-latest) enforces per-tier
1019+ # MAX_CONCURRENCY limits based on CONTEXT_LENGTH. These values were updated
1020+ # ~2026-03-23 and synced with AGISageMakerInference ALLOWLISTED_CONFIGURATIONS.
1021+ # Uses the highest tier's CONTEXT_LENGTH and its MAX_CONCURRENCY per instance.
1022+ # If deployments fail with "MAX_CONCURRENCY N exceeds tier limit M", the
1023+ # container has likely tightened limits — check CloudWatch logs for the cap.
10181024 _NOVA_HOSTING_CONFIGS = {
10191025 "nova-textgeneration-micro" : [
1020- {"InstanceType" : "ml.g5.12xlarge" , "Environment" : {"CONTEXT_LENGTH" : "4096" , "MAX_CONCURRENCY" : "16" }},
1021- {"InstanceType" : "ml.g5.24xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "8192" , "MAX_CONCURRENCY" : "16" }},
1022- {"InstanceType" : "ml.g6.12xlarge" , "Environment" : {"CONTEXT_LENGTH" : "10000" , "MAX_CONCURRENCY" : "16" }},
1023- {"InstanceType" : "ml.g6.24xlarge" , "Environment" : {"CONTEXT_LENGTH" : "10000" , "MAX_CONCURRENCY" : "16" }},
1024- {"InstanceType" : "ml.g6.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "12000" , "MAX_CONCURRENCY" : "16" }},
1025- {"InstanceType" : "ml.p5.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "12000" , "MAX_CONCURRENCY" : "16" }},
1026+ {"InstanceType" : "ml.g5.12xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "6" }},
1027+ {"InstanceType" : "ml.g5.24xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "8" }},
1028+ {"InstanceType" : "ml.g6.12xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "6" }},
1029+ {"InstanceType" : "ml.g6.24xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "8" }},
1030+ {"InstanceType" : "ml.g6.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "12" }},
1031+ {"InstanceType" : "ml.g6e.xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "2" }},
1032+ {"InstanceType" : "ml.g6e.2xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "2" }},
1033+ {"InstanceType" : "ml.g6e.4xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "4" }},
1034+ {"InstanceType" : "ml.p5.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "128000" , "MAX_CONCURRENCY" : "8" }},
10261035 ],
10271036 "nova-textgeneration-lite" : [
1028- {"InstanceType" : "ml.g6.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "20000" , "MAX_CONCURRENCY" : "16" }},
1029- {"InstanceType" : "ml.p5.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "12000" , "MAX_CONCURRENCY" : "16" }},
1037+ {"InstanceType" : "ml.g6.12xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "2" }},
1038+ {"InstanceType" : "ml.g6.24xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "4" }},
1039+ {"InstanceType" : "ml.g6.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "8" }},
1040+ {"InstanceType" : "ml.p5.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "128000" , "MAX_CONCURRENCY" : "8" }},
10301041 ],
10311042 "nova-textgeneration-pro" : [
1032- {"InstanceType" : "ml.g6.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "12000" , "MAX_CONCURRENCY" : "16" }},
1033- {"InstanceType" : "ml.p5.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "50000" , "MAX_CONCURRENCY" : "16" }},
1043+ {"InstanceType" : "ml.p5.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "24000" , "MAX_CONCURRENCY" : "1" }},
10341044 ],
10351045 "nova-textgeneration-lite-v2" : [
1036- {"InstanceType" : "ml.p5.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "50000" , "MAX_CONCURRENCY" : "16" }},
1046+ {"InstanceType" : "ml.g6.48xlarge" , "Environment" : {"CONTEXT_LENGTH" : "8000" , "MAX_CONCURRENCY" : "8" }},
1047+ {"InstanceType" : "ml.p5.48xlarge" , "Profile" : "Default" , "Environment" : {"CONTEXT_LENGTH" : "128000" , "MAX_CONCURRENCY" : "8" }},
10371048 ],
10381049 }
10391050
0 commit comments