Skip to content

Commit

Permalink
This release adds APIs for new features for SageMaker endpoint to scale down to zero instances, native support for multi-adapter inference, and endpoint scaling improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
aws-sdk-dotnet-automation committed Nov 22, 2024
1 parent ef3f3cd commit 649eebe
Show file tree
Hide file tree
Showing 24 changed files with 716 additions and 40 deletions.
60 changes: 49 additions & 11 deletions generator/ServiceModels/sagemaker/sagemaker-2017-07-24.api.json
Original file line number Diff line number Diff line change
Expand Up @@ -5998,7 +5998,8 @@
"ExecutionRole":{"shape":"RoleArn"},
"ThreadsPerCore":{"shape":"ClusterThreadsPerCore"},
"InstanceStorageConfigs":{"shape":"ClusterInstanceStorageConfigs"},
"OnStartDeepHealthChecks":{"shape":"OnStartDeepHealthChecks"}
"OnStartDeepHealthChecks":{"shape":"OnStartDeepHealthChecks"},
"OverrideVpcConfig":{"shape":"VpcConfig"}
}
},
"ClusterInstanceGroupDetailsList":{
Expand Down Expand Up @@ -6028,7 +6029,8 @@
"ExecutionRole":{"shape":"RoleArn"},
"ThreadsPerCore":{"shape":"ClusterThreadsPerCore"},
"InstanceStorageConfigs":{"shape":"ClusterInstanceStorageConfigs"},
"OnStartDeepHealthChecks":{"shape":"OnStartDeepHealthChecks"}
"OnStartDeepHealthChecks":{"shape":"OnStartDeepHealthChecks"},
"OverrideVpcConfig":{"shape":"VpcConfig"}
}
},
"ClusterInstanceGroupSpecifications":{
Expand Down Expand Up @@ -6174,6 +6176,7 @@
"InstanceType":{"shape":"ClusterInstanceType"},
"LaunchTime":{"shape":"Timestamp"},
"LifeCycleConfig":{"shape":"ClusterLifeCycleConfig"},
"OverrideVpcConfig":{"shape":"VpcConfig"},
"ThreadsPerCore":{"shape":"ClusterThreadsPerCore"},
"InstanceStorageConfigs":{"shape":"ClusterInstanceStorageConfigs"},
"PrivatePrimaryIp":{"shape":"ClusterPrivatePrimaryIp"},
Expand Down Expand Up @@ -7382,9 +7385,7 @@
"required":[
"InferenceComponentName",
"EndpointName",
"VariantName",
"Specification",
"RuntimeConfig"
"Specification"
],
"members":{
"InferenceComponentName":{"shape":"InferenceComponentName"},
Expand Down Expand Up @@ -13726,7 +13727,7 @@
"ImageVersionArn":{
"type":"string",
"max":256,
"pattern":"^arn:aws(-[\\w]+)*:sagemaker:.+:[0-9]{12}:image-version/[a-z0-9]([-.]?[a-z0-9])*/[0-9]+$"
"pattern":"^(arn:aws(-[\\w]+)*:sagemaker:.+:[0-9]{12}:image-version/[a-z0-9]([-.]?[a-z0-9])*/[0-9]+|None)$"
},
"ImageVersionNumber":{
"type":"integer",
Expand Down Expand Up @@ -13868,12 +13869,12 @@
},
"InferenceComponentSpecification":{
"type":"structure",
"required":["ComputeResourceRequirements"],
"members":{
"ModelName":{"shape":"ModelName"},
"Container":{"shape":"InferenceComponentContainerSpecification"},
"StartupParameters":{"shape":"InferenceComponentStartupParameters"},
"ComputeResourceRequirements":{"shape":"InferenceComponentComputeResourceRequirements"}
"ComputeResourceRequirements":{"shape":"InferenceComponentComputeResourceRequirements"},
"BaseInferenceComponentName":{"shape":"InferenceComponentName"}
}
},
"InferenceComponentSpecificationSummary":{
Expand All @@ -13882,7 +13883,8 @@
"ModelName":{"shape":"ModelName"},
"Container":{"shape":"InferenceComponentContainerSpecificationSummary"},
"StartupParameters":{"shape":"InferenceComponentStartupParameters"},
"ComputeResourceRequirements":{"shape":"InferenceComponentComputeResourceRequirements"}
"ComputeResourceRequirements":{"shape":"InferenceComponentComputeResourceRequirements"},
"BaseInferenceComponentName":{"shape":"InferenceComponentName"}
}
},
"InferenceComponentStartupParameters":{
Expand Down Expand Up @@ -16627,7 +16629,7 @@
},
"ManagedInstanceScalingMinInstanceCount":{
"type":"integer",
"min":1
"min":0
},
"ManagedInstanceScalingStatus":{
"type":"string",
Expand Down Expand Up @@ -17745,6 +17747,13 @@
"type":"integer",
"min":0
},
"ModelShardingConfig":{
"type":"structure",
"members":{
"Image":{"shape":"OptimizationContainerImage"},
"OverrideEnvironment":{"shape":"OptimizationJobEnvironmentVariables"}
}
},
"ModelSortKey":{
"type":"string",
"enum":[
Expand Down Expand Up @@ -18693,7 +18702,8 @@
"type":"structure",
"members":{
"ModelQuantizationConfig":{"shape":"ModelQuantizationConfig"},
"ModelCompilationConfig":{"shape":"ModelCompilationConfig"}
"ModelCompilationConfig":{"shape":"ModelCompilationConfig"},
"ModelShardingConfig":{"shape":"ModelShardingConfig"}
},
"union":true
},
Expand Down Expand Up @@ -19774,6 +19784,24 @@
"ml.c6i.16xlarge",
"ml.c6i.24xlarge",
"ml.c6i.32xlarge",
"ml.m6i.large",
"ml.m6i.xlarge",
"ml.m6i.2xlarge",
"ml.m6i.4xlarge",
"ml.m6i.8xlarge",
"ml.m6i.12xlarge",
"ml.m6i.16xlarge",
"ml.m6i.24xlarge",
"ml.m6i.32xlarge",
"ml.r6i.large",
"ml.r6i.xlarge",
"ml.r6i.2xlarge",
"ml.r6i.4xlarge",
"ml.r6i.8xlarge",
"ml.r6i.12xlarge",
"ml.r6i.16xlarge",
"ml.r6i.24xlarge",
"ml.r6i.32xlarge",
"ml.g5.xlarge",
"ml.g5.2xlarge",
"ml.g5.4xlarge",
Expand All @@ -19790,6 +19818,14 @@
"ml.g6.16xlarge",
"ml.g6.24xlarge",
"ml.g6.48xlarge",
"ml.g6e.xlarge",
"ml.g6e.2xlarge",
"ml.g6e.4xlarge",
"ml.g6e.8xlarge",
"ml.g6e.12xlarge",
"ml.g6e.16xlarge",
"ml.g6e.24xlarge",
"ml.g6e.48xlarge",
"ml.p4d.24xlarge",
"ml.c7g.large",
"ml.c7g.xlarge",
Expand Down Expand Up @@ -19851,11 +19887,13 @@
"ml.trn1.2xlarge",
"ml.trn1.32xlarge",
"ml.trn1n.32xlarge",
"ml.trn2.48xlarge",
"ml.inf2.xlarge",
"ml.inf2.8xlarge",
"ml.inf2.24xlarge",
"ml.inf2.48xlarge",
"ml.p5.48xlarge",
"ml.p5e.48xlarge",
"ml.m7i.large",
"ml.m7i.xlarge",
"ml.m7i.2xlarge",
Expand Down
21 changes: 17 additions & 4 deletions generator/ServiceModels/sagemaker/sagemaker-2017-07-24.docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -7655,10 +7655,10 @@
}
},
"InferenceComponentComputeResourceRequirements": {
"base": "<p>Defines the compute resources to allocate to run a model that you assign to an inference component. These resources include CPU cores, accelerators, and memory.</p>",
"base": "<p>Defines the compute resources to allocate to run a model, plus any adapter models, that you assign to an inference component. These resources include CPU cores, accelerators, and memory.</p>",
"refs": {
"InferenceComponentSpecification$ComputeResourceRequirements": "<p>The compute resources allocated to run the model assigned to the inference component.</p>",
"InferenceComponentSpecificationSummary$ComputeResourceRequirements": "<p>The compute resources allocated to run the model assigned to the inference component.</p>"
"InferenceComponentSpecification$ComputeResourceRequirements": "<p>The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component.</p> <p>Omit this parameter if your request is meant to create an adapter inference component. An adapter inference component is loaded by a base inference component, and it uses the compute resources of the base inference component.</p>",
"InferenceComponentSpecificationSummary$ComputeResourceRequirements": "<p>The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component.</p>"
}
},
"InferenceComponentContainerSpecification": {
Expand Down Expand Up @@ -7688,6 +7688,8 @@
"DeleteInferenceComponentInput$InferenceComponentName": "<p>The name of the inference component to delete.</p>",
"DescribeInferenceComponentInput$InferenceComponentName": "<p>The name of the inference component.</p>",
"DescribeInferenceComponentOutput$InferenceComponentName": "<p>The name of the inference component.</p>",
"InferenceComponentSpecification$BaseInferenceComponentName": "<p>The name of an existing inference component that is to contain the inference component that you're creating with your request.</p> <p>Specify this parameter only if your request is meant to create an adapter inference component. An adapter inference component contains the path to an adapter model. The purpose of the adapter model is to tailor the inference output of a base foundation model, which is hosted by the base inference component. The adapter inference component uses the compute resources that you assigned to the base inference component.</p> <p>When you create an adapter inference component, use the <code>Container</code> parameter to specify the location of the adapter artifacts. In the parameter value, use the <code>ArtifactUrl</code> parameter of the <code>InferenceComponentContainerSpecification</code> data type.</p> <p>Before you can create an adapter inference component, you must have an existing inference component that contains the foundation model that you want to adapt.</p>",
"InferenceComponentSpecificationSummary$BaseInferenceComponentName": "<p>The name of the base inference component that contains this inference component.</p>",
"InferenceComponentSummary$InferenceComponentName": "<p>The name of the inference component.</p>",
"UpdateInferenceComponentInput$InferenceComponentName": "<p>The name of the inference component.</p>",
"UpdateInferenceComponentRuntimeConfigInput$InferenceComponentName": "<p>The name of the inference component to update.</p>"
Expand Down Expand Up @@ -10613,6 +10615,12 @@
"RecommendationMetrics$ModelSetupTime": "<p>The time it takes to launch new compute resources for a serverless endpoint. The time can vary depending on the model size, how long it takes to download the model, and the start-up time of the container.</p> <p> <code>NaN</code> indicates that the value is not available.</p>"
}
},
"ModelShardingConfig": {
"base": "<p>Settings for the model sharding technique that's applied by a model optimization job.</p>",
"refs": {
"OptimizationConfig$ModelShardingConfig": "<p>Settings for the model sharding technique that's applied by a model optimization job.</p>"
}
},
"ModelSortKey": {
"base": null,
"refs": {
Expand Down Expand Up @@ -11765,6 +11773,7 @@
"refs": {
"ModelCompilationConfig$Image": "<p>The URI of an LMI DLC in Amazon ECR. SageMaker uses this image to run the optimization.</p>",
"ModelQuantizationConfig$Image": "<p>The URI of an LMI DLC in Amazon ECR. SageMaker uses this image to run the optimization.</p>",
"ModelShardingConfig$Image": "<p>The URI of an LMI DLC in Amazon ECR. SageMaker uses this image to run the optimization.</p>",
"OptimizationOutput$RecommendedInferenceImage": "<p>The image that SageMaker recommends that you use to host the optimized model that you created with an optimization job.</p>"
}
},
Expand All @@ -11790,7 +11799,8 @@
"CreateOptimizationJobRequest$OptimizationEnvironment": "<p>The environment variables to set in the model container.</p>",
"DescribeOptimizationJobResponse$OptimizationEnvironment": "<p>The environment variables to set in the model container.</p>",
"ModelCompilationConfig$OverrideEnvironment": "<p>Environment variables that override the default ones in the model container.</p>",
"ModelQuantizationConfig$OverrideEnvironment": "<p>Environment variables that override the default ones in the model container.</p>"
"ModelQuantizationConfig$OverrideEnvironment": "<p>Environment variables that override the default ones in the model container.</p>",
"ModelShardingConfig$OverrideEnvironment": "<p>Environment variables that override the default ones in the model container.</p>"
}
},
"OptimizationJobModelSource": {
Expand Down Expand Up @@ -17339,6 +17349,9 @@
"base": "<p>Specifies an Amazon Virtual Private Cloud (VPC) that your SageMaker jobs, hosted models, and compute resources have access to. You can control access to and from your resources by configuring a VPC. For more information, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/infrastructure-give-access.html\">Give SageMaker Access to Resources in your Amazon VPC</a>. </p>",
"refs": {
"AutoMLSecurityConfig$VpcConfig": "<p>The VPC configuration.</p>",
"ClusterInstanceGroupDetails$OverrideVpcConfig": null,
"ClusterInstanceGroupSpecification$OverrideVpcConfig": null,
"ClusterNodeDetails$OverrideVpcConfig": null,
"CreateClusterRequest$VpcConfig": null,
"CreateEndpointConfigInput$VpcConfig": null,
"CreateModelInput$VpcConfig": "<p>A <a href=\"https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_VpcConfig.html\">VpcConfig</a> object that specifies the VPC that you want your model to connect to. Control access to and from your model container by configuring the VPC. <code>VpcConfig</code> is used in hosting services and in batch transform. For more information, see <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/host-vpc.html\">Protect Endpoints by Using an Amazon Virtual Private Cloud</a> and <a href=\"https://docs.aws.amazon.com/sagemaker/latest/dg/batch-vpc.html\">Protect Data in Batch Transform Jobs by Using an Amazon Virtual Private Cloud</a>.</p>",
Expand Down
Loading

0 comments on commit 649eebe

Please sign in to comment.