databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
Note: If you replace served_models
with served_entities
in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const _this = new databricks.ModelServing("this", {
name: "ads-serving-endpoint",
config: {
servedEntities: [
{
name: "prod_model",
entityName: "ads-model",
entityVersion: "2",
workloadSize: "Small",
scaleToZeroEnabled: true,
},
{
name: "candidate_model",
entityName: "ads-model",
entityVersion: "4",
workloadSize: "Small",
scaleToZeroEnabled: false,
},
],
trafficConfig: {
routes: [
{
servedModelName: "prod_model",
trafficPercentage: 90,
},
{
servedModelName: "candidate_model",
trafficPercentage: 10,
},
],
},
},
});
import pulumi
import pulumi_databricks as databricks
this = databricks.ModelServing("this",
name="ads-serving-endpoint",
config=databricks.ModelServingConfigArgs(
served_entities=[
databricks.ModelServingConfigServedEntityArgs(
name="prod_model",
entity_name="ads-model",
entity_version="2",
workload_size="Small",
scale_to_zero_enabled=True,
),
databricks.ModelServingConfigServedEntityArgs(
name="candidate_model",
entity_name="ads-model",
entity_version="4",
workload_size="Small",
scale_to_zero_enabled=False,
),
],
traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
routes=[
databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="prod_model",
traffic_percentage=90,
),
databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="candidate_model",
traffic_percentage=10,
),
],
),
))
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Name: pulumi.String("ads-serving-endpoint"),
Config: &databricks.ModelServingConfigArgs{
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("prod_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("2"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(true),
},
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("candidate_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("4"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(false),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var @this = new Databricks.ModelServing("this", new()
{
Name = "ads-serving-endpoint",
Config = new Databricks.Inputs.ModelServingConfigArgs
{
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "prod_model",
EntityName = "ads-model",
EntityVersion = "2",
WorkloadSize = "Small",
ScaleToZeroEnabled = true,
},
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "candidate_model",
EntityName = "ads-model",
EntityVersion = "4",
WorkloadSize = "Small",
ScaleToZeroEnabled = false,
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "prod_model",
TrafficPercentage = 90,
},
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "candidate_model",
TrafficPercentage = 10,
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var this_ = new ModelServing("this", ModelServingArgs.builder()
.name("ads-serving-endpoint")
.config(ModelServingConfigArgs.builder()
.servedEntities(
ModelServingConfigServedEntityArgs.builder()
.name("prod_model")
.entityName("ads-model")
.entityVersion("2")
.workloadSize("Small")
.scaleToZeroEnabled(true)
.build(),
ModelServingConfigServedEntityArgs.builder()
.name("candidate_model")
.entityName("ads-model")
.entityVersion("4")
.workloadSize("Small")
.scaleToZeroEnabled(false)
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("prod_model")
.trafficPercentage(90)
.build(),
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("candidate_model")
.trafficPercentage(10)
.build())
.build())
.build())
.build());
}
}
resources:
this:
type: databricks:ModelServing
properties:
name: ads-serving-endpoint
config:
servedEntities:
- name: prod_model
entityName: ads-model
entityVersion: '2'
workloadSize: Small
scaleToZeroEnabled: true
- name: candidate_model
entityName: ads-model
entityVersion: '4'
workloadSize: Small
scaleToZeroEnabled: false
trafficConfig:
routes:
- servedModelName: prod_model
trafficPercentage: 90
- servedModelName: candidate_model
trafficPercentage: 10
Access Control
- databricks.Permissions can control which groups or individual users can Manage, Query or View individual serving endpoints.
Related Resources
The following resources are often used in the same context:
- databricks.RegisteredModel to create Models in Unity Catalog in Databricks.
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create models in the workspace model registry in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.Notebook data source to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Create ModelServing Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
{
Config = new Databricks.Inputs.ModelServingConfigArgs
{
AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
EntityName = "string",
EntityVersion = "string",
EnvironmentVars =
{
{ "string", "any" },
},
ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
{
Name = "string",
Provider = "string",
Task = "string",
Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
{
Ai21labsApiKey = "string",
},
AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
{
AwsAccessKeyId = "string",
AwsRegion = "string",
AwsSecretAccessKey = "string",
BedrockProvider = "string",
},
AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
{
AnthropicApiKey = "string",
},
CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
{
CohereApiKey = "string",
},
DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
{
DatabricksApiToken = "string",
DatabricksWorkspaceUrl = "string",
},
OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
{
MicrosoftEntraClientId = "string",
MicrosoftEntraClientSecret = "string",
MicrosoftEntraTenantId = "string",
OpenaiApiBase = "string",
OpenaiApiKey = "string",
OpenaiApiType = "string",
OpenaiApiVersion = "string",
OpenaiDeploymentName = "string",
OpenaiOrganization = "string",
},
PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
{
PalmApiKey = "string",
},
},
InstanceProfileArn = "string",
MaxProvisionedThroughput = 0,
MinProvisionedThroughput = 0,
Name = "string",
ScaleToZeroEnabled = false,
WorkloadSize = "string",
WorkloadType = "string",
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "string",
TrafficPercentage = 0,
},
},
},
},
Name = "string",
RateLimits = new[]
{
new Databricks.Inputs.ModelServingRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
RouteOptimized = false,
Tags = new[]
{
new Databricks.Inputs.ModelServingTagArgs
{
Key = "string",
Value = "string",
},
},
});
example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
EntityName: pulumi.String("string"),
EntityVersion: pulumi.String("string"),
EnvironmentVars: pulumi.Map{
"string": pulumi.Any("any"),
},
ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
Name: pulumi.String("string"),
Provider: pulumi.String("string"),
Task: pulumi.String("string"),
Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
Ai21labsApiKey: pulumi.String("string"),
},
AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
AwsAccessKeyId: pulumi.String("string"),
AwsRegion: pulumi.String("string"),
AwsSecretAccessKey: pulumi.String("string"),
BedrockProvider: pulumi.String("string"),
},
AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
AnthropicApiKey: pulumi.String("string"),
},
CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
CohereApiKey: pulumi.String("string"),
},
DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
DatabricksApiToken: pulumi.String("string"),
DatabricksWorkspaceUrl: pulumi.String("string"),
},
OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
MicrosoftEntraClientId: pulumi.String("string"),
MicrosoftEntraClientSecret: pulumi.String("string"),
MicrosoftEntraTenantId: pulumi.String("string"),
OpenaiApiBase: pulumi.String("string"),
OpenaiApiKey: pulumi.String("string"),
OpenaiApiType: pulumi.String("string"),
OpenaiApiVersion: pulumi.String("string"),
OpenaiDeploymentName: pulumi.String("string"),
OpenaiOrganization: pulumi.String("string"),
},
PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
PalmApiKey: pulumi.String("string"),
},
},
InstanceProfileArn: pulumi.String("string"),
MaxProvisionedThroughput: pulumi.Int(0),
MinProvisionedThroughput: pulumi.Int(0),
Name: pulumi.String("string"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("string"),
WorkloadType: pulumi.String("string"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("string"),
TrafficPercentage: pulumi.Int(0),
},
},
},
},
Name: pulumi.String("string"),
RateLimits: databricks.ModelServingRateLimitArray{
&databricks.ModelServingRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
RouteOptimized: pulumi.Bool(false),
Tags: databricks.ModelServingTagArray{
&databricks.ModelServingTagArgs{
Key: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
})
var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
.config(ModelServingConfigArgs.builder()
.autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.servedEntities(ModelServingConfigServedEntityArgs.builder()
.entityName("string")
.entityVersion("string")
.environmentVars(Map.of("string", "any"))
.externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
.name("string")
.provider("string")
.task("string")
.ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
.ai21labsApiKey("string")
.build())
.amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
.awsAccessKeyId("string")
.awsRegion("string")
.awsSecretAccessKey("string")
.bedrockProvider("string")
.build())
.anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
.anthropicApiKey("string")
.build())
.cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
.cohereApiKey("string")
.build())
.databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
.databricksApiToken("string")
.databricksWorkspaceUrl("string")
.build())
.openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
.microsoftEntraClientId("string")
.microsoftEntraClientSecret("string")
.microsoftEntraTenantId("string")
.openaiApiBase("string")
.openaiApiKey("string")
.openaiApiType("string")
.openaiApiVersion("string")
.openaiDeploymentName("string")
.openaiOrganization("string")
.build())
.palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
.palmApiKey("string")
.build())
.build())
.instanceProfileArn("string")
.maxProvisionedThroughput(0)
.minProvisionedThroughput(0)
.name("string")
.scaleToZeroEnabled(false)
.workloadSize("string")
.workloadType("string")
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("string")
.trafficPercentage(0)
.build())
.build())
.build())
.name("string")
.rateLimits(ModelServingRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.routeOptimized(false)
.tags(ModelServingTagArgs.builder()
.key("string")
.value("string")
.build())
.build());
model_serving_resource = databricks.ModelServing("modelServingResource",
config=databricks.ModelServingConfigArgs(
auto_capture_config=databricks.ModelServingConfigAutoCaptureConfigArgs(
catalog_name="string",
enabled=False,
schema_name="string",
table_name_prefix="string",
),
served_entities=[databricks.ModelServingConfigServedEntityArgs(
entity_name="string",
entity_version="string",
environment_vars={
"string": "any",
},
external_model=databricks.ModelServingConfigServedEntityExternalModelArgs(
name="string",
provider="string",
task="string",
ai21labs_config=databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs(
ai21labs_api_key="string",
),
amazon_bedrock_config=databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs(
aws_access_key_id="string",
aws_region="string",
aws_secret_access_key="string",
bedrock_provider="string",
),
anthropic_config=databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs(
anthropic_api_key="string",
),
cohere_config=databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs(
cohere_api_key="string",
),
databricks_model_serving_config=databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs(
databricks_api_token="string",
databricks_workspace_url="string",
),
openai_config=databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs(
microsoft_entra_client_id="string",
microsoft_entra_client_secret="string",
microsoft_entra_tenant_id="string",
openai_api_base="string",
openai_api_key="string",
openai_api_type="string",
openai_api_version="string",
openai_deployment_name="string",
openai_organization="string",
),
palm_config=databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs(
palm_api_key="string",
),
),
instance_profile_arn="string",
max_provisioned_throughput=0,
min_provisioned_throughput=0,
name="string",
scale_to_zero_enabled=False,
workload_size="string",
workload_type="string",
)],
traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
routes=[databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="string",
traffic_percentage=0,
)],
),
),
name="string",
rate_limits=[databricks.ModelServingRateLimitArgs(
calls=0,
renewal_period="string",
key="string",
)],
route_optimized=False,
tags=[databricks.ModelServingTagArgs(
key="string",
value="string",
)])
const modelServingResource = new databricks.ModelServing("modelServingResource", {
config: {
autoCaptureConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
servedEntities: [{
entityName: "string",
entityVersion: "string",
environmentVars: {
string: "any",
},
externalModel: {
name: "string",
provider: "string",
task: "string",
ai21labsConfig: {
ai21labsApiKey: "string",
},
amazonBedrockConfig: {
awsAccessKeyId: "string",
awsRegion: "string",
awsSecretAccessKey: "string",
bedrockProvider: "string",
},
anthropicConfig: {
anthropicApiKey: "string",
},
cohereConfig: {
cohereApiKey: "string",
},
databricksModelServingConfig: {
databricksApiToken: "string",
databricksWorkspaceUrl: "string",
},
openaiConfig: {
microsoftEntraClientId: "string",
microsoftEntraClientSecret: "string",
microsoftEntraTenantId: "string",
openaiApiBase: "string",
openaiApiKey: "string",
openaiApiType: "string",
openaiApiVersion: "string",
openaiDeploymentName: "string",
openaiOrganization: "string",
},
palmConfig: {
palmApiKey: "string",
},
},
instanceProfileArn: "string",
maxProvisionedThroughput: 0,
minProvisionedThroughput: 0,
name: "string",
scaleToZeroEnabled: false,
workloadSize: "string",
workloadType: "string",
}],
trafficConfig: {
routes: [{
servedModelName: "string",
trafficPercentage: 0,
}],
},
},
name: "string",
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
routeOptimized: false,
tags: [{
key: "string",
value: "string",
}],
});
type: databricks:ModelServing
properties:
config:
autoCaptureConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
servedEntities:
- entityName: string
entityVersion: string
environmentVars:
string: any
externalModel:
ai21labsConfig:
ai21labsApiKey: string
amazonBedrockConfig:
awsAccessKeyId: string
awsRegion: string
awsSecretAccessKey: string
bedrockProvider: string
anthropicConfig:
anthropicApiKey: string
cohereConfig:
cohereApiKey: string
databricksModelServingConfig:
databricksApiToken: string
databricksWorkspaceUrl: string
name: string
openaiConfig:
microsoftEntraClientId: string
microsoftEntraClientSecret: string
microsoftEntraTenantId: string
openaiApiBase: string
openaiApiKey: string
openaiApiType: string
openaiApiVersion: string
openaiDeploymentName: string
openaiOrganization: string
palmConfig:
palmApiKey: string
provider: string
task: string
instanceProfileArn: string
maxProvisionedThroughput: 0
minProvisionedThroughput: 0
name: string
scaleToZeroEnabled: false
workloadSize: string
workloadType: string
trafficConfig:
routes:
- servedModelName: string
trafficPercentage: 0
name: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
routeOptimized: false
tags:
- key: string
value: string
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- []Model
Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Model
Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Sequence[Model
Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map
- The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id string
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id str
- The provider-assigned unique ID for this managed resource.
- serving_
endpoint_ strid - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
serving_endpoint_id: Optional[str] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Config ModelServingConfig - The model serving endpoint configuration.
- Name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- RateLimits List<ModelServingRateLimit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- RouteOptimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- ServingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags List<ModelServingTag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config ModelServingConfigArgs - The model serving endpoint configuration.
- Name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- RateLimits []ModelServingRateLimitArgs - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- RouteOptimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- ServingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags []ModelServingTagArgs - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfig - The model serving endpoint configuration.
- name String - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits List<ModelServingRateLimit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<ModelServingTag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfig - The model serving endpoint configuration.
- name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits ModelServingRateLimit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags ModelServingTag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfigArgs - The model serving endpoint configuration.
- name str - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_limits Sequence[ModelServingRateLimitArgs] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving_endpoint_id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags Sequence[ModelServingTagArgs] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map - The model serving endpoint configuration.
- name String - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<Property Map> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Supporting Types
ModelServingConfig, ModelServingConfigArgs
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities []ModelServingConfigServedEntity - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels []ModelServingConfigServedModel - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities ModelServingConfigServedEntity[] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels ModelServingConfigServedModel[] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- auto_capture_config ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served_entities Sequence[ModelServingConfigServedEntity] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served_models Sequence[ModelServingConfigServedModel] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_config ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig Property Map - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<Property Map> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<Property Map> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig Property Map - A single block represents the traffic split configuration amongst the served models.
ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs
- CatalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- SchemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- CatalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- SchemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema_name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_name_prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs
- EntityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- EntityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- EnvironmentVars Dictionary<string, object> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- ExternalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- WorkloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- WorkloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- EntityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- EntityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- EnvironmentVars map[string]interface{} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- ExternalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- WorkloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- WorkloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entityName String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- entityVersion String - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- environmentVars Map<String,Object> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- externalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workloadSize String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- workloadType String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- entityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- environmentVars {[key: string]: any} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- externalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- instanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- scaleToZeroEnabled boolean - Whether the compute resources for the served entity should scale down to zero.
- workloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- workloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entity_
name str - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity_
version str - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment_
vars Mapping[str, Any] - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external_
model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance_
profile_ strarn - ARN of the instance profile that the served entity uses to access AWS resources.
- max_
provisioned_ intthroughput - The maximum tokens per second that the endpoint can scale up to.
- min_
provisioned_ intthroughput - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served entity should scale down to zero.
- workload_
size str - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_
type str - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<Any> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model Property Map - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, and scale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model
is present, the served entities list can only have one served_entity
object. For an existing endpoint with external_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created without external_model
, users cannot update it to add external_model
later. - instance
Profile StringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- max
Provisioned NumberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned NumberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to <entity-name>-<entity-version>. - scale
To BooleanZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). If scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL
and others. See the available GPU types.
ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - Palm
Config ModelServing Config Served Entity External Model Palm Config
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - Palm
Config ModelServing Config Served Entity External Model Palm Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task String
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - cohere
Config ModelServing Config Served Entity External Model Cohere Config - databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - openai
Config ModelServing Config Served Entity External Model Openai Config - palm
Config ModelServing Config Served Entity External Model Palm Config
- name string
- The name of the external model.
- provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task string
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - cohere
Config ModelServing Config Served Entity External Model Cohere Config - databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - openai
Config ModelServing Config Served Entity External Model Openai Config - palm
Config ModelServing Config Served Entity External Model Palm Config
- name str
- The name of the external model.
- provider str
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task str
- The task type of the external model.
- ai21labs_
config ModelServing Config Served Entity External Model Ai21labs Config - amazon_
bedrock_ Modelconfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic_
config ModelServing Config Served Entity External Model Anthropic Config - cohere_
config ModelServing Config Served Entity External Model Cohere Config - databricks_
model_ Modelserving_ config Serving Config Served Entity External Model Databricks Model Serving Config - openai_
config ModelServing Config Served Entity External Model Openai Config - palm_
config ModelServing Config Served Entity External Model Palm Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task String
- The task type of the external model.
- ai21labs
Config Property Map - amazon
Bedrock Property MapConfig - anthropic
Config Property Map - cohere
Config Property Map - databricks
Model Property MapServing Config - openai
Config Property Map - palm
Config Property Map
ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
- Ai21labs
Api stringKey
- Ai21labs
Api stringKey
- ai21labs
Api StringKey
- ai21labs
Api stringKey
- ai21labs_
api_ strkey
- ai21labs
Api StringKey
ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
- Aws
Access stringKey Id - Aws
Region string - Aws
Secret stringAccess Key - Bedrock
Provider string
- Aws
Access stringKey Id - Aws
Region string - Aws
Secret stringAccess Key - Bedrock
Provider string
- aws
Access StringKey Id - aws
Region String - aws
Secret StringAccess Key - bedrock
Provider String
- aws
Access stringKey Id - aws
Region string - aws
Secret stringAccess Key - bedrock
Provider string
- aws_
access_ strkey_ id - aws_
region str - aws_
secret_ straccess_ key - bedrock_
provider str
- aws
Access StringKey Id - aws
Region String - aws
Secret StringAccess Key - bedrock
Provider String
ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
- Anthropic
Api stringKey
- Anthropic
Api stringKey
- anthropic
Api StringKey
- anthropic
Api stringKey
- anthropic
Api StringKey
ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs
- Cohere
Api stringKey
- Cohere
Api stringKey
- cohere
Api StringKey
- cohere
Api stringKey
- cohere_
api_ strkey
- cohere
Api StringKey
ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
- Databricks
Api stringToken - Databricks
Workspace stringUrl
- Databricks
Api stringToken - Databricks
Workspace stringUrl
- databricks
Api StringToken - databricks
Workspace StringUrl
- databricks
Api stringToken - databricks
Workspace stringUrl
- databricks
Api StringToken - databricks
Workspace StringUrl
ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringTenant Id - Openai
Api stringBase - Openai
Api stringKey - Openai
Api stringType - Openai
Api stringVersion - Openai
Deployment stringName - Openai
Organization string
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringTenant Id - Openai
Api stringBase - Openai
Api stringKey - Openai
Api stringType - Openai
Api stringVersion - Openai
Deployment stringName - Openai
Organization string
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringTenant Id - openai
Api StringBase - openai
Api StringKey - openai
Api StringType - openai
Api StringVersion - openai
Deployment StringName - openai
Organization String
- microsoft
Entra stringClient Id - microsoft
Entra stringClient Secret - microsoft
Entra stringTenant Id - openai
Api stringBase - openai
Api stringKey - openai
Api stringType - openai
Api stringVersion - openai
Deployment stringName - openai
Organization string
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringTenant Id - openai
Api StringBase - openai
Api StringKey - openai
Api StringType - openai
Api StringVersion - openai
Deployment StringName - openai
Organization String
ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs
- Palm
Api stringKey
- Palm
Api stringKey
- palm
Api StringKey
- palm
Api stringKey
- palm_
api_ strkey
- palm
Api StringKey
ModelServingConfigServedModel, ModelServingConfigServedModelArgs
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - Environment
Vars Dictionary<string, object> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - Environment
Vars map[string]interface{} - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars Map<String,Object> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name string - The name of the model in Databricks Model Registry to be served.
- model
Version string - The version of the model in Databricks Model Registry to be served.
- workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars {[key: string]: any} - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To booleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model_
name str - The name of the model in Databricks Model Registry to be served.
- model_
version str - The version of the model in Databricks Model Registry to be served.
- workload_
size str - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment_
vars Mapping[str, Any] - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance_
profile_ strarn - ARN of the instance profile that the served model will use to access AWS resources.
- name str
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload_
type str - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars Map<Any> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs
- Routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- Routes
[]Model
Serving Config Traffic Config Route - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
Model
Serving Config Traffic Config Route[] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
Sequence[Model
Serving Config Traffic Config Route] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes List<Property Map>
- Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Integer - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model stringName - traffic
Percentage number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_
model_ strname - traffic_
percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
ModelServingRateLimit, ModelServingRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
ModelServingTag, ModelServingTagArgs
Import
The model serving resource can be imported using the name of the endpoint.
bash
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
databricks
Terraform Provider.