databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
Note: If you replace served_models
with served_entities
in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const _this = new databricks.ModelServing("this", {
name: "ads-serving-endpoint",
config: {
servedEntities: [
{
name: "prod_model",
entityName: "ads-model",
entityVersion: "2",
workloadSize: "Small",
scaleToZeroEnabled: true,
},
{
name: "candidate_model",
entityName: "ads-model",
entityVersion: "4",
workloadSize: "Small",
scaleToZeroEnabled: false,
},
],
trafficConfig: {
routes: [
{
servedModelName: "prod_model",
trafficPercentage: 90,
},
{
servedModelName: "candidate_model",
trafficPercentage: 10,
},
],
},
},
});
import pulumi
import pulumi_databricks as databricks
this = databricks.ModelServing("this",
name="ads-serving-endpoint",
config=databricks.ModelServingConfigArgs(
served_entities=[
databricks.ModelServingConfigServedEntityArgs(
name="prod_model",
entity_name="ads-model",
entity_version="2",
workload_size="Small",
scale_to_zero_enabled=True,
),
databricks.ModelServingConfigServedEntityArgs(
name="candidate_model",
entity_name="ads-model",
entity_version="4",
workload_size="Small",
scale_to_zero_enabled=False,
),
],
traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
routes=[
databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="prod_model",
traffic_percentage=90,
),
databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="candidate_model",
traffic_percentage=10,
),
],
),
))
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Name: pulumi.String("ads-serving-endpoint"),
Config: &databricks.ModelServingConfigArgs{
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("prod_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("2"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(true),
},
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("candidate_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("4"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(false),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var @this = new Databricks.ModelServing("this", new()
{
Name = "ads-serving-endpoint",
Config = new Databricks.Inputs.ModelServingConfigArgs
{
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "prod_model",
EntityName = "ads-model",
EntityVersion = "2",
WorkloadSize = "Small",
ScaleToZeroEnabled = true,
},
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "candidate_model",
EntityName = "ads-model",
EntityVersion = "4",
WorkloadSize = "Small",
ScaleToZeroEnabled = false,
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "prod_model",
TrafficPercentage = 90,
},
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "candidate_model",
TrafficPercentage = 10,
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var this_ = new ModelServing("this", ModelServingArgs.builder()
.name("ads-serving-endpoint")
.config(ModelServingConfigArgs.builder()
.servedEntities(
ModelServingConfigServedEntityArgs.builder()
.name("prod_model")
.entityName("ads-model")
.entityVersion("2")
.workloadSize("Small")
.scaleToZeroEnabled(true)
.build(),
ModelServingConfigServedEntityArgs.builder()
.name("candidate_model")
.entityName("ads-model")
.entityVersion("4")
.workloadSize("Small")
.scaleToZeroEnabled(false)
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("prod_model")
.trafficPercentage(90)
.build(),
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("candidate_model")
.trafficPercentage(10)
.build())
.build())
.build())
.build());
}
}
resources:
this:
type: databricks:ModelServing
properties:
name: ads-serving-endpoint
config:
servedEntities:
- name: prod_model
entityName: ads-model
entityVersion: '2'
workloadSize: Small
scaleToZeroEnabled: true
- name: candidate_model
entityName: ads-model
entityVersion: '4'
workloadSize: Small
scaleToZeroEnabled: false
trafficConfig:
routes:
- servedModelName: prod_model
trafficPercentage: 90
- servedModelName: candidate_model
trafficPercentage: 10
Access Control
- databricks.Permissions can control which groups or individual users can Manage, Query or View individual serving endpoints.
Related Resources
The following resources are often used in the same context:
- databricks.RegisteredModel to create Models in Unity Catalog in Databricks.
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create models in the workspace model registry in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.Notebook data source to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Create ModelServing Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
{
Config = new Databricks.Inputs.ModelServingConfigArgs
{
AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
EntityName = "string",
EntityVersion = "string",
EnvironmentVars =
{
{ "string", "any" },
},
ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
{
Name = "string",
Provider = "string",
Task = "string",
Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
{
Ai21labsApiKey = "string",
},
AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
{
AwsAccessKeyId = "string",
AwsRegion = "string",
AwsSecretAccessKey = "string",
BedrockProvider = "string",
},
AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
{
AnthropicApiKey = "string",
},
CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
{
CohereApiKey = "string",
},
DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
{
DatabricksApiToken = "string",
DatabricksWorkspaceUrl = "string",
},
OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
{
MicrosoftEntraClientId = "string",
MicrosoftEntraClientSecret = "string",
MicrosoftEntraTenantId = "string",
OpenaiApiBase = "string",
OpenaiApiKey = "string",
OpenaiApiType = "string",
OpenaiApiVersion = "string",
OpenaiDeploymentName = "string",
OpenaiOrganization = "string",
},
PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
{
PalmApiKey = "string",
},
},
InstanceProfileArn = "string",
MaxProvisionedThroughput = 0,
MinProvisionedThroughput = 0,
Name = "string",
ScaleToZeroEnabled = false,
WorkloadSize = "string",
WorkloadType = "string",
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "string",
TrafficPercentage = 0,
},
},
},
},
Name = "string",
RateLimits = new[]
{
new Databricks.Inputs.ModelServingRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
RouteOptimized = false,
Tags = new[]
{
new Databricks.Inputs.ModelServingTagArgs
{
Key = "string",
Value = "string",
},
},
});
example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
EntityName: pulumi.String("string"),
EntityVersion: pulumi.String("string"),
EnvironmentVars: pulumi.Map{
"string": pulumi.Any("any"),
},
ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
Name: pulumi.String("string"),
Provider: pulumi.String("string"),
Task: pulumi.String("string"),
Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
Ai21labsApiKey: pulumi.String("string"),
},
AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
AwsAccessKeyId: pulumi.String("string"),
AwsRegion: pulumi.String("string"),
AwsSecretAccessKey: pulumi.String("string"),
BedrockProvider: pulumi.String("string"),
},
AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
AnthropicApiKey: pulumi.String("string"),
},
CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
CohereApiKey: pulumi.String("string"),
},
DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
DatabricksApiToken: pulumi.String("string"),
DatabricksWorkspaceUrl: pulumi.String("string"),
},
OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
MicrosoftEntraClientId: pulumi.String("string"),
MicrosoftEntraClientSecret: pulumi.String("string"),
MicrosoftEntraTenantId: pulumi.String("string"),
OpenaiApiBase: pulumi.String("string"),
OpenaiApiKey: pulumi.String("string"),
OpenaiApiType: pulumi.String("string"),
OpenaiApiVersion: pulumi.String("string"),
OpenaiDeploymentName: pulumi.String("string"),
OpenaiOrganization: pulumi.String("string"),
},
PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
PalmApiKey: pulumi.String("string"),
},
},
InstanceProfileArn: pulumi.String("string"),
MaxProvisionedThroughput: pulumi.Int(0),
MinProvisionedThroughput: pulumi.Int(0),
Name: pulumi.String("string"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("string"),
WorkloadType: pulumi.String("string"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("string"),
TrafficPercentage: pulumi.Int(0),
},
},
},
},
Name: pulumi.String("string"),
RateLimits: databricks.ModelServingRateLimitArray{
&databricks.ModelServingRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
RouteOptimized: pulumi.Bool(false),
Tags: databricks.ModelServingTagArray{
&databricks.ModelServingTagArgs{
Key: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
})
var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
.config(ModelServingConfigArgs.builder()
.autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.servedEntities(ModelServingConfigServedEntityArgs.builder()
.entityName("string")
.entityVersion("string")
.environmentVars(Map.of("string", "any"))
.externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
.name("string")
.provider("string")
.task("string")
.ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
.ai21labsApiKey("string")
.build())
.amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
.awsAccessKeyId("string")
.awsRegion("string")
.awsSecretAccessKey("string")
.bedrockProvider("string")
.build())
.anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
.anthropicApiKey("string")
.build())
.cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
.cohereApiKey("string")
.build())
.databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
.databricksApiToken("string")
.databricksWorkspaceUrl("string")
.build())
.openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
.microsoftEntraClientId("string")
.microsoftEntraClientSecret("string")
.microsoftEntraTenantId("string")
.openaiApiBase("string")
.openaiApiKey("string")
.openaiApiType("string")
.openaiApiVersion("string")
.openaiDeploymentName("string")
.openaiOrganization("string")
.build())
.palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
.palmApiKey("string")
.build())
.build())
.instanceProfileArn("string")
.maxProvisionedThroughput(0)
.minProvisionedThroughput(0)
.name("string")
.scaleToZeroEnabled(false)
.workloadSize("string")
.workloadType("string")
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("string")
.trafficPercentage(0)
.build())
.build())
.build())
.name("string")
.rateLimits(ModelServingRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.routeOptimized(false)
.tags(ModelServingTagArgs.builder()
.key("string")
.value("string")
.build())
.build());
model_serving_resource = databricks.ModelServing("modelServingResource",
config=databricks.ModelServingConfigArgs(
auto_capture_config=databricks.ModelServingConfigAutoCaptureConfigArgs(
catalog_name="string",
enabled=False,
schema_name="string",
table_name_prefix="string",
),
served_entities=[databricks.ModelServingConfigServedEntityArgs(
entity_name="string",
entity_version="string",
environment_vars={
"string": "any",
},
external_model=databricks.ModelServingConfigServedEntityExternalModelArgs(
name="string",
provider="string",
task="string",
ai21labs_config=databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs(
ai21labs_api_key="string",
),
amazon_bedrock_config=databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs(
aws_access_key_id="string",
aws_region="string",
aws_secret_access_key="string",
bedrock_provider="string",
),
anthropic_config=databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs(
anthropic_api_key="string",
),
cohere_config=databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs(
cohere_api_key="string",
),
databricks_model_serving_config=databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs(
databricks_api_token="string",
databricks_workspace_url="string",
),
openai_config=databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs(
microsoft_entra_client_id="string",
microsoft_entra_client_secret="string",
microsoft_entra_tenant_id="string",
openai_api_base="string",
openai_api_key="string",
openai_api_type="string",
openai_api_version="string",
openai_deployment_name="string",
openai_organization="string",
),
palm_config=databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs(
palm_api_key="string",
),
),
instance_profile_arn="string",
max_provisioned_throughput=0,
min_provisioned_throughput=0,
name="string",
scale_to_zero_enabled=False,
workload_size="string",
workload_type="string",
)],
traffic_config=databricks.ModelServingConfigTrafficConfigArgs(
routes=[databricks.ModelServingConfigTrafficConfigRouteArgs(
served_model_name="string",
traffic_percentage=0,
)],
),
),
name="string",
rate_limits=[databricks.ModelServingRateLimitArgs(
calls=0,
renewal_period="string",
key="string",
)],
route_optimized=False,
tags=[databricks.ModelServingTagArgs(
key="string",
value="string",
)])
const modelServingResource = new databricks.ModelServing("modelServingResource", {
config: {
autoCaptureConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
servedEntities: [{
entityName: "string",
entityVersion: "string",
environmentVars: {
string: "any",
},
externalModel: {
name: "string",
provider: "string",
task: "string",
ai21labsConfig: {
ai21labsApiKey: "string",
},
amazonBedrockConfig: {
awsAccessKeyId: "string",
awsRegion: "string",
awsSecretAccessKey: "string",
bedrockProvider: "string",
},
anthropicConfig: {
anthropicApiKey: "string",
},
cohereConfig: {
cohereApiKey: "string",
},
databricksModelServingConfig: {
databricksApiToken: "string",
databricksWorkspaceUrl: "string",
},
openaiConfig: {
microsoftEntraClientId: "string",
microsoftEntraClientSecret: "string",
microsoftEntraTenantId: "string",
openaiApiBase: "string",
openaiApiKey: "string",
openaiApiType: "string",
openaiApiVersion: "string",
openaiDeploymentName: "string",
openaiOrganization: "string",
},
palmConfig: {
palmApiKey: "string",
},
},
instanceProfileArn: "string",
maxProvisionedThroughput: 0,
minProvisionedThroughput: 0,
name: "string",
scaleToZeroEnabled: false,
workloadSize: "string",
workloadType: "string",
}],
trafficConfig: {
routes: [{
servedModelName: "string",
trafficPercentage: 0,
}],
},
},
name: "string",
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
routeOptimized: false,
tags: [{
key: "string",
value: "string",
}],
});
type: databricks:ModelServing
properties:
config:
autoCaptureConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
servedEntities:
- entityName: string
entityVersion: string
environmentVars:
string: any
externalModel:
ai21labsConfig:
ai21labsApiKey: string
amazonBedrockConfig:
awsAccessKeyId: string
awsRegion: string
awsSecretAccessKey: string
bedrockProvider: string
anthropicConfig:
anthropicApiKey: string
cohereConfig:
cohereApiKey: string
databricksModelServingConfig:
databricksApiToken: string
databricksWorkspaceUrl: string
name: string
openaiConfig:
microsoftEntraClientId: string
microsoftEntraClientSecret: string
microsoftEntraTenantId: string
openaiApiBase: string
openaiApiKey: string
openaiApiType: string
openaiApiVersion: string
openaiDeploymentName: string
openaiOrganization: string
palmConfig:
palmApiKey: string
provider: string
task: string
instanceProfileArn: string
maxProvisionedThroughput: 0
minProvisionedThroughput: 0
name: string
scaleToZeroEnabled: false
workloadSize: string
workloadType: string
trafficConfig:
routes:
- servedModelName: string
trafficPercentage: 0
name: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
routeOptimized: false
tags:
- key: string
value: string
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- []Model
Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Model
Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- Sequence[Model
Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map
- The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id string
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint stringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id str
- The provider-assigned unique ID for this managed resource.
- serving_
endpoint_ strid - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint StringId - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
serving_endpoint_id: Optional[str] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Config ModelServingConfig - The model serving endpoint configuration.
- Name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- RateLimits List<ModelServingRateLimit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- RouteOptimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- ServingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags List<ModelServingTag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config ModelServingConfigArgs - The model serving endpoint configuration.
- Name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- RateLimits []ModelServingRateLimitArgs - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- RouteOptimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- ServingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags []ModelServingTagArgs - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfig - The model serving endpoint configuration.
- name String - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits List<ModelServingRateLimit> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<ModelServingTag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfig - The model serving endpoint configuration.
- name string - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits ModelServingRateLimit[] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags ModelServingTag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config ModelServingConfigArgs - The model serving endpoint configuration.
- name str - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_limits Sequence[ModelServingRateLimitArgs] - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- route_optimized bool - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- serving_endpoint_id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags Sequence[ModelServingTagArgs] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map - The model serving endpoint configuration.
- name String - The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rateLimits List<Property Map> - A list of rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
- routeOptimized Boolean - A boolean enabling route optimization for the endpoint. NOTE: only available for custom models.
- servingEndpointId String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<Property Map> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Supporting Types
ModelServingConfig, ModelServingConfigArgs
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities []ModelServingConfigServedEntity - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels []ModelServingConfigServedModel - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities ModelServingConfigServedEntity[] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels ModelServingConfigServedModel[] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- auto_capture_config ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served_entities Sequence[ModelServingConfigServedEntity] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served_models Sequence[ModelServingConfigServedModel] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_config ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig Property Map - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<Property Map> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<Property Map> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig Property Map - A single block represents the traffic split configuration amongst the served models.
ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs
- CatalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- SchemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- CatalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- SchemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schema_name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_name_prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalogName String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean - If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.
- schemaName String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs
- EntityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- EntityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- EnvironmentVars Dictionary<string, object> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- ExternalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- WorkloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- WorkloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- EntityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- EntityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- EnvironmentVars map[string]interface{} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- ExternalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- WorkloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- WorkloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entityName String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- entityVersion String - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- environmentVars Map<String,Object> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- externalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workloadSize String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- workloadType String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entityName string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type `FEATURE_SPEC` in the UC. If it is a UC object, the full name of the object should be given in the form of `catalog_name.schema_name.model_name`.
- entityVersion string - The version of the model in Databricks Model Registry to be served or empty if the entity is a `FEATURE_SPEC`.
- environmentVars {[key: string]: any} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}`
- externalModel ModelServingConfigServedEntityExternalModel - The external model to be served. NOTE: Only one of `external_model` and (`entity_name`, `entity_version`, `workload_size`, `workload_type`, and `scale_to_zero_enabled`) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an `external_model` is present, the served entities list can only have one `served_entity` object. For an existing endpoint with `external_model`, it cannot be updated to an endpoint without `external_model`. If the endpoint is created without `external_model`, users cannot update it to add `external_model` later.
- instanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string - The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to `external_model.name`, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to `entity_name`-`entity_version`.
- scaleToZeroEnabled boolean - Whether the compute resources for the served entity should scale down to zero.
- workloadSize string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency). If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size is 0.
- workloadType string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is `CPU`. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the available GPU types.
- entity_
name str - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity_
version str - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment_
vars Mapping[str, Any] - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external_
model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. For an existing endpoint withexternal_model
, it can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance_
profile_ strarn - ARN of the instance profile that the served entity uses to access AWS resources.
- max_
provisioned_ intthroughput - The maximum tokens per second that the endpoint can scale up to.
- min_
provisioned_ intthroughput - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served entity should scale down to zero.
- workload_
size str - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_
type str - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form of catalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<Any> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model Property Map - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, and scale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When an external_model
is present, the served entities list can only have one served_entity
object. For an existing endpoint with external_model
, it cannot be updated to an endpoint without external_model
. If the endpoint is created without external_model
, users cannot update it to add external_model
later. - instance
Profile StringArn - ARN of the instance profile that the served entity uses to access AWS resources.
- max
Provisioned NumberThroughput - The maximum tokens per second that the endpoint can scale up to.
- min
Provisioned NumberThroughput - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to <entity-name>-<entity-version>. - scale
To BooleanZero Enabled - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). If scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL
and others. See the available GPU types.
ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - Palm
Config ModelServing Config Served Entity External Model Palm Config
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - Task string
- The task type of the external model.
- Ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - Amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - Anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - Cohere
Config ModelServing Config Served Entity External Model Cohere Config - Databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - Openai
Config ModelServing Config Served Entity External Model Openai Config - Palm
Config ModelServing Config Served Entity External Model Palm Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task String
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - cohere
Config ModelServing Config Served Entity External Model Cohere Config - databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - openai
Config ModelServing Config Served Entity External Model Openai Config - palm
Config ModelServing Config Served Entity External Model Palm Config
- name string
- The name of the external model.
- provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task string
- The task type of the external model.
- ai21labs
Config ModelServing Config Served Entity External Model Ai21labs Config - amazon
Bedrock ModelConfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic
Config ModelServing Config Served Entity External Model Anthropic Config - cohere
Config ModelServing Config Served Entity External Model Cohere Config - databricks
Model ModelServing Config Serving Config Served Entity External Model Databricks Model Serving Config - openai
Config ModelServing Config Served Entity External Model Openai Config - palm
Config ModelServing Config Served Entity External Model Palm Config
- name str
- The name of the external model.
- provider str
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task str
- The task type of the external model.
- ai21labs_
config ModelServing Config Served Entity External Model Ai21labs Config - amazon_
bedrock_ Modelconfig Serving Config Served Entity External Model Amazon Bedrock Config - anthropic_
config ModelServing Config Served Entity External Model Anthropic Config - cohere_
config ModelServing Config Served Entity External Model Cohere Config - databricks_
model_ Modelserving_ config Serving Config Served Entity External Model Databricks Model Serving Config - openai_
config ModelServing Config Served Entity External Model Openai Config - palm_
config ModelServing Config Served Entity External Model Palm Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
, anthropic
, amazon-bedrock
, cohere
, databricks-model-serving
, openai
, and palm
. - task String
- The task type of the external model.
- ai21labs
Config Property Map - amazon
Bedrock Property MapConfig - anthropic
Config Property Map - cohere
Config Property Map - databricks
Model Property MapServing Config - openai
Config Property Map - palm
Config Property Map
ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
- Ai21labs
Api stringKey
- Ai21labs
Api stringKey
- ai21labs
Api StringKey
- ai21labs
Api stringKey
- ai21labs_
api_ strkey
- ai21labs
Api StringKey
ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
- Aws
Access stringKey Id - Aws
Region string - Aws
Secret stringAccess Key - Bedrock
Provider string
- Aws
Access stringKey Id - Aws
Region string - Aws
Secret stringAccess Key - Bedrock
Provider string
- aws
Access StringKey Id - aws
Region String - aws
Secret StringAccess Key - bedrock
Provider String
- aws
Access stringKey Id - aws
Region string - aws
Secret stringAccess Key - bedrock
Provider string
- aws_
access_ strkey_ id - aws_
region str - aws_
secret_ straccess_ key - bedrock_
provider str
- aws
Access StringKey Id - aws
Region String - aws
Secret StringAccess Key - bedrock
Provider String
ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
- Anthropic
Api stringKey
- Anthropic
Api stringKey
- anthropic
Api StringKey
- anthropic
Api stringKey
- anthropic
Api StringKey
ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs
- Cohere
Api stringKey
- Cohere
Api stringKey
- cohere
Api StringKey
- cohere
Api stringKey
- cohere_
api_ strkey
- cohere
Api StringKey
ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
- Databricks
Api stringToken - Databricks
Workspace stringUrl
- Databricks
Api stringToken - Databricks
Workspace stringUrl
- databricks
Api StringToken - databricks
Workspace StringUrl
- databricks
Api stringToken - databricks
Workspace stringUrl
- databricks
Api StringToken - databricks
Workspace StringUrl
ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringTenant Id - Openai
Api stringBase - Openai
Api stringKey - Openai
Api stringType - Openai
Api stringVersion - Openai
Deployment stringName - Openai
Organization string
- Microsoft
Entra stringClient Id - Microsoft
Entra stringClient Secret - Microsoft
Entra stringTenant Id - Openai
Api stringBase - Openai
Api stringKey - Openai
Api stringType - Openai
Api stringVersion - Openai
Deployment stringName - Openai
Organization string
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringTenant Id - openai
Api StringBase - openai
Api StringKey - openai
Api StringType - openai
Api StringVersion - openai
Deployment StringName - openai
Organization String
- microsoft
Entra stringClient Id - microsoft
Entra stringClient Secret - microsoft
Entra stringTenant Id - openai
Api stringBase - openai
Api stringKey - openai
Api stringType - openai
Api stringVersion - openai
Deployment stringName - openai
Organization string
- microsoft
Entra StringClient Id - microsoft
Entra StringClient Secret - microsoft
Entra StringTenant Id - openai
Api StringBase - openai
Api StringKey - openai
Api StringType - openai
Api StringVersion - openai
Deployment StringName - openai
Organization String
ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs
- Palm
Api stringKey
- Palm
Api stringKey
- palm
Api StringKey
- palm
Api stringKey
- palm_
api_ strkey
- palm
Api StringKey
ModelServingConfigServedModel, ModelServingConfigServedModelArgs
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - Environment
Vars Dictionary<string, object> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- Model
Name string - The name of the model in Databricks Model Registry to be served.
- Model
Version string - The version of the model in Databricks Model Registry to be served.
- Workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - Environment
Vars map[string]interface{} - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - Instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- Name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - Scale
To boolZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - Workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars Map<String,Object> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name string - The name of the model in Databricks Model Registry to be served.
- model
Version string - The version of the model in Databricks Model Registry to be served.
- workload
Size string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars {[key: string]: any} - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile stringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name string
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To booleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model_
name str - The name of the model in Databricks Model Registry to be served.
- model_
version str - The version of the model in Databricks Model Registry to be served.
- workload_
size str - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment_
vars Mapping[str, Any] - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance_
profile_ strarn - ARN of the instance profile that the served model will use to access AWS resources.
- name str
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale_
to_ boolzero_ enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload_
type str - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
- model
Name String - The name of the model in Databricks Model Registry to be served.
- model
Version String - The version of the model in Databricks Model Registry to be served.
- workload
Size String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency), Medium
(8 - 16 provisioned concurrency), and Large
(16 - 64 provisioned concurrency). - environment
Vars Map<Any> - a map of environment variable name/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax:
{{secrets/secret_scope/secret_key}}
. - instance
Profile StringArn - ARN of the instance profile that the served model will use to access AWS resources.
- name String
- The name of a served model. It must be unique across an endpoint. If not specified, this field will default to
modelname-modelversion
. A served model name can consist of alphanumeric characters, dashes, and underscores. - scale
To BooleanZero Enabled - Whether the compute resources for the served model should scale down to zero. If
scale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true
. - workload
Type String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like
GPU_SMALL
and others. See documentation for all options. The default value is CPU
.
ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs
- Routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- Routes
[]Model
Serving Config Traffic Config Route - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
List<Model
Serving Config Traffic Config Route> - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
Model
Serving Config Traffic Config Route[] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes
Sequence[Model
Serving Config Traffic Config Route] - Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
- routes List<Property Map>
- Each block represents a route that defines traffic to each served entity. Each
served_entity
block needs to have a corresponding routes
block.
ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- Served
Model stringName - Traffic
Percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Integer - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model stringName - traffic
Percentage number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_
model_ strname - traffic_
percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served
Model StringName - traffic
Percentage Number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
ModelServingRateLimit, ModelServingRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
ModelServingTag, ModelServingTagArgs
Import
The model serving resource can be imported using the name of the endpoint.
bash
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
databricks
Terraform Provider.