# 33-AzureML-2/azuremlpythonsdk-v2/azml_03_realtime_inference.py
"""
Script to create a real-time inferencing service
Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint
from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()
def main():
    """Deploy the best model as a managed online (real-time) endpoint.

    Steps:
      1. Create or load an ML client for the workspace.
      2. Create (or update) a managed online endpoint with key auth.
      3. Deploy the latest version of the best model to that endpoint.
      4. Route 100% of the traffic to the new deployment.

    All Azure operations are long-running; ``.result()`` blocks until
    each one completes so the steps run strictly in order.
    """
    # 1. Create or Load a ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # Method `result()` should be added to wait until completion
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment
    # Resolve the latest registered version explicitly, because
    # `@latest` doesn't work with model paths.
    best_model_latest_version = max(
        int(model.version)
        for model in ml_client.models.list(name=best_model_name)
    )
    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )
    # Method `result()` should be added to wait until completion
    ml_client.begin_create_or_update(blue_deployment).result()

    # Assign all the traffic to this deployment
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.begin_create_or_update(endpoint).result()


if __name__ == "__main__":
    main()