33-AzureML-2/solution-v2/azml_03_realtime_inference.py

52 lines
1.7 KiB
Python
Raw Normal View History

2024-09-04 10:15:43 +02:00
"""
Script to create a real-time inferencing service
Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint
from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client
# Managed online endpoint names must be lowercase; derive a deterministic
# name from the workspace so reruns target the same endpoint.
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()
def main():
    """Create a managed online endpoint and deploy the best model to it.

    Steps:
      1. Create or load an ML client.
      2. Create (or update) a key-authenticated managed online endpoint.
      3. Deploy the latest registered version of the best model.
      4. Route 100% of the endpoint traffic to that deployment.

    Raises:
        ValueError: from ``max`` if no model named ``best_model_name``
            is registered in the workspace.
    """
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # `.result()` blocks until the long-running operation completes
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment pinned to the latest registered model version
    best_model_latest_version = max(
        int(m.version) for m in ml_client.models.list(name=best_model_name)
    )
    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        # "@latest" doesn't work with model paths, so pin the version explicitly
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )
    # Use the operation-specific client (consistent with the endpoint call
    # above) rather than the generic `ml_client.begin_create_or_update`.
    ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

    # 4. Assign all the traffic to this deployment and update the endpoint
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()
# Run the deployment workflow only when executed as a script (not on import).
if __name__ == "__main__":
    main()