33-AzureML-2/solution-v2/azml_03_realtime_inference.py

52 lines
1.7 KiB
Python
Raw Normal View History

2024-09-04 10:15:43 +02:00
"""
Script to create a real-time inferencing service
Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint
from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client
# Managed online endpoint names must be lowercase; derive a deterministic
# name from the workspace so reruns target the same endpoint.
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()
def main():
    """Create a managed online endpoint and deploy the best model to it.

    Steps:
      1. Create or load an ML client.
      2. Create (or update) a key-authenticated managed online endpoint.
      3. Deploy the latest registered version of the best model.
      4. Route 100% of the endpoint traffic to that deployment.

    Raises:
        ValueError: from ``max`` if no model named ``best_model_name``
            is registered in the workspace.
    """
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # `.result()` blocks until the long-running operation completes
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment pinned to the latest registered model version
    best_model_latest_version = max(
        int(m.version) for m in ml_client.models.list(name=best_model_name)
    )
    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        # "@latest" doesn't work with model paths, so pin the version explicitly
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )
    # Use the operation-specific client (consistent with the endpoint call
    # above) rather than the generic `ml_client.begin_create_or_update`.
    ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

    # 4. Assign all the traffic to this deployment and update the endpoint
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()
# Run the deployment workflow only when executed as a script (not on import).
if __name__ == "__main__":
    main()