"""Script to create a real-time inferencing service.

Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint

from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client

# Endpoint names must be lowercase; derive it from the workspace name.
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()


def main():
    """Create a managed online endpoint and deploy the latest best model to it.

    Steps:
        1. Create or load an ML client.
        2. Create (or update) the managed online endpoint.
        3. Deploy the latest registered version of the best model.
        4. Route 100% of the traffic to that deployment.
    """
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # `.result()` blocks until the long-running operation completes.
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment.
    # `@latest` doesn't work with model paths, so resolve the highest
    # registered version number explicitly.
    best_model_latest_version = max(
        int(m.version) for m in ml_client.models.list(name=best_model_name)
    )
    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )
    # Use the online_deployments collection (consistent with the endpoint
    # calls above) rather than the generic client-level method.
    ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

    # 4. Assign all the traffic to this deployment.
    # Traffic keys are deployment names; here it matches the endpoint name.
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()


if __name__ == "__main__":
    main()