"""Create a real-time (managed online) inferencing service for the best model.

Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint

from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client

# Endpoint names must be lowercase; prefix with "srv-" to mark it as a service.
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()


def main():
    """Provision an online endpoint, deploy the best model, route 100% traffic to it."""
    # 1. Create or Load a ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint (key-based authentication)
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # `begin_create_or_update` is async; `.result()` blocks until completion
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment
    # Resolve the highest registered version of the best model explicitly,
    # because "@latest" doesn't work with model paths.
    best_model_latest_version = max(
        int(model.version) for model in ml_client.models.list(name=best_model_name)
    )
    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        # Fully-qualified model path: azureml:<name>:<version>
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )
    # `.result()` blocks until the deployment finishes provisioning
    ml_client.online_deployments.begin_create_or_update(blue_deployment).result()

    # Assign all the traffic to this deployment
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.begin_create_or_update(endpoint).result()


if __name__ == "__main__":
    main()