"""
Script to create a real-time inferencing service.

Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
"""
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint

from azml_02_hyperparameters_tuning import best_model_name
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
from ml_client import create_or_load_ml_client

online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()


def main():
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint with key-based authentication
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )
    # `result()` blocks until the endpoint is provisioned
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()
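
    # (Illustrative extra step, not part of the original script.) The provisioned
    # endpoint can be read back to log its scoring URI for later smoke tests.
    created_endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)
    print(f"Scoring URI: {created_endpoint.scoring_uri}")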

    # 3. Create a deployment
    best_model_latest_version = max(
        [int(m.version) for m in ml_client.models.list(name=best_model_name)]
    )

    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        # "@latest" doesn't work with model paths, so the version is resolved explicitly
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )

    # Create the deployment, then route all traffic to it;
    # `result()` blocks until each operation completes
    ml_client.begin_create_or_update(blue_deployment).result()
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.begin_create_or_update(endpoint).result()
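

# Minimal post-deployment smoke test (a sketch, not part of the original script).
# `MLClient.online_endpoints.invoke` posts a JSON payload to the live endpoint;
# "sample_request.json" is an assumed placeholder whose schema must match the
# deployed MLflow model's input signature.
def smoke_test(request_file="sample_request.json"):
    ml_client = create_or_load_ml_client()
    response = ml_client.online_endpoints.invoke(
        endpoint_name=online_endpoint_name,
        deployment_name=online_endpoint_name,  # the deployment shares the endpoint's name
        request_file=request_file,
    )
    print(response)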


if __name__ == "__main__":
    main()