"""
|
|
Script to create a real-time inferencing service
|
|
Based on:
|
|
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-deploy-mlflow-models
|
|
"""
|
|
from azure.ai.ml.entities import ManagedOnlineDeployment, ManagedOnlineEndpoint
|
|
|
|
from azml_02_hyperparameters_tuning import best_model_name
|
|
from initialize_constants import AZURE_WORKSPACE_NAME, VM_SIZE
|
|
from ml_client import create_or_load_ml_client
|
|
|
|
online_endpoint_name = ("srv-" + AZURE_WORKSPACE_NAME).lower()
|
|
|
|
|
|
def main():
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create an endpoint
    print(f"Creating endpoint {online_endpoint_name}")
    endpoint = ManagedOnlineEndpoint(
        name=online_endpoint_name,
        auth_mode="key",
    )

    # `result()` blocks until the endpoint is fully provisioned
    ml_client.online_endpoints.begin_create_or_update(endpoint).result()

    # 3. Create a deployment
    # Resolve the latest registered version of the best model by listing all
    # of its versions and taking the highest one
    best_model_latest_version = max(
        int(model.version) for model in ml_client.models.list(name=best_model_name)
    )

    blue_deployment = ManagedOnlineDeployment(
        name=online_endpoint_name,
        endpoint_name=online_endpoint_name,
        # @latest doesn't work with model paths, so pin the resolved version
        model=f"azureml:{best_model_name}:{best_model_latest_version}",
        instance_type=VM_SIZE,
        instance_count=1,
    )

    # Create the deployment, then assign all the traffic to it
    # `result()` blocks until each operation completes
    ml_client.begin_create_or_update(blue_deployment).result()
    endpoint.traffic = {online_endpoint_name: 100}
    ml_client.begin_create_or_update(endpoint).result()
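
    # 4. (Optional) Smoke-test the deployment. Illustrative sketch, not part of
    # the original script: it assumes a local `sample_request.json` file with an
    # MLflow-style "input_data" payload exists next to this script; `invoke()`
    # and `get_keys()` are standard `MLClient.online_endpoints` operations.
    # Uncomment to use.
    # response = ml_client.online_endpoints.invoke(
    #     endpoint_name=online_endpoint_name,
    #     deployment_name=online_endpoint_name,
    #     request_file="sample_request.json",
    # )
    # print(f"Test response: {response}")
    # keys = ml_client.online_endpoints.get_keys(name=online_endpoint_name)
    # print(f"Primary key: {keys.primary_key}")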


if __name__ == "__main__":
    main()