33-AzureML-2/solution-v2/azml_01_experiment_remote_compute.py

71 lines
2.2 KiB
Python
Raw Normal View History

2024-09-04 10:15:43 +02:00
"""
Script to train a model from a tabular dataset using a remote compute
Based on:
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-scikit-learn
"""
from azure.ai.ml import Input, command
from azure.ai.ml.constants import AssetTypes
from compute_aml import create_or_load_aml
from data_tabular import create_tabular_dataset, name_dataset
from environment import custom_env_name
from initialize_constants import AML_COMPUTE_NAME
from ml_client import create_or_load_ml_client
# Experiment name; main() also uses it as the job's display name.
experiment_name = "mslearn-train-diabetes"
# Local folder passed to the job as its `code` argument.
experiment_folder = "./diabetes_training"
# Script the job command runs (`python ${{inputs.script_name}}`).
script_name = "diabetes_training.py"
# Value forwarded to the script's --registered_model_name argument.
registered_model_name = "diabetes_model"
def main():
    """Submit the diabetes training script as a command job on remote compute.

    Steps performed, in order:
      1. Create or load the ML client.
      2. Create or load the compute resources.
      3. Create and register the dataset, then resolve its latest version.
      4. Build the command job, submit it, and stream its output until the
         job finishes.

    Returns:
        The job object fetched again after streaming ends, so that it
        carries the job's latest status.

    Raises:
        LookupError: If no data asset named ``name_dataset`` with a known
            latest version is listed by the ML client after registration.
    """
    # 1. Create or load an ML client
    ml_client = create_or_load_ml_client()

    # 2. Create compute resources
    create_or_load_aml()

    # 3. Create and register the dataset
    create_tabular_dataset()

    # List the data assets once and reuse the result for both the diagnostic
    # print and the version lookup (avoids a second service round-trip).
    datasets = list(ml_client.data.list())
    print(datasets)

    latest_version_dataset = next(
        (
            dataset.latest_version
            for dataset in datasets
            if dataset.name == name_dataset
        ),
        None,
    )
    if latest_version_dataset is None:
        # Fail with a clear message instead of leaking a bare StopIteration
        # (or building an "azureml:<name>:None" path).
        raise LookupError(
            f"No data asset named {name_dataset!r} with a latest version "
            "was found in the workspace"
        )

    # 4. Run Job
    job = command(
        inputs={
            "script_name": script_name,
            "data": Input(
                type=AssetTypes.URI_FILE,
                # @latest doesn't work with dataset paths, so the resolved
                # version is pinned explicitly.
                path=f"azureml:{name_dataset}:{latest_version_dataset}",
            ),
            "registered_model_name": registered_model_name,
        },
        code=experiment_folder,
        # The ${{inputs.*}} placeholders are plain strings (not f-strings);
        # they are passed through verbatim to the job definition.
        command=(
            "python ${{inputs.script_name}}"
            " --data ${{inputs.data}}"
            " --registered_model_name ${{inputs.registered_model_name}}"
        ),
        environment=f"{custom_env_name}@latest",
        compute=AML_COMPUTE_NAME,
        experiment_name=experiment_name,
        display_name=experiment_name,
    )

    # Submit the command
    returned_job = ml_client.jobs.create_or_update(job)

    # Stream the output and wait until the job is finished
    ml_client.jobs.stream(returned_job.name)

    # Refresh the latest status of the job after streaming and hand it back
    # to the caller so the final state can be inspected.
    return ml_client.jobs.get(name=returned_job.name)
# Run the job submission only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()