113 lines
		
	
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			113 lines
		
	
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
|     Script to tune hyperparameters with a sweep job and register the best model
 | |
|     Based on:
 | |
|     https://learn.microsoft.com/en-us/azure/machine-learning/how-to-train-scikit-learn
 | |
| """
 | |
| from azure.ai.ml import Input, command
 | |
| from azure.ai.ml.constants import AssetTypes
 | |
| from azure.ai.ml.entities import Model
 | |
| from azure.ai.ml.sweep import Choice
 | |
| 
 | |
| from compute_aml import create_or_load_aml
 | |
| from data_tabular import create_tabular_dataset, name_dataset
 | |
| from environment import create_docker_environment, custom_env_name
 | |
| from initialize_constants import AML_COMPUTE_NAME
 | |
| from ml_client import create_or_load_ml_client
 | |
| 
 | |
# Folder handed to the command job as its `code` source.
experiment_folder = "diabetes_hyperdrive"
# Used as both the experiment name and the display name of the job.
experiment_name = "mslearn-diabetes-hyperdrive"
# Passed to the job as the `script_name` input.
script_name = "diabetes_training.py"
# Passed to the job as an input; also the artifact sub-path under which the
# best run's model is looked up when it is registered.
registered_model_name = "diabetes_model_hyper"
# Name given to the Model entity built from the best child run.
best_model_name = "best_diabetes_model"
 | |
| 
 | |
| 
 | |
def main():
    """Run a hyperparameter sweep and register the best resulting model.

    Builds a command job for the training script, turns it into a grid
    sweep that maximizes the ``AUC`` metric, submits it, streams it until it
    finishes and, only if the sweep ends with status ``"Completed"``,
    registers the model produced by the best child run.

    If the sweep ends in any other status, the status is printed and the
    function returns without registering anything.

    NOTE: the tokens spelled ``XXXX`` appear to be unfilled template blanks.
    They are kept verbatim and must be replaced before the script can run.

    Returns:
        None.
    """
    # 1. Create or Load a ML client
    ml_client = XXXX()

    # 2. Create compute resources
    XXXX()

    # 3. Create and register a File Dataset
    XXXX()
    latest_version_dataset = XXXX()

    # 4. Environment
    # Only build the custom environment when the workspace does not list it.
    environment_names = [env.name for XXXX in ml_client.environments.list()]
    if custom_env_name not in environment_names:
        create_docker_environment()

    # 5. Run Job
    job_for_sweep = command(
        inputs=dict(
            script_name=script_name,
            data=Input(
                type=AssetTypes.URI_FILE,
                # @latest doesn't work with dataset paths
                path=f"azureml:{name_dataset}:{latest_version_dataset}",
            ),
            registered_model_name=registered_model_name,
            learning_rate=XXXX(values=XXXX),
            n_estimators=XXXX(values=XXXX),
        ),
        code=experiment_folder,
        command=(
            "python XXXX"
            + " --data XXXX"
            + " --registered_model_name XXXX"
            + " --learning_rate XXXX"
            + " --n_estimators XXXX"
        ),
        environment=XXXX,
        compute=AML_COMPUTE_NAME,
        experiment_name=experiment_name,
        display_name=experiment_name,
    )

    # Configure hyperdrive settings
    sweep_job = job_for_sweep.XXXX(
        compute=AML_COMPUTE_NAME,
        sampling_algorithm="grid",
        primary_metric="AUC",
        goal="Maximize",
        max_total_trials=6,
        max_concurrent_trials=2,
    )

    # submit the command
    returned_sweep_job = ml_client.create_or_update(sweep_job)

    # stream the output and wait until the job is finished
    ml_client.jobs.stream(returned_sweep_job.name)

    # refresh the latest status of the job after streaming
    returned_sweep_job = ml_client.jobs.get(name=returned_sweep_job.name)

    # Without a completed sweep there is no model to register: report the
    # status and stop here, rather than falling through to the registration
    # step with `model` unbound.
    if returned_sweep_job.status != "Completed":
        print(
            f"Sweep job status: {returned_sweep_job.status}. "
            "Please wait until it completes"
        )
        return

    # First let us get the run which gave us the best result
    best_run = returned_sweep_job.properties["best_child_run_id"]

    # lets get the model from this run
    model = Model(
        # the script stores the model as the given name
        path=(
            f"azureml://jobs/{best_run}/outputs/artifacts/paths/"
            f"{registered_model_name}/"
        ),
        name=best_model_name,
        type="mlflow_model",
    )

    # Register best model
    print(f"Registering Model {best_model_name}")
    ml_client.models.XXXX(model=model)
 | |
| 
 | |
| 
 | |
# Run the sweep only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 |