diff --git a/azuremlpythonsdk-v2/azml_02_hyperparameters_tuning.py b/azuremlpythonsdk-v2/azml_02_hyperparameters_tuning.py index f6af11c..7da730e 100644 --- a/azuremlpythonsdk-v2/azml_02_hyperparameters_tuning.py +++ b/azuremlpythonsdk-v2/azml_02_hyperparameters_tuning.py @@ -23,17 +23,19 @@ best_model_name = "best_diabetes_model" def main(): # 1. Create or Load a ML client - ml_client = XXXX() + ml_client = create_or_load_ml_client() # 2. Create compute resources - XXXX() + create_or_load_aml() # 3. Create and register a File Dataset - XXXX() - latest_version_dataset = XXXX() + create_tabular_dataset() + latest_version_dataset = max( + [int(d.version) for d in ml_client.data.list(name=name_dataset)] + ) # 4. Environment - environment_names = [env.name for XXXX in ml_client.environments.list()] + environment_names = [env.name for env in ml_client.environments.list()] if custom_env_name not in environment_names: create_docker_environment() @@ -47,25 +49,25 @@ def main(): path=f"azureml:{name_dataset}:{latest_version_dataset}", ), registered_model_name=registered_model_name, - learning_rate=XXXX(values= XXXX), - n_estimators=XXXX(values=XXXX), + learning_rate=Choice(values= [0.01, 0.1, 1.0]), + n_estimators=Choice(values=[10, 100]), ), code=experiment_folder, command=( - "python XXXX" - + " --data XXXX" - + " --registered_model_name XXXX" - + " --learning_rate XXXX" - + " --n_estimators XXXX" + "python ${{inputs.script_name}}" + + " --data ${{inputs.data}}" + + " --registered_model_name ${{inputs.registered_model_name}}" + + " --learning_rate ${{inputs.learning_rate}}" + + " --n_estimators ${{inputs.n_estimators}}" ), - environment=XXXX, + environment=f"{custom_env_name}@latest", compute=AML_COMPUTE_NAME, experiment_name=experiment_name, display_name=experiment_name, ) # Configure hyperdrive settings - sweep_job = job_for_sweep.XXXX( + sweep_job = job_for_sweep.sweep( compute=AML_COMPUTE_NAME, sampling_algorithm="grid", primary_metric="AUC", @@ -106,7 +108,7 @@ def 
main(): # Register best model print(f"Registering Model {best_model_name}") - ml_client.models.XXXX(model=model) + ml_client.models.create_or_update(model=model) if __name__ == "__main__": diff --git a/azuremlpythonsdk-v2/diabetes_hyperdrive/diabetes_training.py b/azuremlpythonsdk-v2/diabetes_hyperdrive/diabetes_training.py index 4f3b4ab..2c8f785 100644 --- a/azuremlpythonsdk-v2/diabetes_hyperdrive/diabetes_training.py +++ b/azuremlpythonsdk-v2/diabetes_hyperdrive/diabetes_training.py @@ -17,28 +17,28 @@ def main(): # Input and output arguments # Get script arguments - parser = XXXX() + parser = argparse.ArgumentParser() # Input dataset parser.add_argument( - "XXXX", + "--data", type=str, help="path to input data", ) # Model name - parser.add_argument("XXXX", type=str, help="model name") + parser.add_argument("--registered_model_name", type=str, help="model name") # Hyperparameters parser.add_argument( - "XXXX", + "--learning_rate", type=float, dest="learning_rate", default=0.1, help="learning rate", ) parser.add_argument( - "XXXX", + "--n_estimators", type=int, dest="n_estimators", default=100, @@ -50,10 +50,10 @@ def main(): print(" ".join(f"{k}={v}" for k, v in vars(args).items())) # Start Logging - mlflow.XXXX() + mlflow.start_run() # enable autologging - mlflow.XXXX() + mlflow.sklearn.autolog() # load the diabetes data (passed as an input dataset) print("input data:", args.data) @@ -78,32 +78,32 @@ def main(): ) # Split data into training set and test set - X_train, X_test, y_train, y_test = XXXX( + X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.30, random_state=0 ) # Train a Gradient Boosting classification model # with the specified hyperparameters print("Training a classification model") - model = XXXX( - learning_rate=XXXX, n_estimators=XXXX + model = GradientBoostingClassifier( + learning_rate=args.learning_rate, n_estimators=args.n_estimators ).fit(X_train, y_train) # calculate accuracy - y_hat = model.XXXX(X_test) + y_hat = model.predict(X_test)
accuracy = np.average(y_hat == y_test) print("Accuracy:", accuracy) mlflow.log_metric("Accuracy", float(accuracy)) # calculate AUC - y_scores = model.XXXX(X_test) + y_scores = model.predict_proba(X_test) auc = roc_auc_score(y_test, y_scores[:, 1]) print("AUC: " + str(auc)) mlflow.log_metric("AUC", float(auc)) # Registering the model to the workspace print("Registering the model via MLFlow") - mlflow.XXXX( + mlflow.sklearn.log_model( sk_model=model, registered_model_name=args.registered_model_name, artifact_path=args.registered_model_name, @@ -116,7 +116,7 @@ def main(): ) # Stop Logging - mlflow.XXXX() + mlflow.end_run() if __name__ == "__main__":