We get a little further on azml_02, but it still seems to fail

This commit is contained in:
Lillian Violet 2024-09-05 13:03:42 +02:00
parent 032b05b9c3
commit 1acf43fa82
2 changed files with 31 additions and 29 deletions

View file

@ -23,17 +23,19 @@ best_model_name = "best_diabetes_model"
def main(): def main():
# 1. Create or Load a ML client # 1. Create or Load a ML client
ml_client = XXXX() ml_client = create_or_load_ml_client()
# 2. Create compute resources # 2. Create compute resources
XXXX() create_or_load_aml()
# 3. Create and register a File Dataset # 3. Create and register a File Dataset
XXXX() create_tabular_dataset()
latest_version_dataset = XXXX() latest_version_dataset = max(
[int(d.version) for d in ml_client.data.list(name=name_dataset)]
)
# 4. Environment # 4. Environment
environment_names = [env.name for XXXX in ml_client.environments.list()] environment_names = [env.name for env in ml_client.environments.list()]
if custom_env_name not in environment_names: if custom_env_name not in environment_names:
create_docker_environment() create_docker_environment()
@ -47,25 +49,25 @@ def main():
path=f"azureml:{name_dataset}:{latest_version_dataset}", path=f"azureml:{name_dataset}:{latest_version_dataset}",
), ),
registered_model_name=registered_model_name, registered_model_name=registered_model_name,
learning_rate=XXXX(values= XXXX), learning_rate=Choice(values= [0.01, 0.1, 1.0]),
n_estimators=XXXX(values=XXXX), n_estimators=Choice(values=[10, 100]),
), ),
code=experiment_folder, code=experiment_folder,
command=( command=(
"python XXXX" "python ${{inputs.script_name}}"
+ " --data XXXX" + " --data ${{inputs.data}}"
+ " --registered_model_name XXXX" + " --registered_model_name ${{inputs.registered_model_name}}"
+ " --learning_rate XXXX" + " --learning_rate ${{inputs.learning_rate}}"
+ " --n_estimators XXXX" + " --n_estimators ${{inputs.n_estimators}}"
), ),
environment=XXXX, environment=f"{custom_env_name}@latest",
compute=AML_COMPUTE_NAME, compute=AML_COMPUTE_NAME,
experiment_name=experiment_name, experiment_name=experiment_name,
display_name=experiment_name, display_name=experiment_name,
) )
# Configure hyperdrive settings # Configure hyperdrive settings
sweep_job = job_for_sweep.XXXX( sweep_job = job_for_sweep.sweep(
compute=AML_COMPUTE_NAME, compute=AML_COMPUTE_NAME,
sampling_algorithm="grid", sampling_algorithm="grid",
primary_metric="AUC", primary_metric="AUC",
@ -106,7 +108,7 @@ def main():
# Register best model # Register best model
print(f"Registering Model {best_model_name}") print(f"Registering Model {best_model_name}")
ml_client.models.XXXX(model=model) ml_client.models.register(model=model)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -17,28 +17,28 @@ def main():
# Input and output arguments # Input and output arguments
# Get script arguments # Get script arguments
parser = XXXX() parser = argparse()
# Input dataset # Input dataset
parser.add_argument( parser.add_argument(
"XXXX", "--data",
type=str, type=str,
help="path to input data", help="path to input data",
) )
# Model name # Model name
parser.add_argument("XXXX", type=str, help="model name") parser.add_argument("--registered_model_name", type=str, help="model name")
# Hyperparameters # Hyperparameters
parser.add_argument( parser.add_argument(
"XXXX", "--learning_rate",
type=float, type=float,
dest="learning_rate", dest="learning_rate",
default=0.1, default=0.1,
help="learning rate", help="learning rate",
) )
parser.add_argument( parser.add_argument(
"XXXX", "--n_estimators",
type=int, type=int,
dest="n_estimators", dest="n_estimators",
default=100, default=100,
@ -50,10 +50,10 @@ def main():
print(" ".join(f"{k}={v}" for k, v in vars(args).items())) print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
# Start Logging # Start Logging
mlflow.XXXX() mlflow.start_run()
# enable autologging # enable autologging
mlflow.XXXX() mlflow.sklearn.autolog()
# load the diabetes data (passed as an input dataset) # load the diabetes data (passed as an input dataset)
print("input data:", args.data) print("input data:", args.data)
@ -78,32 +78,32 @@ def main():
) )
# Split data into training set and test set # Split data into training set and test set
X_train, X_test, y_train, y_test = XXXX( X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.30, random_state=0 X, y, test_size=0.30, random_state=0
) )
# Train a Gradient Boosting classification model # Train a Gradient Boosting classification model
# with the specified hyperparameters # with the specified hyperparameters
print("Training a classification model") print("Training a classification model")
model = XXXX( model = GradientBoostingClassifier(
learning_rate=XXXX, n_estimators=XXXX learning_rate=args.learning_rate, n_estimators=args.n_estimators
).fit(X_train, y_train) ).fit(X_train, y_train)
# calculate accuracy # calculate accuracy
y_hat = model.XXXX(X_test) y_hat = model.predict(X_test)
accuracy = np.average(y_hat == y_test) accuracy = np.average(y_hat == y_test)
print("Accuracy:", accuracy) print("Accuracy:", accuracy)
mlflow.log_metric("Accuracy", float(accuracy)) mlflow.log_metric("Accuracy", float(accuracy))
# calculate AUC # calculate AUC
y_scores = model.XXXX(X_test) y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1]) auc = roc_auc_score(y_test, y_scores[:, 1])
print("AUC: " + str(auc)) print("AUC: " + str(auc))
mlflow.log_metric("AUC", float(auc)) mlflow.log_metric("AUC", float(auc))
# Registering the model to the workspace # Registering the model to the workspace
print("Registering the model via MLFlow") print("Registering the model via MLFlow")
mlflow.XXXX( mlflow.sklearn.log_model(
sk_model=model, sk_model=model,
registered_model_name=args.registered_model_name, registered_model_name=args.registered_model_name,
artifact_path=args.registered_model_name, artifact_path=args.registered_model_name,
@ -116,7 +116,7 @@ def main():
) )
# Stop Logging # Stop Logging
mlflow.XXXX() mlflow.end_run()
if __name__ == "__main__": if __name__ == "__main__":