Skip to content

Commit 5f550cb

Browse files
authored
[ML][Pipelines] Support serverless compute in pipeline & add end-to-end tests (Azure#28919)
* skip try get compute operation for serverless * test: add first E2E test for serverless * update pipeline job YAML * test: add sweep * test: add pipeline component * test: add AutoML * test: add parallel and spark * test: update test and skip when playback mode * run black * update spark test case * Update skip test with work item * update VM size * add env YAML * add back default compute * add comment and explicitly specify VM size
1 parent 9242b45 commit 5f550cb

File tree

33 files changed

+157290
-0
lines changed

33 files changed

+157290
-0
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_job_operations.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
GIT_PATH_PREFIX,
5858
LEVEL_ONE_NAMED_RESOURCE_ID_FORMAT,
5959
LOCAL_COMPUTE_TARGET,
60+
SERVERLESS_COMPUTE,
6061
SHORT_URI_FORMAT,
6162
SWEEP_JOB_BEST_CHILD_RUN_ID_PROPERTY_NAME,
6263
TID_FMT,
@@ -379,6 +380,8 @@ def _try_get_compute_arm_id(self, compute: Union[Compute, str]):
379380

380381
if is_data_binding_expression(compute_name):
381382
return compute_name
383+
if compute_name == SERVERLESS_COMPUTE:
384+
return compute_name
382385

383386
try:
384387
return self._compute_operations.get(compute_name).id

sdk/ml/azure-ai-ml/tests/internal/e2etests/test_pipeline_job.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def pipeline_func():
112112
json.dumps(node_rest_dict, indent=2), json.dumps(mismatched_runsettings, indent=2)
113113
)
114114

115+
@pytest.mark.skip(reason="Task 2262955: skip to unblock March release.")
115116
@pytest.mark.parametrize(
116117
"test_case",
117118
PARAMETERS_TO_TEST_WITH_OUTPUT,

sdk/ml/azure-ai-ml/tests/pipeline_job/e2etests/test_pipeline_job.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1900,6 +1900,22 @@ def check_name_version_and_register_succeed(output, asset_name):
19001900
assert pipeline_job.jobs["compare"].outputs.best_model.name == "best_model"
19011901
assert pipeline_job.jobs["compare"].outputs.best_model.version == random_version
19021902

1903+
@pytest.mark.skipif(condition=not is_live(), reason="Task 2177353: component version changes across tests.")
@pytest.mark.parametrize(
    "yaml_path",
    [
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/command/pipeline.yml",
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/sweep/pipeline.yml",
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/pipeline/pipeline.yml",
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/automl/pipeline.yml",
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/parallel/pipeline.yml",
        "./tests/test_configs/pipeline_jobs/serverless_compute/all_types/spark/pipeline.yml",
    ],
)
def test_serverless_compute_in_pipeline(self, client: MLClient, yaml_path: str) -> None:
    # E2E smoke test: each YAML declares serverless compute for a different
    # node type (command/sweep/pipeline/automl/parallel/spark). Loading and
    # submitting (then cancelling) proves serverless compute is accepted
    # end-to-end without resolving a compute ARM id.
    pipeline_job = load_job(yaml_path)
    assert_job_cancel(pipeline_job, client)
1918+
19031919

19041920
@pytest.mark.usefixtures("enable_pipeline_private_preview_features")
19051921
@pytest.mark.e2etest
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
# Command component that runs preprocess.py over train/validation MLTable
# folders and emits preprocessed MLTable outputs.
name: data_preprocessing
display_name: Data Preprocessing
version: 1
type: command
inputs:
  train_data:
    type: mltable
  validation_data:
    type: mltable
outputs:
  preprocessed_train_data:
    type: mltable
  preprocessed_validation_data:
    type: mltable
code: ./src/
environment:
  name: pipeline-custom-environment
  image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
  conda_file: ./preprocessing_env.yaml
command: >-
  python preprocess.py
  --train_data ${{inputs.train_data}}
  --validation_data ${{inputs.validation_data}}
  --preprocessed_train_data ${{outputs.preprocessed_train_data}}
  --preprocessed_validation_data ${{outputs.preprocessed_validation_data}}
resources:
  # Explicit VM size for the serverless compute target.
  instance_type: Standard_D3_v2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
# Command component that registers the trained model via register.py.
name: register_model
display_name: Register Model
version: 1
type: command
inputs:
  model_input_path:
    type: uri_folder
  model_base_name:
    type: string
code: ./src/
environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:1
command: >-
  python register.py
  --model_input_path ${{inputs.model_input_path}}
  --model_base_name ${{inputs.model_base_name}}
resources:
  # Explicit VM size for the serverless compute target.
  instance_type: Standard_D3_v2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Conda environment for the data-preprocessing component.
channels:
  - conda-forge
dependencies:
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - 'pandas>=1.1,<1.2'
  - pip:
      - 'inference-schema[numpy-support]==1.3.0'
      - xlrd==2.0.1
      - azureml-mlflow==1.42.0
      - mltable==0.1.0b4
      - pyyaml==6.0
name: model-env
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# ---------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# ---------------------------------------------------------
4+
5+
import os
6+
import argparse
7+
import datetime
8+
from pathlib import Path
9+
import yaml
10+
from mltable import load
11+
12+
13+
def parse_args(argv=None):
    """Parse command-line arguments for the preprocessing step.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``, in
            which case argparse reads ``sys.argv[1:]`` — backward compatible
            with the previous zero-argument form, while allowing tests to
            pass arguments explicitly.

    Returns:
        argparse.Namespace with ``train_data``, ``validation_data``,
        ``preprocessed_train_data`` and ``preprocessed_validation_data``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data", type=str)
    parser.add_argument("--validation_data", type=str)
    parser.add_argument("--preprocessed_train_data", type=str)
    parser.add_argument("--preprocessed_validation_data", type=str)
    args = parser.parse_args(argv)
    # Echo the received arguments so they show up in the job's stdout log.
    print("args received ", args)
    return args
27+
28+
29+
def get_preprocessed_data(text):
    """Identity transform: return the input text unchanged.

    Placeholder hook for real preprocessing; the current pipeline simply
    forwards the raw file contents as-is.
    """
    processed = text  # no-op for now
    return processed
35+
36+
37+
def _preprocess_text_file(src_dir, dst_dir, file_name):
    """Read <src_dir>/<file_name>, apply get_preprocessed_data, write to <dst_dir>/<file_name>."""
    with open(os.path.join(src_dir, file_name)) as f:
        raw = f.read()
    processed = get_preprocessed_data(raw)
    with open(os.path.join(dst_dir, file_name), "w") as f:
        f.write(processed)


def _copy_mltable_spec(src_dir, dst_dir):
    """Copy the MLTable spec file verbatim from src_dir to dst_dir."""
    # Direct text copy instead of a yaml.safe_load/yaml.dump round-trip:
    # the stated intent was copying the spec as-is, and a round-trip can
    # reorder keys / reformat the file.
    with open(os.path.join(src_dir, "MLTable")) as f:
        spec = f.read()
    with open(os.path.join(dst_dir, "MLTable"), "w") as f:
        f.write(spec)


def main(args):
    """Preprocess the training and validation MLTable folders.

    For each input folder, reads the text file (train.txt / valid.txt),
    runs get_preprocessed_data (currently a no-op), writes the result to
    the corresponding output folder, and copies the MLTable spec file
    unchanged so the output folders remain valid MLTable assets.

    Args:
        args: Namespace with train_data, validation_data,
            preprocessed_train_data and preprocessed_validation_data
            directory paths.
    """
    _preprocess_text_file(args.train_data, args.preprocessed_train_data, "train.txt")
    _preprocess_text_file(args.validation_data, args.preprocessed_validation_data, "valid.txt")

    # Keep the MLTable yaml files so downstream steps can load the outputs
    # as MLTable data assets.
    _copy_mltable_spec(args.train_data, args.preprocessed_train_data)
    _copy_mltable_spec(args.validation_data, args.preprocessed_validation_data)
79+
80+
81+
# Script entry point: parse CLI arguments and run the preprocessing step.
if __name__ == "__main__":
    main(parse_args())
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# ---------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# ---------------------------------------------------------
4+
5+
import argparse
6+
import json
7+
import os
8+
import time
9+
10+
11+
from azureml.core import Run
12+
13+
import mlflow
14+
import mlflow.sklearn
15+
16+
# Based on example:
17+
# https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-cli
18+
# which references
19+
# https://github.com/Azure/azureml-examples/tree/main/cli/jobs/train/lightgbm/iris
20+
21+
22+
def parse_args(argv=None):
    """Parse command-line arguments for the model-registration step.

    Args:
        argv: Optional list of argument strings. Defaults to ``None``, in
            which case argparse reads ``sys.argv[1:]`` — backward compatible
            with the previous zero-argument form, while allowing tests to
            pass arguments explicitly.

    Returns:
        argparse.Namespace with ``model_input_path`` and ``model_base_name``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_input_path", type=str, help="Path to input model")
    parser.add_argument("--model_base_name", type=str, help="Name of the registered model")
    args = parser.parse_args(argv)
    # Echo the model path so it shows up in the job's stdout log.
    print("Path: " + args.model_input_path)
    return args
35+
36+
37+
def main(args):
    """Register the trained MLflow model found under args.model_input_path.

    Resolves the workspace's MLflow tracking URI from the current run
    context, extracts the producing run id from the model's MLmodel
    metadata file, builds a ``runs:/`` model URI, and registers the model
    under args.model_base_name.
    """
    # Point MLflow at this workspace's tracking server and experiment.
    experiment = Run.get_context().experiment
    uri = experiment.workspace.get_mlflow_tracking_uri()
    print("tracking_uri: {0}".format(uri))
    mlflow.set_tracking_uri(uri)
    mlflow.set_experiment(experiment.name)

    # Recover the run id by scanning the MLmodel file; if several lines
    # mention "run_id", the last one wins (matches original behavior).
    print("Getting model path")
    mlmodel_file = os.path.join(args.model_input_path, "MLmodel")
    run_id = ""
    with open(mlmodel_file, "r") as handle:
        for entry in handle:
            if "run_id" in entry:
                run_id = entry.split(":")[1].strip()

    # Build the runs:/ URI MLflow uses to locate the run's output artifacts.
    model_uri = "runs:/{}/outputs/".format(run_id)
    print("Model URI: " + model_uri)

    # Register under the caller-supplied base name.
    registered_name = args.model_base_name
    print(f"Registering model as {registered_name}")
    mlflow.register_model(model_uri, registered_name)
65+
66+
67+
# Script entry point: parse the CLI arguments, then register the model.
if __name__ == "__main__":
    main(parse_args())
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
# AutoML text-NER pipeline used to verify serverless compute support:
# preprocessing -> automl text_ner -> model registration.
type: pipeline
display_name: Serverless in Pipeline - AutoML
settings:
  # Nodes without an explicit compute run on serverless compute.
  default_compute: azureml:serverless
inputs:
  text_ner_training_data:
    type: mltable
    path: ./training-mltable-folder
  text_ner_validation_data:
    type: mltable
    path: ./validation-mltable-folder
jobs:
  preprocessing_node:
    type: command
    component: file:./components/component_preprocessing.yaml
    inputs:
      train_data: ${{parent.inputs.text_ner_training_data}}
      validation_data: ${{parent.inputs.text_ner_validation_data}}
    outputs:
      preprocessed_train_data:
        type: mltable
      preprocessed_validation_data:
        type: mltable
  text_ner_node:
    type: automl
    task: text_ner
    log_verbosity: info
    primary_metric: accuracy
    limits:
      max_trials: 1
      timeout_minutes: 60
    target_column_name: label
    training_data: ${{parent.jobs.preprocessing_node.outputs.preprocessed_train_data}}
    validation_data: ${{parent.jobs.preprocessing_node.outputs.preprocessed_validation_data}}
    outputs:
      best_model:
        type: mlflow_model
    # BUG 2246569: resources should be optional, explicitly specify here with NC/ND-family for dnn tasks.
    resources:
      instance_type: Standard_NC6
  register_model_node:
    type: command
    component: file:./components/component_register_model.yaml
    inputs:
      model_input_path: ${{parent.jobs.text_ner_node.outputs.best_model}}
      model_base_name: paper_categorization_model
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# MLTable spec: read train.txt and take only the first row.
paths:
  - file: ./train.txt
transformations:
  - take: 1

0 commit comments

Comments
 (0)