Commit d2c7c9c
[ML][Pipelines] feat: add autotoken related run settings to internal Scope (Azure#29643)
* script: record tests with recording mismatch
* feat: support autotoken related run settings for internal scope
* doc: update CHANGELOG.md
* Revert "doc: update CHANGELOG.md" (this reverts commit 139b259)
1 parent e888d6e commit d2c7c9c

10 files changed (+1061 additions, -761 deletions)

.vscode/cspell.json

Lines changed: 4 additions & 3 deletions
@@ -417,7 +417,7 @@
         "Phong"
       ]
     },
-    {
+    {
       "filename": "tools/azure-sdk-tools/ci_tools/deps.html.j2",
       "words": [
         "isfork",
@@ -738,7 +738,7 @@
       "filename": "sdk/synapse/azure-synapse-artifacts/azure/synapse/artifacts/operations/*.py",
       "words": [
         "Syms",
-        "Updation",
+        "Updation"
       ]
     },
     {
@@ -929,7 +929,8 @@
         "SEPS",
         "wargs",
         "pycache",
-        "ruamel"
+        "ruamel",
+        "reprcrash"
       ]
     },
     {

sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/node.py

Lines changed: 3 additions & 0 deletions
@@ -53,6 +53,9 @@ class ScopeSchema(InternalBaseNodeSchema):
     scope_param = fields.Str()
     custom_job_name_suffix = fields.Str()
     priority = fields.Int()
+    auto_token = fields.Int()
+    tokens = fields.Int()
+    vcp = fields.Float()


 class HDInsightSchema(InternalBaseNodeSchema):
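The schema change above is what lets these values flow through dict/YAML validation. As a rough standalone illustration (not the SDK's actual loading path; _ScopeRunSettingsSketch is a made-up stand-in for ScopeSchema, which inherits many more fields from InternalBaseNodeSchema):

    from marshmallow import Schema, fields

    # Made-up stand-in reduced to the run-settings fields this commit touches.
    class _ScopeRunSettingsSketch(Schema):
        priority = fields.Int()
        auto_token = fields.Int()
        tokens = fields.Int()
        vcp = fields.Float()

    # Integer/float run settings round-trip through validation.
    loaded = _ScopeRunSettingsSketch().load({"priority": 800, "auto_token": 150, "tokens": 2, "vcp": 0.2})
    print(loaded)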

sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/scope.py

Lines changed: 31 additions & 1 deletion
@@ -24,6 +24,9 @@ def __init__(self, **kwargs):
         self._scope_param = kwargs.pop("scope_param", None)
         self._custom_job_name_suffix = kwargs.pop("custom_job_name_suffix", None)
         self._priority = kwargs.pop("priority", None)
+        self._auto_token = kwargs.pop("auto_token", None)
+        self._tokens = kwargs.pop("tokens", None)
+        self._vcp = kwargs.pop("vcp", None)
         self._init = False

     @property
@@ -65,9 +68,36 @@ def priority(self) -> int:
     def priority(self, value: int):
         self._priority = value

+    @property
+    def auto_token(self) -> int:
+        """A predictor for estimating the peak resource usage of scope job."""
+        return self._auto_token
+
+    @auto_token.setter
+    def auto_token(self, value: int):
+        self._auto_token = value
+
+    @property
+    def tokens(self) -> int:
+        """Standard token allocation in integer."""
+        return self._tokens
+
+    @tokens.setter
+    def tokens(self, value: int):
+        self._tokens = value
+
+    @property
+    def vcp(self) -> float:
+        """Standard VC percent allocation; should be a float between 0 and 1."""
+        return self._vcp
+
+    @vcp.setter
+    def vcp(self, value: float):
+        self._vcp = value
+
     @classmethod
     def _picked_fields_from_dict_to_rest_object(cls) -> List[str]:
-        return ["custom_job_name_suffix", "scope_param", "adla_account_name", "priority"]
+        return ["custom_job_name_suffix", "scope_param", "adla_account_name", "priority", "auto_token", "tokens", "vcp"]

     @classmethod
     def _create_schema_for_validation(cls, context) -> Union[PathAwareSchema, Schema]:
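The properties above expose the new settings on internal Scope pipeline nodes. A minimal usage sketch, assuming a Scope component spec is available locally; the spec path, input name, and account value are illustrative assumptions, not part of this commit:

    from azure.ai.ml import load_component
    from azure.ai.ml.dsl import pipeline

    scope_func = load_component("./scope_component_spec.yaml")  # hypothetical spec path

    @pipeline()
    def pipeline_with_scope_node(input_data):
        node = scope_func(TextData=input_data)  # input name is illustrative
        node.adla_account_name = "my_adla_account"  # placeholder account
        node.priority = 800
        # run settings introduced by this commit
        node.auto_token = 150  # predictor for estimating peak resource usage
        node.tokens = 2        # standard token allocation
        node.vcp = 0.2         # standard VC percent allocation, between 0 and 1

The same values appear in the test configuration added to tests/internal/_utils.py at the end of this commit.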

sdk/ml/azure-ai-ml/dev_requirements.txt

Lines changed: 3 additions & 1 deletion
@@ -18,4 +18,6 @@ numpy;platform.python_implementation!="PyPy"
 scikit-image;platform.python_implementation!="PyPy"
 mldesigner
 azure-mgmt-resourcegraph<9.0.0,>=2.0.0
-azure-mgmt-resource<23.0.0,>=3.0.0
+azure-mgmt-resource<23.0.0,>=3.0.0
+pytest-reportlog
+python-dotenv
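Both new dev requirements support the test-runner script changed later in this commit: pytest-reportlog provides the --report-log pytest option that writes one JSON record per test event, and python-dotenv is used to locate and temporarily rewrite the repo's .env file. A small sketch of the dotenv side (output values are illustrative):

    import dotenv

    # Find the nearest .env file by walking up the directory tree; raises if none exists
    # (the same call the test-runner script uses).
    env_file = dotenv.find_dotenv(raise_error_if_not_found=True)
    print(dotenv.dotenv_values(env_file))  # e.g. {'AZURE_TEST_RUN_LIVE': 'true', ...}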
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+tmp
Lines changed: 182 additions & 26 deletions
@@ -1,46 +1,202 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-
+import argparse
+import contextlib
+import json
+import os
+import re
 import subprocess
 import sys
+from collections import defaultdict
 from pathlib import Path

+import dotenv

-def run_tests(input_file):
-    """Run tests listed in a file. Lines starting with # or ; are ignored.

-    :param input_file: Path to a file containing a list of tests to run.
-    :type input_file: str
-    """
-    tests_to_run = []
+def normalize_test_name(test_name):
+    if "[" in test_name:
+        test_name = test_name.split("[")[0]
+    return test_name.strip()
+
+
+def extract_test_location(location):
+    test_path, line_no, test_func = location
+    test_class_name, test_func_name = test_func.split(".", 1)
+    test_class = test_path.split(os.path.sep, 3)[-1] + "::" + test_class_name
+    m = re.match(r"(\w+)\[(\w+)]", test_func_name)
+    if m:
+        test_func_name, test_param = m.groups()
+    else:
+        test_param = None
+    return test_class, test_func_name, test_param
+
+
+def load_tests_from_file(input_file):
+    tests_to_run = set()
     with open(input_file, "r") as f:

         for line in f:
             if len(line) < 1 or line[0] in ["#", ";"]:
                 continue
-            if "[" in line:
-                line = line.split("[")[0]
-            line = line.strip()
-            if line not in tests_to_run:
-                tests_to_run.append(line)
+            line = normalize_test_name(line)
+            tests_to_run.add(line)
+    return tests_to_run
+
+
+@contextlib.contextmanager
+def update_dot_env_file(env_override):
+    """Update env file with env_override, and restore it after the context is exited.
+    Support bool variable only for now.
+    """
+    env_file = dotenv.find_dotenv(raise_error_if_not_found=True)
+    print(f"Updating env file: {env_file}")
+    origin_env_content = None
+    try:
+        with open(env_file, "r") as f:
+            origin_env_content = f.read()
+        env_vars = [line.strip() for line in origin_env_content.splitlines() if line.strip()]
+        for key, value in env_override.items():
+            if isinstance(value, bool):
+                target_line = f"{key}='true'"
+                for i, line in enumerate(env_vars):
+                    if line == target_line and not value:
+                        env_vars[i] = f"#{target_line}"
+                    elif re.match(rf"# *{target_line}", line) and value:
+                        env_vars[i] = f"{target_line}"
+        with open(env_file, "w") as f:
+            f.write("\n".join(env_vars))
+        yield
+    finally:
+        if origin_env_content is not None:
+            with open(env_file, "w") as f:
+                f.write(origin_env_content)
+
+
+def run_simple(tests_to_run, working_dir, extra_params, is_live_and_recording):
+    print(f"Running {len(tests_to_run)} tests under {working_dir}: ")
     for test_name in tests_to_run:
         print(test_name)

-    for test_name in tests_to_run:
-        print(f"Running test: {test_name}")
-        subprocess.call(
-            [
-                sys.executable,
-                "-m",
-                "pytest",
-                "--disable-warnings",
-                "--disable-pytest-warnings",
-                test_name,
-            ],
-            cwd=Path(__file__).parent.parent,
-        )
+    with update_dot_env_file(
+        {"AZURE_TEST_RUN_LIVE": is_live_and_recording, "AZURE_SKIP_LIVE_RECORDING": not is_live_and_recording},
+    ):
+        for test_name in tests_to_run:
+            print(
+                f"pytest {test_name} {' '.join(extra_params)} in {'live' if is_live_and_recording else 'playback'} mode..."
+            )
+            subprocess.run(
+                [
+                    sys.executable,
+                    "-m",
+                    "pytest",
+                    test_name,
+                ]
+                + extra_params,
+                cwd=working_dir,
+            )
+
+
+def run_tests(tests_to_run, extras, *, skip_first_run=False, record_mismatch=False, is_live_and_recording=False):
+    working_dir = Path(__file__).parent.parent
+    if record_mismatch:
+        log_file_path = working_dir / "scripts" / "tmp" / "pytest_log.json"
+        log_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        if not skip_first_run:
+            run_simple(
+                tests_to_run,
+                working_dir,
+                extra_params=[
+                    "--disable-warnings",
+                    "--disable-pytest-warnings",
+                    "--report-log",
+                    log_file_path.as_posix(),
+                ]
+                + extras,
+                is_live_and_recording=False,
+            )
+
+        tests_failed_with_recording_mismatch = defaultdict(dict)
+        with open(log_file_path, "r") as f:
+            for line in f:
+                node = json.loads(line)
+                if "outcome" not in node:
+                    continue
+                if node["outcome"] != "failed":
+                    continue
+                test_class, test_name, test_param = extract_test_location(node["location"])
+
+                msg = node["longrepr"]["reprcrash"]["message"]
+                if "ResourceNotFoundError" in msg:
+                    if test_param is None:
+                        tests_failed_with_recording_mismatch[test_class][test_name] = None
+                    elif test_name not in tests_failed_with_recording_mismatch[test_class]:
+                        tests_failed_with_recording_mismatch[test_class][test_name] = [test_param]
+                    else:
+                        tests_failed_with_recording_mismatch[test_class][test_name].append(test_param)

+        if tests_failed_with_recording_mismatch:
+            # re-run the tests with recording mismatch in live mode
+            for test_class, test_info in tests_failed_with_recording_mismatch.items():
+                keys = []
+                for test_name, test_params in test_info.items():
+                    if test_params is not None:
+                        keys.append(f"{test_name}[{'-'.join(test_params)}]")
+                    else:
+                        keys.append(test_name)
+                run_simple(
+                    [test_class],
+                    working_dir,
+                    ["-k", " or ".join(keys), "--tb=line"],
+                    is_live_and_recording=True,
+                )
+
+            # re-run the original tests to check if they are still failing
+            run_simple(tests_to_run, working_dir, extras, is_live_and_recording=False)
+    else:
+        run_simple(tests_to_run, working_dir, extras, is_live_and_recording=is_live_and_recording)


 if __name__ == "__main__":
-    run_tests(sys.argv[1])
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--file",
+        type=str,
+        help="File containing tests to run, each line is a test name",
+    )
+    parser.add_argument(
+        "--name",
+        type=str,
+        help="Name of the test to run. Format is aligned with pytest, e.g. 'tests/pipeline_job/'.",
+    )
+    parser.add_argument(
+        "--record-mismatch",
+        "-r",
+        action="store_true",
+        help="If specified, pytest log will be outputted to tmp/pytest_log.json, "
+        "then tests failed with recording not found error will be rerun in live & recording mode."
+        "Note that .env file will be updated during the process, so please revert the change manually "
+        "if the script run is stopped early.",
+    )
+    parser.add_argument(
+        "--skip-first-run",
+        "-s",
+        action="store_true",
+        help="If specified, will skip the first run in record-mismatch mode.",
+    )
+
+    _args, _extras = parser.parse_known_args()
+
+    if _args.file:
+        _tests = load_tests_from_file(_args.file)
+    elif _args.name:
+        _tests = [_args.name]
+    else:
+        raise ValueError("Must specify either --file or --name")
+    run_tests(
+        _tests,
+        _extras,
+        skip_first_run=_args.skip_first_run,
+        record_mismatch=_args.record_mismatch,
+    )
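For context on what the record-mismatch pass consumes: each line of the --report-log file is a JSON object, and failed tests carry a location triple plus a longrepr.reprcrash.message that the script scans for ResourceNotFoundError (the signature of a missing recording). A minimal sketch with an illustrative record, not taken from a real run:

    import json

    # Illustrative pytest-reportlog entry; the field shape mirrors what the script reads,
    # the values are made up.
    sample_line = json.dumps({
        "$report_type": "TestReport",
        "outcome": "failed",
        "location": ["tests/internal/e2etests/test_example.py", 42, "TestExample.test_submit[basic]"],
        "longrepr": {"reprcrash": {"message": "azure.core.exceptions.ResourceNotFoundError: recording not found"}},
    })

    node = json.loads(sample_line)
    if node.get("outcome") == "failed" and "ResourceNotFoundError" in node["longrepr"]["reprcrash"]["message"]:
        test_path, _line_no, test_func = node["location"]
        print(f"would re-run {test_path}::{test_func.split('.', 1)[0]} in live mode")

Invocation goes through the argparse options above (--file or --name, plus optional --record-mismatch and --skip-first-run); any unrecognized arguments are forwarded to pytest via parse_known_args.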

sdk/ml/azure-ai-ml/tests/internal/_utils.py

Lines changed: 3 additions & 0 deletions
@@ -91,6 +91,9 @@
         "scope_param": "-tokens 50",  # runsettings.scope.scope_param
         "custom_job_name_suffix": "component_sdk_test",  # runsettings.scope.custom_job_name_suffix
         "priority": 800,  # runsettings.scope.priority
+        "auto_token": 150,  # runsettings.scope.auto_token
+        "tokens": 2,  # runsettings.scope.token
+        "vcp": 0.2,  # runsettings.scope.vcp
     },
     {
         "default_compute": "cpu-cluster",
