Skip to content

Commit f87ac8c

Browse files
authored
[ML][Pipelines] feat: use origin name and version in request body for all anonymous components (Azure#28092)
* feat: use origin name and version in request body for all anonymous components
* refactor: minor change
* record: update recordings
1 parent 2a26cd6 commit f87ac8c

File tree

168 files changed

+21669
-32036
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

168 files changed

+21669
-32036
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_internal/entities/component.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ def __init__(
118118
# Store original yaml
119119
self._yaml_str = yaml_str
120120
self._other_parameter = kwargs
121-
self._origin_name, self._origin_version = None, None
122121

123122
self.successful_return_code = successful_return_code
124123
self.code = code
@@ -191,19 +190,9 @@ def _from_rest_object_to_init_params(cls, obj: ComponentVersionData) -> Dict:
191190
init_kwargs["distribution"] = DistributionConfiguration._from_rest_object(distribution)
192191
return init_kwargs
193192

194-
def _set_is_anonymous(self, is_anonymous: bool):
195-
if is_anonymous:
196-
self._origin_name, self._origin_version = self.name, self.version
197-
super()._set_is_anonymous(is_anonymous)
198-
199193
def _to_rest_object(self) -> ComponentVersionData:
200194
component = convert_ordered_dict_to_dict(self._to_dict())
201195

202-
if self._origin_name:
203-
component["name"] = self._origin_name
204-
if self._origin_version:
205-
component["version"] = self._origin_version
206-
207196
properties = ComponentVersionDetails(
208197
component_spec=component,
209198
description=self.description,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_component/component.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ def __init__(
116116
self._source = (
117117
self._resolve_component_source_from_id(id) if id else kwargs.pop("_source", ComponentSource.CLASS)
118118
)
119+
# use ANONYMOUS_COMPONENT_NAME instead of guid
120+
is_anonymous = kwargs.pop("is_anonymous", False)
121+
if not name and version is None:
122+
name = ANONYMOUS_COMPONENT_NAME
123+
version = "1"
124+
is_anonymous = True
125+
119126
super().__init__(
120127
name=name,
121128
version=version,
@@ -124,17 +131,13 @@ def __init__(
124131
tags=tags,
125132
properties=properties,
126133
creation_context=creation_context,
127-
is_anonymous=kwargs.pop("is_anonymous", False),
134+
is_anonymous=is_anonymous,
128135
base_path=kwargs.pop("base_path", None),
129136
source_path=kwargs.pop("source_path", None),
130137
)
131138
# store kwargs to self._other_parameter instead of pop to super class to allow component have extra
132139
# fields not defined in current schema.
133140

134-
# update component name to ANONYMOUS_COMPONENT_NAME if it is anonymous
135-
if hasattr(self, "_is_anonymous"):
136-
self._set_is_anonymous(self._is_anonymous)
137-
138141
inputs = inputs if inputs else {}
139142
outputs = outputs if outputs else {}
140143

@@ -411,25 +414,6 @@ def _from_rest_object_to_init_params(cls, obj: ComponentVersionData) -> Dict:
411414
# remove empty values, because some property only works for specific component, eg: distribution for command
412415
return {k: v for k, v in init_kwargs.items() if v is not None and v != {}}
413416

414-
def _set_is_anonymous(self, is_anonymous: bool):
415-
"""Mark this component as anonymous and overwrite component name to
416-
ANONYMOUS_COMPONENT_NAME."""
417-
if is_anonymous is True:
418-
self._is_anonymous = True
419-
self.name = ANONYMOUS_COMPONENT_NAME
420-
else:
421-
self._is_anonymous = False
422-
423-
def _update_anonymous_hash(self):
424-
"""For anonymous component, we use code hash + yaml hash as component
425-
version so the same anonymous component(same interface and same code)
426-
won't be created again.
427-
428-
Should be called before _to_rest_object.
429-
"""
430-
if self._is_anonymous:
431-
self.version = self._get_anonymous_hash()
432-
433417
def _get_anonymous_hash(self) -> str:
434418
"""Return the name of anonymous component.
435419
@@ -441,6 +425,16 @@ def _get_anonymous_hash(self) -> str:
441425
# omit name since name doesn't impact component's uniqueness
442426
return hash_dict(component_interface_dict, keys_to_omit=["name", "id", "version"])
443427

428+
def _validate(self, raise_error=False) -> MutableValidationResult:
429+
origin_name = self.name
430+
# skip name validation for anonymous component as ANONYMOUS_COMPONENT_NAME will be used in component creation
431+
if self._is_anonymous:
432+
self.name = ANONYMOUS_COMPONENT_NAME
433+
try:
434+
return super()._validate(raise_error)
435+
finally:
436+
self.name = origin_name
437+
444438
def _customized_validate(self) -> MutableValidationResult:
445439
validation_result = super(Component, self)._customized_validate()
446440
# If private features are enable and component has code value of type str we need to check
@@ -464,6 +458,11 @@ def _customized_validate(self) -> MutableValidationResult:
464458

465459
return validation_result
466460

461+
def _get_rest_name_version(self):
462+
if self._is_anonymous:
463+
return ANONYMOUS_COMPONENT_NAME, self._get_anonymous_hash()
464+
return self.name, self.version
465+
467466
def _to_rest_object(self) -> ComponentVersionData:
468467
component = self._to_dict()
469468

@@ -489,7 +488,10 @@ def _to_rest_object(self) -> ComponentVersionData:
489488
tags=self.tags,
490489
)
491490
result = ComponentVersionData(properties=properties)
492-
result.name = self.name
491+
if self._is_anonymous:
492+
result.name = ANONYMOUS_COMPONENT_NAME
493+
else:
494+
result.name = self.name
493495
return result
494496

495497
def _to_dict(self) -> Dict:

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -300,15 +300,16 @@ def create_or_update(
300300
**self._init_args,
301301
)
302302

303-
if not (hasattr(component, "_is_anonymous") and component._is_anonymous):
304-
component._set_is_anonymous(kwargs.pop("is_anonymous", False))
303+
if not component._is_anonymous:
304+
component._is_anonymous = kwargs.pop("is_anonymous", False)
305+
305306
if not skip_validation:
306307
self._validate(component, raise_on_failure=True)
307308

308309
# Create all dependent resources
309310
self._resolve_arm_id_or_upload_dependencies(component)
310311

311-
component._update_anonymous_hash()
312+
name, version = component._get_rest_name_version()
312313
rest_component_resource = component._to_rest_object()
313314
result = None
314315
try:
@@ -320,8 +321,8 @@ def create_or_update(
320321
"registryName": self._registry_name,
321322
}
322323
poller = self._version_operation.begin_create_or_update(
323-
name=component.name,
324-
version=component.version,
324+
name=name,
325+
version=version,
325326
resource_group_name=self._operation_scope.resource_group_name,
326327
registry_name=self._registry_name,
327328
body=rest_component_resource,
@@ -335,8 +336,8 @@ def create_or_update(
335336

336337
else:
337338
result = self._version_operation.create_or_update(
338-
name=rest_component_resource.name,
339-
version=component.version,
339+
name=name,
340+
version=version,
340341
resource_group_name=self._resource_group_name,
341342
workspace_name=self._workspace_name,
342343
body=rest_component_resource,

sdk/ml/azure-ai-ml/tests/dsl/e2etests/test_dsl_pipeline.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -792,10 +792,6 @@ def default_optional_pipeline():
792792
# TODO: optional_param_with_default should also exists
793793
assert len(pipeline_job.jobs["default_optional_component_1"].inputs) == 2
794794

795-
@pytest.mark.skipif(
796-
not is_live(),
797-
reason="TODO 2144070: recording is not stable for this test before the fix after we enable on-disk cache",
798-
)
799795
def test_pipeline_with_none_parameter_has_default_optional_false(self, client: MLClient) -> None:
800796
default_optional_func = load_component(source=str(components_dir / "default_optional_component.yml"))
801797

@@ -844,7 +840,7 @@ def pipeline_with_default_optional_parameters(
844840
)
845841
pipeline_job = client.jobs.create_or_update(pipeline, experiment_name="default_optional_pipeline")
846842

847-
# only the two required input exists
843+
# only the two required inputs exist
848844
assert len(next(pipeline_job.jobs.values().__iter__()).inputs) == 2
849845
validate_result = pipeline._validate()
850846
assert validate_result.passed is True
@@ -1456,8 +1452,10 @@ def test_anon_component_in_pipeline(
14561452
mpi_func = load_component(source=str(components_dir / "helloworld_component_mpi.yml"))
14571453
assert mpi_func._validate().passed
14581454

1455+
invalid_component_name = "_invalid"
1456+
14591457
# name of anonymous component in pipeline job should be overwritten
1460-
mpi_func.name = "_invalid"
1458+
mpi_func.name = invalid_component_name
14611459
assert not mpi_func._validate().passed
14621460

14631461
@dsl.pipeline(
@@ -1481,6 +1479,11 @@ def pipeline_distribution_components(job_in_number, job_in_path):
14811479
created_job: PipelineJob = client.jobs.create_or_update(
14821480
pipeline, experiment_name=experiment_name, continue_on_step_failure=True
14831481
)
1482+
# Theoretically, we should keep the invalid name in request body,
1483+
# as a component name that is valid in azureml-components may be invalid in azure-ai-ml.
1484+
# So we leave this validation to server-side for now.
1485+
assert mpi_func._to_rest_object().properties.component_spec["name"] == invalid_component_name
1486+
14841487
# continue_on_step_failure can't be set in create_or_update
14851488
assert created_job.settings.continue_on_step_failure is False
14861489
assert created_job.jobs["hello_world_component_mpi"].component.startswith(ANONYMOUS_COMPONENT_NAME)
@@ -1509,13 +1512,6 @@ def pipeline(job_in_number, job_in_other_number, job_in_path):
15091512
job = client.jobs.create_or_update(pipeline)
15101513
assert job.settings.force_rerun is None
15111514

1512-
# 1 interesting case: in client.jobs.create_or_update, component_func1 and/or component_func2
1513-
# will be renamed to azureml_anonymous in resolution, and the component name will be used as
1514-
# node name when we use them in the 2nd pipeline job.
1515-
# After we enabled in-memory cache, only component_func1 will be resolved, and name of component_func2
1516-
# will keep as randstr("component_name"). Here we manually rename to avoid recording change.
1517-
component_func2.name = "azureml_anonymous"
1518-
15191515
@dsl.pipeline(
15201516
name=randstr("pipeline_name"),
15211517
description="The hello world pipeline job",

0 commit comments

Comments
 (0)