Skip to content

Commit 2110695

Browse files
Anubha98 and Anubha98 authored
Adding resources at sweep level (Azure#33068)
* Adding resources at sweep level * uncomment commented code for testing * Resolving build errors * Resolving comments * Resolving comments * Resolving pipeline errors * Test fix * Fixing pipeline failures * Fixing pipeline failures * Fixing pipeline failures * Fixing pipeline failures * resolving black errors * Updating as per new restclient 2023-08-01-preview * Changing client to oct for sweep * Fixing test failures * Resolving pipeline errors * Resolving pipeline errors * Resolving pipeline errors * Removing skip condition, resolving comments * Fixing gate failures * Fixing gate failures * Fixing gate failures --------- Co-authored-by: Anubha98 <anubhajain@microsoft.com>
1 parent 4039427 commit 2110695

File tree

16 files changed

+313
-16
lines changed

16 files changed

+313
-16
lines changed

sdk/ml/azure-ai-ml/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/ml/azure-ai-ml",
5-
"Tag": "python/ml/azure-ai-ml_afdee3d631"
5+
"Tag": "python/ml/azure-ai-ml_eef1c37dd2"
66
}

sdk/ml/azure-ai-ml/azure/ai/ml/_schema/_sweep/parameterized_sweep.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# pylint: disable=unused-argument
66

77
from azure.ai.ml._schema.core.fields import ExperimentalField, NestedField, PathAwareSchema
8+
from azure.ai.ml._schema.job_resource_configuration import JobResourceConfigurationSchema
89

910
from ..job.job_limits import SweepJobLimitsSchema
1011
from ..queue_settings import QueueSettingsSchema
@@ -28,3 +29,4 @@ class ParameterizedSweepSchema(PathAwareSchema):
2829
required=True,
2930
)
3031
queue_settings = ExperimentalField(NestedField(QueueSettingsSchema))
32+
resources = NestedField(JobResourceConfigurationSchema)

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_builders/sweep.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
SweepDistribution,
4646
Uniform,
4747
)
48+
from azure.ai.ml.entities._job.job_resource_configuration import JobResourceConfiguration
4849
from azure.ai.ml.exceptions import ErrorTarget, UserErrorException, ValidationErrorType, ValidationException
4950
from azure.ai.ml.sweep import SweepJob
5051

@@ -92,6 +93,8 @@ class Sweep(ParameterizedSweep, BaseNode):
9293
~azure.ai.ml.UserIdentityConfiguration]
9394
:param queue_settings: The queue settings for the job.
9495
:type queue_settings: ~azure.ai.ml.entities.QueueSettings
96+
:param resources: Compute Resource configuration for the job.
97+
:type resources: ~azure.ai.ml.entities.ResourceConfiguration
9598
"""
9699

97100
def __init__(
@@ -117,6 +120,7 @@ def __init__(
117120
Union[ManagedIdentityConfiguration, AmlTokenConfiguration, UserIdentityConfiguration]
118121
] = None,
119122
queue_settings: Optional[QueueSettings] = None,
123+
resources: Optional[Union[dict, JobResourceConfiguration]] = None,
120124
**kwargs,
121125
) -> None:
122126
# TODO: get rid of self._job_inputs, self._job_outputs once we have general Input
@@ -142,6 +146,7 @@ def __init__(
142146
early_termination=early_termination,
143147
search_space=search_space,
144148
queue_settings=queue_settings,
149+
resources=resources,
145150
)
146151

147152
self.identity = identity
@@ -223,6 +228,7 @@ def _picked_fields_from_dict_to_rest_object(cls) -> List[str]:
223228
"early_termination",
224229
"search_space",
225230
"queue_settings",
231+
"resources",
226232
]
227233

228234
def _to_rest_object(self, **kwargs) -> dict:
@@ -310,6 +316,7 @@ def _to_job(self) -> SweepJob:
310316
outputs=self._job_outputs,
311317
identity=self.identity,
312318
queue_settings=self.queue_settings,
319+
resources=self.resources,
313320
)
314321

315322
@classmethod

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/job_limits.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Optional, Union
88

99
from azure.ai.ml._restclient.v2023_04_01_preview.models import CommandJobLimits as RestCommandJobLimits
10-
from azure.ai.ml._restclient.v2023_04_01_preview.models import SweepJobLimits as RestSweepJobLimits
10+
from azure.ai.ml._restclient.v2023_08_01_preview.models import SweepJobLimits as RestSweepJobLimits
1111
from azure.ai.ml._utils.utils import from_iso_duration_format, is_data_binding_expression, to_iso_duration_format
1212
from azure.ai.ml.constants import JobType
1313
from azure.ai.ml.entities._mixins import RestTranslatableMixin

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/sweep/objective.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# ---------------------------------------------------------
44
from typing import Optional
55

6-
from azure.ai.ml._restclient.v2023_04_01_preview.models import Objective as RestObjective
6+
from azure.ai.ml._restclient.v2023_08_01_preview.models import Objective as RestObjective
77
from azure.ai.ml.entities._mixins import RestTranslatableMixin
88

99

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/sweep/parameterized_sweep.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ---------------------------------------------------------
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
4-
from typing import Dict, Optional, Type, Union
4+
from typing import Dict, List, Optional, Type, Union
55

66
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException
77

@@ -14,6 +14,7 @@
1414
MedianStoppingPolicy,
1515
TruncationSelectionPolicy,
1616
)
17+
from ..job_resource_configuration import JobResourceConfiguration
1718
from .objective import Objective
1819
from .sampling_algorithm import (
1920
BayesianSamplingAlgorithm,
@@ -72,6 +73,7 @@ def __init__(
7273
]
7374
] = None,
7475
queue_settings: Optional[QueueSettings] = None,
76+
resources: Optional[Union[dict, JobResourceConfiguration]] = None,
7577
) -> None:
7678
"""
7779
:param limits: Limits for sweep job.
@@ -89,18 +91,30 @@ def __init__(
8991
~azure.ai.ml.sweep.Randint, ~azure.ai.ml.sweep.Uniform]]
9092
:param queue_settings: Queue settings for sweep job.
9193
:type queue_settings: ~azure.ai.ml.entities.QueueSettings
94+
:param resources: Compute Resource configuration for the job.
95+
:type resources: ~azure.ai.ml.entities.ResourceConfiguration
9296
"""
9397
self.sampling_algorithm = sampling_algorithm
9498
self.early_termination = early_termination
9599
self._limits = limits
96100
self.search_space = search_space
97101
self.queue_settings = queue_settings
102+
self.resources = resources
98103

99104
if isinstance(objective, Dict):
100105
self.objective = Objective(**objective)
101106
else:
102107
self.objective = objective
103108

109+
@property
110+
def resources(self) -> JobResourceConfiguration:
111+
"""Resources for sweep job.
112+
113+
:returns: Resources for sweep job.
114+
:rtype: ~azure.ai.ml.entities.ResourceConfiguration
115+
"""
116+
return self._resources
117+
104118
@property
105119
def limits(self) -> SweepJobLimits:
106120
"""Limits for sweep job.
@@ -110,6 +124,17 @@ def limits(self) -> SweepJobLimits:
110124
"""
111125
return self._limits
112126

127+
@resources.setter
128+
def resources(self, value: Union[dict, JobResourceConfiguration]) -> None:
129+
"""Set Resources for sweep job.
130+
131+
:param value: Compute Resource configuration for the job.
132+
:type value: ~azure.ai.ml.entities.ResourceConfiguration
133+
"""
134+
if isinstance(value, dict):
135+
value = JobResourceConfiguration(**value)
136+
self._resources = value
137+
113138
@limits.setter
114139
def limits(self, value: SweepJobLimits) -> None:
115140
"""Set limits for sweep job.
@@ -128,6 +153,34 @@ def limits(self, value: SweepJobLimits) -> None:
128153
)
129154
self._limits = value
130155

156+
def set_resources(
157+
self,
158+
*,
159+
instance_type: Optional[Union[str, List[str]]] = None,
160+
instance_count: Optional[int] = None,
161+
locations: Optional[List[str]] = None,
162+
properties: Optional[Dict] = None,
163+
docker_args: Optional[str] = None,
164+
shm_size: Optional[str] = None,
165+
**kwargs, # pylint: disable=unused-argument
166+
):
167+
"""Set resources for Sweep."""
168+
if self.resources is None:
169+
self.resources = JobResourceConfiguration()
170+
171+
if locations is not None:
172+
self.resources.locations = locations
173+
if instance_type is not None:
174+
self.resources.instance_type = instance_type
175+
if instance_count is not None:
176+
self.resources.instance_count = instance_count
177+
if properties is not None:
178+
self.resources.properties = properties
179+
if docker_args is not None:
180+
self.resources.docker_args = docker_args
181+
if shm_size is not None:
182+
self.resources.shm_size = shm_size
183+
131184
def set_limits(
132185
self,
133186
*,

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/sweep/sampling_algorithm.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
from abc import ABC
55
from typing import Optional, Union
66

7-
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
7+
from azure.ai.ml._restclient.v2023_08_01_preview.models import (
88
BayesianSamplingAlgorithm as RestBayesianSamplingAlgorithm,
99
)
10-
from azure.ai.ml._restclient.v2023_04_01_preview.models import GridSamplingAlgorithm as RestGridSamplingAlgorithm
11-
from azure.ai.ml._restclient.v2023_04_01_preview.models import RandomSamplingAlgorithm as RestRandomSamplingAlgorithm
12-
from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithm as RestSamplingAlgorithm
13-
from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType
10+
from azure.ai.ml._restclient.v2023_08_01_preview.models import GridSamplingAlgorithm as RestGridSamplingAlgorithm
11+
from azure.ai.ml._restclient.v2023_08_01_preview.models import RandomSamplingAlgorithm as RestRandomSamplingAlgorithm
12+
from azure.ai.ml._restclient.v2023_08_01_preview.models import SamplingAlgorithm as RestSamplingAlgorithm
13+
from azure.ai.ml._restclient.v2023_08_01_preview.models import SamplingAlgorithmType
1414
from azure.ai.ml.entities._mixins import RestTranslatableMixin
1515

1616

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/sweep/sweep_job.py

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
import logging
88
from typing import Any, Dict, Optional, Union
99

10-
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase
11-
from azure.ai.ml._restclient.v2023_04_01_preview.models import SweepJob as RestSweepJob
12-
from azure.ai.ml._restclient.v2023_04_01_preview.models import TrialComponent
10+
from azure.ai.ml._restclient.v2023_08_01_preview.models import JobBase
11+
from azure.ai.ml._restclient.v2023_08_01_preview.models import SweepJob as RestSweepJob
12+
from azure.ai.ml._restclient.v2023_08_01_preview.models import TrialComponent
1313
from azure.ai.ml._schema._sweep.sweep_job import SweepJobSchema
1414
from azure.ai.ml._utils.utils import map_single_brackets_and_warn
1515
from azure.ai.ml.constants import JobType
@@ -36,6 +36,7 @@
3636
from azure.ai.ml.entities._job.sweep.sampling_algorithm import SamplingAlgorithm
3737
from azure.ai.ml.entities._system_data import SystemData
3838
from azure.ai.ml.entities._util import load_from_dict
39+
from azure.ai.ml.entities._job.job_resource_configuration import JobResourceConfiguration
3940
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, JobException
4041

4142
# from ..identity import AmlToken, Identity, ManagedIdentity, UserIdentity
@@ -112,6 +113,8 @@ class SweepJob(Job, ParameterizedSweep, JobIOMixin):
112113
:paramtype limits: ~azure.ai.ml.entities.SweepJobLimits
113114
:keyword queue_settings: Queue settings for the job.
114115
:paramtype queue_settings: ~azure.ai.ml.entities.QueueSettings
116+
:param resources: Compute Resource configuration for the job.
117+
:type resources: ~azure.ai.ml.entities.ResourceConfiguration
115118
:keyword kwargs: A dictionary of additional configuration parameters.
116119
:paramtype kwargs: dict
117120
@@ -153,8 +156,61 @@ def __init__(
153156
trial: Optional[Union[CommandJob, CommandComponent]] = None,
154157
early_termination: Optional[Union[BanditPolicy, MedianStoppingPolicy, TruncationSelectionPolicy]] = None,
155158
queue_settings: Optional[QueueSettings] = None,
159+
resources: Optional[Union[dict, JobResourceConfiguration]] = None,
156160
**kwargs: Any,
157161
) -> None:
162+
"""Sweep job for hyperparameter tuning.
163+
164+
:param name: Name of the job.
165+
:type name: str
166+
:param display_name: Display name of the job.
167+
:type display_name: str
168+
:param description: Description of the job.
169+
:type description: str
170+
:param tags: Tag dictionary. Tags can be added, removed, and updated.
171+
:type tags: dict[str, str]
172+
:param properties: The asset property dictionary.
173+
:type properties: dict[str, str]
174+
:param experiment_name: Name of the experiment the job will be created under. If None is provided,
175+
job will be created under experiment 'Default'.
176+
:type experiment_name: str
177+
:param identity: Identity that the training job will use while running on compute.
178+
:type identity: Union[
179+
~azure.ai.ml.ManagedIdentityConfiguration,
180+
~azure.ai.ml.AmlTokenConfiguration,
181+
~azure.ai.ml.UserIdentityConfiguration]
182+
:param inputs: Inputs to the command.
183+
:type inputs: dict
184+
:param outputs: Mapping of output data bindings used in the job.
185+
:type outputs: dict[str, ~azure.ai.ml.Output]
186+
:param sampling_algorithm: The hyperparameter sampling algorithm to use over the `search_space`.
187+
Defaults to "random".
188+
:type sampling_algorithm: str
189+
:param search_space: Dictionary of the hyperparameter search space. The key is the name of the
190+
hyperparameter and the value is the parameter expression.
191+
:type search_space: Dict
192+
:param objective: Metric to optimize for.
193+
:type objective: Objective
194+
:param compute: The compute target the job runs on.
195+
:type compute: str
196+
:param trial: The job configuration for each trial. Each trial will be provided with a different combination
197+
of hyperparameter values that the system samples from the search_space.
198+
:type trial: Union[~azure.ai.ml.entities.CommandJob, ~azure.ai.ml.entities.CommandComponent]
199+
:param early_termination: The early termination policy to use. A trial job is canceled
200+
when the criteria of the specified policy are met. If omitted, no early termination policy will be applied.
201+
:type early_termination: Union[
202+
~azure.mgmt.machinelearningservices.models.BanditPolicy,
203+
~azure.mgmt.machinelearningservices.models.MedianStoppingPolicy,
204+
~azure.mgmt.machinelearningservices.models.TruncationSelectionPolicy]
205+
:param limits: Limits for the sweep job.
206+
:type limits: ~azure.ai.ml.entities.SweepJobLimits
207+
:param queue_settings: Queue settings for the job.
208+
:type queue_settings: ~azure.ai.ml.entities.QueueSettings
209+
:param resources: Compute Resource configuration for the job.
210+
:type resources: ~azure.ai.ml.entities.ResourceConfiguration
211+
:param kwargs: A dictionary of additional configuration parameters.
212+
:type kwargs: dict
213+
"""
158214
kwargs[TYPE] = JobType.SWEEP
159215

160216
Job.__init__(
@@ -180,6 +236,7 @@ def __init__(
180236
early_termination=early_termination,
181237
search_space=search_space,
182238
queue_settings=queue_settings,
239+
resources=resources,
183240
)
184241

185242
def _to_dict(self) -> Dict:
@@ -220,6 +277,7 @@ def _to_rest_object(self) -> JobBase:
220277
outputs=to_rest_data_outputs(self.outputs),
221278
identity=self.identity._to_job_rest_object() if self.identity else None,
222279
queue_settings=self.queue_settings._to_rest_object() if self.queue_settings else None,
280+
resources=self.resources._to_rest_object() if self.resources else None,
223281
)
224282
sweep_job_resource = JobBase(properties=sweep_job)
225283
sweep_job_resource.name = self.name
@@ -281,6 +339,7 @@ def _load_from_rest(cls, obj: JobBase) -> "SweepJob":
281339
if properties.identity
282340
else None,
283341
queue_settings=properties.queue_settings,
342+
resources=properties.resources if hasattr(properties, "resources") else None,
284343
)
285344

286345
def _override_missing_properties_from_trial(self):

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/to_rest_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from functools import singledispatch
88
from pathlib import Path
99

10-
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase as JobBaseData
10+
from azure.ai.ml._restclient.v2023_08_01_preview.models import JobBase as JobBaseData
1111
from azure.ai.ml.constants._common import DEFAULT_EXPERIMENT_NAME
1212
from azure.ai.ml.entities._builders.command import Command
1313
from azure.ai.ml.entities._builders.pipeline import Pipeline

sdk/ml/azure-ai-ml/azure/ai/ml/operations/_job_operations.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from azure.ai.ml._restclient.runhistory.models import Run
3030
from azure.ai.ml._restclient.v2023_04_01_preview import AzureMachineLearningWorkspaces as ServiceClient022023Preview
3131
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase
32-
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobType as RestJobType
32+
from azure.ai.ml._restclient.v2023_08_01_preview.models import JobType as RestJobType
3333
from azure.ai.ml._restclient.v2023_04_01_preview.models import ListViewType, UserIdentity
3434
from azure.ai.ml._restclient.v2023_08_01_preview.models import JobBase as JobBase_2308
3535
from azure.ai.ml._scope_dependent_operations import (
@@ -709,6 +709,9 @@ def _create_or_update_with_different_version_api( # pylint: disable=name-too-lo
709709
if rest_job_resource.properties.job_type == RestJobType.PIPELINE:
710710
service_client_operation = self.service_client_08_2023_preview.jobs
711711

712+
if rest_job_resource.properties.job_type == RestJobType.SWEEP:
713+
service_client_operation = self.service_client_08_2023_preview.jobs
714+
712715
result = service_client_operation.create_or_update(
713716
id=rest_job_resource.name,
714717
resource_group_name=self._operation_scope.resource_group_name,

0 commit comments

Comments
 (0)