Skip to content

Commit 428474e

Browse files
svaruag and Gaurav Singh authored
[ml] Add 'max_instance_count' to JobResourceConfiguration for supporting PyTorch Elastic jobs (Azure#29973)
* updates for using the max_instance_count flag
* update import
* update imports
* undo import job
* fix test
* update deployment imports
* update endpoint svc client
* undo / update svc client
* fix eq
* undo dep ops, update job res cfg
* undo res cfg changes
* undo res cfg changes
* undo formatting changes

---------

Co-authored-by: Gaurav Singh <gasi@microsoft.com>
1 parent 7ba8baf commit 428474e

File tree

10 files changed

Lines changed: 61 additions & 22 deletions

10 files changed

+61
-22
lines changed

sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job/identity.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
from marshmallow import fields, post_load
1010

11-
from azure.ai.ml._restclient.v2022_01_01_preview.models import ConnectionAuthType
12-
from azure.ai.ml._restclient.v2022_10_01_preview.models import IdentityConfigurationType
11+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
12+
ConnectionAuthType,
13+
IdentityConfigurationType,
14+
)
1315
from azure.ai.ml._schema.core.fields import StringTransformedEnum
1416
from azure.ai.ml._utils.utils import camel_to_snake
1517
from azure.ai.ml.entities._credentials import (

sdk/ml/azure-ai-ml/azure/ai/ml/_schema/job_resource_configuration.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ class JobResourceConfigurationSchema(ResourceConfigurationSchema):
1919
)
2020
}
2121
)
22+
max_instance_count = fields.Int(
23+
metadata={"description": "The maximum number of instances to make available to this job."}
24+
)
2225
docker_args = fields.Str(metadata={"description": "arguments to pass to the Docker run command."})
2326

2427
@post_load

sdk/ml/azure-ai-ml/azure/ai/ml/automl/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
77
Main areas include managing AutoML tasks.
88
"""
9-
from azure.ai.ml.entities._job.automl import (
10-
TrainingSettings,
11-
)
9+
from azure.ai.ml.entities._job.automl import TrainingSettings
1210
from azure.ai.ml.entities._job.automl.image import (
1311
ImageClassificationJob,
1412
ImageClassificationMultilabelJob,
@@ -32,7 +30,9 @@
3230
TextNerJob,
3331
)
3432
from azure.ai.ml.entities._job.automl.search_space import SearchSpace
35-
from azure.ai.ml.entities._job.automl.stack_ensemble_settings import StackEnsembleSettings
33+
from azure.ai.ml.entities._job.automl.stack_ensemble_settings import (
34+
StackEnsembleSettings,
35+
)
3636
from azure.ai.ml.entities._job.automl.tabular import (
3737
ClassificationJob,
3838
ColumnTransformer,
@@ -43,7 +43,7 @@
4343
TabularLimitSettings,
4444
)
4545

46-
from .._restclient.v2022_10_01_preview.models import (
46+
from .._restclient.v2023_04_01_preview.models import (
4747
BlockedTransformers,
4848
ClassificationModels,
4949
ClassificationMultilabelPrimaryMetrics,

sdk/ml/azure-ai-ml/azure/ai/ml/constants/_job/automl.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
from enum import Enum
55

66
# pylint: disable=unused-import
7-
from azure.ai.ml._restclient.v2022_10_01_preview.models import NlpLearningRateScheduler
8-
from azure.ai.ml._restclient.v2023_02_01_preview.models import TrainingMode
7+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
8+
NlpLearningRateScheduler,
9+
TrainingMode,
10+
)
911
from azure.ai.ml._utils._experimental import experimental
1012

1113

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/data_transfer/data_transfer_job.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,28 +8,28 @@
88
from pathlib import Path
99
from typing import Dict, Optional, Union
1010

11-
from azure.ai.ml._restclient.v2022_10_01_preview.models import JobBase
11+
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase
1212
from azure.ai.ml._schema.job.data_transfer_job import (
1313
DataTransferCopyJobSchema,
14-
DataTransferImportJobSchema,
1514
DataTransferExportJobSchema,
15+
DataTransferImportJobSchema,
1616
)
1717
from azure.ai.ml.constants import JobType
1818
from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY, TYPE
1919
from azure.ai.ml.constants._component import (
20-
ExternalDataType,
2120
DataTransferBuiltinComponentUri,
2221
DataTransferTaskType,
22+
ExternalDataType,
2323
)
2424
from azure.ai.ml.entities._inputs_outputs import Input, Output
25+
from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem
2526
from azure.ai.ml.entities._util import load_from_dict
2627
from azure.ai.ml.exceptions import (
2728
ErrorCategory,
2829
ErrorTarget,
2930
ValidationErrorType,
3031
ValidationException,
3132
)
32-
from azure.ai.ml.entities._inputs_outputs.external_data import Database, FileSystem
3333

3434
from ..job import Job
3535
from ..job_io_mixin import JobIOMixin

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/job_limits.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,17 @@
66
from abc import ABC
77
from typing import Optional, Union
88

9-
from azure.ai.ml._restclient.v2022_12_01_preview.models import CommandJobLimits as RestCommandJobLimits
10-
from azure.ai.ml._restclient.v2022_12_01_preview.models import SweepJobLimits as RestSweepJobLimits
11-
from azure.ai.ml._utils.utils import from_iso_duration_format, is_data_binding_expression, to_iso_duration_format
9+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
10+
CommandJobLimits as RestCommandJobLimits,
11+
)
12+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
13+
SweepJobLimits as RestSweepJobLimits,
14+
)
15+
from azure.ai.ml._utils.utils import (
16+
from_iso_duration_format,
17+
is_data_binding_expression,
18+
to_iso_duration_format,
19+
)
1220
from azure.ai.ml.constants import JobType
1321
from azure.ai.ml.entities._mixins import RestTranslatableMixin
1422

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/job_resource_configuration.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import logging
77
from typing import Any, Dict, List, Optional
88

9-
from azure.ai.ml._restclient.v2023_04_01_preview.models import JobResourceConfiguration as RestJobResourceConfiguration
9+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
10+
JobResourceConfiguration as RestJobResourceConfiguration,
11+
)
1012
from azure.ai.ml.constants._job.job import JobComputePropertyFields
1113
from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin
1214
from azure.ai.ml.entities._util import convert_ordered_dict_to_dict
@@ -92,6 +94,8 @@ class JobResourceConfiguration(RestTranslatableMixin, DictMixin):
9294
:vartype locations: List[str]
9395
:param instance_type: Optional type of VM used as supported by the compute target.
9496
:type instance_type: str
97+
:param max_instance_count: Optional maximum number of instances or nodes used by the compute target.
98+
:type max_instance_count: int
9599
:param properties: Additional properties bag.
96100
:type properties: Dict[str, Any]
97101
:param docker_args: Extra arguments to pass to the Docker run command. This would override any
@@ -113,12 +117,14 @@ def __init__(
113117
properties: Optional[Dict[str, Any]] = None,
114118
docker_args: Optional[str] = None,
115119
shm_size: Optional[str] = None,
120+
max_instance_count: Optional[int] = None,
116121
**kwargs
117122
): # pylint: disable=unused-argument
118123
self.locations = locations
119124
self.instance_count = instance_count
120125
self.instance_type = instance_type
121126
self.shm_size = shm_size
127+
self.max_instance_count = max_instance_count
122128
self.docker_args = docker_args
123129
self._properties = None
124130
self.properties = properties
@@ -141,6 +147,7 @@ def _to_rest_object(self) -> RestJobResourceConfiguration:
141147
locations=self.locations,
142148
instance_count=self.instance_count,
143149
instance_type=self.instance_type,
150+
max_instance_count=self.max_instance_count,
144151
properties=self.properties.as_dict(),
145152
docker_args=self.docker_args,
146153
shm_size=self.shm_size,
@@ -156,6 +163,7 @@ def _from_rest_object(cls, obj: Optional[RestJobResourceConfiguration]) -> Optio
156163
locations=obj.locations,
157164
instance_count=obj.instance_count,
158165
instance_type=obj.instance_type,
166+
max_instance_count=obj.max_instance_count if hasattr(obj, "max_instance_count") else None,
159167
properties=obj.properties,
160168
docker_args=obj.docker_args,
161169
shm_size=obj.shm_size,
@@ -169,6 +177,7 @@ def __eq__(self, other: object) -> bool:
169177
self.locations == other.locations
170178
and self.instance_count == other.instance_count
171179
and self.instance_type == other.instance_type
180+
and self.max_instance_count == other.max_instance_count
172181
and self.docker_args == other.docker_args
173182
and self.shm_size == other.shm_size
174183
)
@@ -186,6 +195,8 @@ def _merge_with(self, other: "JobResourceConfiguration") -> None:
186195
self.instance_count = other.instance_count
187196
if other.instance_type:
188197
self.instance_type = other.instance_type
198+
if other.max_instance_count:
199+
self.max_instance_count = other.max_instance_count
189200
if other.properties:
190201
self.properties = other.properties
191202
if other.docker_args:

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/job_service.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,21 @@
66

77
import logging
88
from typing import Dict, Optional, Union
9+
910
from typing_extensions import Literal
10-
from azure.ai.ml._restclient.v2022_12_01_preview.models import AllNodes
11-
from azure.ai.ml._restclient.v2022_12_01_preview.models import JobService as RestJobService
11+
12+
from azure.ai.ml._restclient.v2023_04_01_preview.models import AllNodes
13+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
14+
JobService as RestJobService,
15+
)
1216
from azure.ai.ml.constants._job.job import JobServiceTypeNames
1317
from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin
14-
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException
18+
from azure.ai.ml.exceptions import (
19+
ErrorCategory,
20+
ErrorTarget,
21+
ValidationErrorType,
22+
ValidationException,
23+
)
1524

1625
module_logger = logging.getLogger(__name__)
1726

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/resource_configuration.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import logging
77
from typing import Any, Dict, Optional
88

9-
from azure.ai.ml._restclient.v2022_10_01.models import ResourceConfiguration as RestResourceConfiguration
9+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
10+
ResourceConfiguration as RestResourceConfiguration,
11+
)
1012
from azure.ai.ml.constants._job.job import JobComputePropertyFields
1113
from azure.ai.ml.entities._mixins import DictMixin, RestTranslatableMixin
1214

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_job/sweep/objective.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
# ---------------------------------------------------------
44
from typing import Optional
55

6-
from azure.ai.ml._restclient.v2022_02_01_preview.models import Objective as RestObjective
6+
from azure.ai.ml._restclient.v2023_04_01_preview.models import (
7+
Objective as RestObjective,
8+
)
79
from azure.ai.ml.entities._mixins import RestTranslatableMixin
810

911

0 commit comments

Comments
 (0)