@@ -64,6 +64,81 @@ def check_name_and_version(output, output_name, output_version):
6464 assert output .version == output_version
6565
6666
def build_pipeline_with_parallel_run_function(data, literal_input=None):
    """Build a pipeline whose single node is produced by ``parallel_run_function()``.

    When ``literal_input`` is None the pipeline only binds the data input; otherwise
    the pipeline exposes an extra literal input and binds it to the node's run
    settings (instance_count, max concurrency and both error thresholds).
    """
    run_task = RunFunction(
        code="./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/",
        entry_script="score.py",
        program_arguments="--job_output_path ${{outputs.job_output_path}}",
        environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33",
    )

    # Parallel node factory built from parallel_run_function().
    parallel_function = parallel_run_function(
        display_name="my-evaluate-job",
        inputs={
            "job_data_path": Input(
                type=AssetTypes.MLTABLE,
                path="./tests/test_configs/dataset/mnist-data",
                mode=InputOutputModes.EVAL_MOUNT,
            ),
            "job_data_path_optional": Input(
                type=AssetTypes.MLTABLE,
                mode=InputOutputModes.EVAL_MOUNT,
                optional=True,
            ),
        },
        outputs={"job_output_path": Output(type=AssetTypes.URI_FOLDER, mode="rw_mount")},
        mini_batch_size="5",
        task=run_task,
        logging_level="DEBUG",
        max_concurrency_per_instance=1,
        error_threshold=1,
        mini_batch_error_threshold=1,
        resources={"instance_count": 2},
        input_data="${{inputs.job_data_path}}",
    )

    if literal_input is None:

        @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function", default_compute="cpu-cluster")
        def parallel_in_pipeline(job_data_path):
            node1 = parallel_function(job_data_path=job_data_path)
            # TODO 2104247: node1.task will be kept as a local path when submitting the pipeline job.
            node1.task = None
            return {"pipeline_output": node1.outputs.job_output_path}

        return parallel_in_pipeline(data)

    @dsl.pipeline(experiment_name="test_pipeline_with_parallel_function", default_compute="cpu-cluster")
    def parallel_in_pipeline(job_data_path, literal_input):
        node1 = parallel_function(job_data_path=job_data_path)
        # TODO 2104247: node1.task will be kept as a local path when submitting the pipeline job.
        node1.task = None
        # Bind the pipeline-level literal input to the node's run settings.
        node1.resources.instance_count = literal_input
        node1.max_concurrency_per_instance = literal_input
        node1.error_threshold = literal_input
        node1.mini_batch_error_threshold = literal_input
        return {"pipeline_output": node1.outputs.job_output_path}

    return parallel_in_pipeline(data, literal_input)
140+
141+
67142@pytest .mark .usefixtures (
68143 "enable_environment_id_arm_expansion" ,
69144 "enable_pipeline_private_preview_features" ,
@@ -1568,6 +1643,44 @@ def parallel_in_pipeline(job_data_path, score_model):
15681643 assert_job_input_output_types (pipeline_job )
15691644 assert pipeline_job .settings .default_compute == "cpu-cluster"
15701645
1646+ def test_parallel_components_with_tabular_input_bind_to_literal_input (self , client : MLClient ) -> None :
1647+ components_dir = tests_root_dir / "test_configs/dsl_pipeline/parallel_component_with_tabular_input"
1648+
1649+ batch_inference = load_component (source = str (components_dir / "tabular_input_e2e.yml" ))
1650+
1651+ # Construct pipeline
1652+ @dsl .pipeline (default_compute = "cpu-cluster" )
1653+ def parallel_in_pipeline (job_data_path , score_model , literal_input ):
1654+ batch_inference_node = batch_inference (job_data_path = job_data_path , score_model = score_model )
1655+ batch_inference_node .mini_batch_size = 5
1656+ batch_inference_node .max_concurrency_per_instance = literal_input
1657+ batch_inference_node .error_threshold = literal_input
1658+ batch_inference_node .mini_batch_error_threshold = literal_input
1659+
1660+ pipeline = parallel_in_pipeline (
1661+ job_data_path = Input (
1662+ type = AssetTypes .MLTABLE ,
1663+ path = "./tests/test_configs/dataset/neural-iris-mltable" ,
1664+ mode = InputOutputModes .DIRECT ,
1665+ ),
1666+ score_model = Input (
1667+ path = "./tests/test_configs/model" , type = AssetTypes .URI_FOLDER , mode = InputOutputModes .DOWNLOAD
1668+ ),
1669+ literal_input = 2 ,
1670+ )
1671+ # submit pipeline job
1672+ pipeline_job = assert_job_cancel (pipeline , client , experiment_name = "parallel_in_pipeline" )
1673+
1674+ # check required fields in job dict
1675+ job_dict = pipeline_job ._to_dict ()
1676+ expected_keys = ["status" , "properties" , "creation_context" ]
1677+ for k in expected_keys :
1678+ assert k in job_dict .keys (), f"failed to get { k } in { job_dict } "
1679+
1680+ # original job did not change
1681+ assert_job_input_output_types (pipeline_job )
1682+ assert pipeline_job .settings .default_compute == "cpu-cluster"
1683+
15711684 def test_parallel_components_with_file_input (self , client : MLClient ) -> None :
15721685 components_dir = tests_root_dir / "test_configs/dsl_pipeline/parallel_component_with_file_input"
15731686
@@ -1599,67 +1712,12 @@ def parallel_in_pipeline(job_data_path):
15991712 assert pipeline_job .settings .default_compute == "cpu-cluster"
16001713
16011714 def test_parallel_run_function (self , client : MLClient ):
1602- # command job with dict distribution
1603- environment = "AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33"
1604- inputs = {
1605- "job_data_path" : Input (
1606- type = AssetTypes .MLTABLE ,
1607- path = "./tests/test_configs/dataset/mnist-data" ,
1608- mode = InputOutputModes .EVAL_MOUNT ,
1609- ),
1610- "job_data_path_optional" : Input (
1611- type = AssetTypes .MLTABLE ,
1612- mode = InputOutputModes .EVAL_MOUNT ,
1613- optional = True ,
1614- ),
1615- }
1616- input_data = "${{inputs.job_data_path}}"
1617- outputs = {"job_output_path" : Output (type = AssetTypes .URI_FOLDER , mode = "rw_mount" )}
1618- expected_resources = {"instance_count" : 2 }
1619-
1620- task = RunFunction (
1621- code = "./tests/test_configs/dsl_pipeline/parallel_component_with_file_input/src/" ,
1622- entry_script = "score.py" ,
1623- program_arguments = "--job_output_path ${{outputs.job_output_path}}" ,
1624- environment = environment ,
1625- )
1626- logging_level = "DEBUG"
1627- max_concurrency_per_instance = 1
1628- error_threshold = 1
1629- mini_batch_error_threshold = 1
1630- mini_batch_size = "5"
1631-
1632- # Parallel from parallel_run_function()
1633- parallel_function = parallel_run_function (
1634- display_name = "my-evaluate-job" ,
1635- inputs = inputs ,
1636- outputs = outputs ,
1637- mini_batch_size = mini_batch_size ,
1638- task = task ,
1639- logging_level = logging_level ,
1640- max_concurrency_per_instance = max_concurrency_per_instance ,
1641- error_threshold = error_threshold ,
1642- mini_batch_error_threshold = mini_batch_error_threshold ,
1643- resources = expected_resources ,
1644- input_data = input_data ,
1645- )
1646-
16471715 data = Input (
16481716 type = AssetTypes .MLTABLE ,
16491717 path = "./tests/test_configs/dataset/mnist-data" ,
16501718 mode = InputOutputModes .EVAL_MOUNT ,
16511719 )
1652-
1653- @dsl .pipeline (experiment_name = "test_pipeline_with_parallel_function" , default_compute = "cpu-cluster" )
1654- def parallel_in_pipeline (job_data_path ):
1655- node1 = parallel_function (job_data_path = job_data_path )
1656- # TODO 2104247: node1.task will be kept as a local path when submitting the pipeline job.
1657- node1 .task = None
1658- return {
1659- "pipeline_output" : node1 .outputs .job_output_path ,
1660- }
1661-
1662- pipeline = parallel_in_pipeline (data )
1720+ pipeline = build_pipeline_with_parallel_run_function (data )
16631721
16641722 pipeline_job = client .create_or_update (pipeline ) # submit pipeline job
16651723
@@ -1712,6 +1770,65 @@ def parallel_in_pipeline(job_data_path):
17121770 assert_job_input_output_types (pipeline_job )
17131771 assert pipeline_job .settings .default_compute == "cpu-cluster"
17141772
1773+ def test_parallel_run_function_run_settings_bind_to_literal_input (self , client : MLClient ):
1774+ data = Input (
1775+ type = AssetTypes .MLTABLE ,
1776+ path = "./tests/test_configs/dataset/mnist-data" ,
1777+ mode = InputOutputModes .EVAL_MOUNT ,
1778+ )
1779+ pipeline = build_pipeline_with_parallel_run_function (data , 2 )
1780+
1781+ pipeline_job = client .create_or_update (pipeline ) # submit pipeline job
1782+
1783+ actual_job = omit_with_wildcard (pipeline_job ._to_rest_object ().properties .as_dict (), * common_omit_fields )
1784+ expected_job = {
1785+ "tags" : {},
1786+ "is_archived" : False ,
1787+ "job_type" : "Pipeline" ,
1788+ "inputs" : {
1789+ "job_data_path" : {"job_input_type" : "mltable" , "mode" : "EvalMount" },
1790+ "literal_input" : {"job_input_type" : "literal" , "value" : "2" },
1791+ },
1792+ "jobs" : {
1793+ "node1" : {
1794+ "input_data" : "${{inputs.job_data_path}}" ,
1795+ "display_name" : "my-evaluate-job" ,
1796+ "inputs" : {
1797+ "job_data_path" : {
1798+ "job_input_type" : "literal" ,
1799+ "value" : "${{parent.inputs.job_data_path}}" ,
1800+ }
1801+ },
1802+ "name" : "node1" ,
1803+ "mini_batch_size" : 5 ,
1804+ "logging_level" : "DEBUG" ,
1805+ "max_concurrency_per_instance" : "${{parent.inputs.literal_input}}" ,
1806+ "error_threshold" : "${{parent.inputs.literal_input}}" ,
1807+ "mini_batch_error_threshold" : "${{parent.inputs.literal_input}}" ,
1808+ "outputs" : {"job_output_path" : {"type" : "literal" , "value" : "${{parent.outputs.pipeline_output}}" }},
1809+ "resources" : {"instance_count" : "${{parent.inputs.literal_input}}" },
1810+ "type" : "parallel" ,
1811+ },
1812+ },
1813+ "outputs" : {
1814+ "pipeline_output" : {
1815+ "mode" : "ReadWriteMount" ,
1816+ "job_output_type" : "uri_folder" ,
1817+ }
1818+ },
1819+ "settings" : {"default_compute" : "cpu-cluster" },
1820+ }
1821+ assert expected_job == actual_job
1822+ # check required fields in job dict
1823+ job_dict = pipeline_job ._to_dict ()
1824+ expected_keys = ["status" , "properties" , "creation_context" ]
1825+ for k in expected_keys :
1826+ assert k in job_dict .keys (), f"failed to get { k } in { job_dict } "
1827+
1828+ # original job did not change
1829+ assert_job_input_output_types (pipeline_job )
1830+ assert pipeline_job .settings .default_compute == "cpu-cluster"
1831+
17151832 def test_parallel_job (self , randstr : Callable [[str ], str ], client : MLClient ):
17161833 environment = "AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:33"
17171834 inputs = {
0 commit comments