77import tempfile
88import zipfile
99from pathlib import Path
10- from typing import Optional , Union
10+ from typing import Union
1111
1212import yaml
1313
14- from azure .ai .ml ._utils ._asset_utils import IgnoreFile , get_local_paths
14+ from azure .ai .ml ._utils ._asset_utils import IgnoreFile , get_local_paths , get_ignore_file
1515from azure .ai .ml .entities ._util import _general_copy
1616from azure .ai .ml .entities ._validation import MutableValidationResult , _ValidationResultBuilder
1717
@@ -29,25 +29,22 @@ def __init__(
2929 self ,
3030 code_path : Union [None , str ],
3131 yaml_path : str ,
32- ignore_file : Optional [InternalComponentIgnoreFile ] = None ,
3332 ):
3433 self .__yaml_path = yaml_path
3534 self .__code_path = code_path
36- self ._ignore_file = ignore_file
3735
3836 self ._tmp_code_path = None
3937 self .__includes = None
40- self . _is_artifact_includes = False
41- self ._artifact_validate_result = _ValidationResultBuilder . success ()
38+ # artifact validation is done on loading now, so need a private variable to store the result
39+ self .__artifact_validate_result = None
4240
4341 @property
4442 def _includes (self ):
4543 if not self ._additional_includes_file_path .is_file ():
4644 return []
4745 if self .__includes is None :
48- self ._is_artifact_includes = self ._is_yaml_format_additional_includes ()
4946 if self ._is_artifact_includes :
50- self .__includes = self ._load_yaml_format_additional_includes ()
47+ self .__includes = self ._load_artifact_additional_includes ()
5148 else :
5249 with open (self ._additional_includes_file_path , "r" ) as f :
5350 lines = f .readlines ()
@@ -84,24 +81,27 @@ def _additional_includes_file_path(self) -> Path:
8481 def code (self ) -> Path :
8582 return self ._tmp_code_path if self ._tmp_code_path else self ._code_path
8683
@staticmethod
def _copy(src: Path, dst: Path, *, ignore_file=None) -> None:
    """Copy a single file, or merge a directory's files, from ``src`` to ``dst``.

    When ``src`` is a file it is copied to ``dst``, creating the parent
    directory of ``dst`` first if needed. Otherwise the file list for
    ``src`` is obtained from ``get_local_paths`` and each file is copied to
    the same relative location under ``dst``.

    :param src: File or directory to copy from.
    :param dst: Destination file path (file case) or destination root
        directory (directory case).
    :param ignore_file: Ignore rules forwarded to ``get_local_paths`` in the
        directory case; an empty ``IgnoreFile()`` is used when falsy. It is
        not consulted when ``src`` is a file.
    """
    # Callers do not always hand over Path objects, so normalise both ends
    # once instead of coercing only in some branches.
    src, dst = Path(src), Path(dst)

    if src.is_file():
        # exist_ok avoids a FileExistsError if the parent directory appears
        # between an existence check and the mkdir call.
        dst.parent.mkdir(parents=True, exist_ok=True)
        _general_copy(src, dst)
        return

    # Files are copied one by one rather than with shutil.copytree, which may
    # raise FileExistsError when the destination folder already exists; the
    # expected behavior here is merging, with ignore rules applied while the
    # file list is collected.
    # TODO: inner ignore file is not supported with current implementation
    # TODO: empty folder will be ignored with current implementation
    local_paths, _ = get_local_paths(
        source_path=str(src),
        ignore_file=ignore_file or IgnoreFile(),
    )
    # local_paths is expected to contain file paths only, so the ignore file
    # does not need to be applied again for the per-file copies.
    for src_path in local_paths:
        src_path = Path(src_path)
        _AdditionalIncludes._copy(src_path, dst / src_path.relative_to(src))
105105
106106 @staticmethod
107107 def _is_folder_to_compress (path : Path ) -> bool :
@@ -163,7 +163,7 @@ def _validate(self) -> MutableValidationResult:
163163 validation_result .append_error (message = error_msg )
164164 return validation_result
165165
166- def resolve (self ) -> None :
166+ def resolve (self , ignore_file : IgnoreFile ) -> None :
167167 """Resolve code and potential additional includes.
168168 If no additional includes is specified, just return and use
169169 original real code path; otherwise, create a tmp folder and copy
@@ -176,34 +176,59 @@ def resolve(self) -> None:
176176 if Path (self ._code_path ).is_file ():
177177 self ._copy (Path (self ._code_path ), tmp_folder_path / Path (self ._code_path ).name )
178178 else :
179- for path in os .listdir (self ._code_path ):
180- src_path = (Path (self ._code_path ) / str (path )).resolve ()
181- if src_path .suffix == ADDITIONAL_INCLUDES_SUFFIX :
182- continue
183- dst_path = tmp_folder_path / str (path )
184- self ._copy (src_path , dst_path )
179+ self ._copy (self ._code_path , tmp_folder_path , ignore_file = ignore_file )
185180 # additional includes
186181 base_path = self ._additional_includes_file_path .parent
182+ # additional includes from artifact will be downloaded to a temp local path on calling
183+ # self._includes, so no need to add specific logic for artifact
184+
185+ # TODO: skip ignored files defined in code when copying additional includes
186+ # copy additional includes disregarding ignore files as current ignore file implementation
187+ # is based on absolute path, which is not suitable for additional includes
187188 for additional_include in self ._includes :
188189 src_path = Path (additional_include )
189190 if not src_path .is_absolute ():
190191 src_path = (base_path / additional_include ).resolve ()
192+ dst_path = (tmp_folder_path / src_path .name ).resolve ()
193+
191194 if self ._is_folder_to_compress (src_path ):
192- self ._resolve_folder_to_compress (additional_include , Path (tmp_folder_path ))
195+ self ._resolve_folder_to_compress (
196+ additional_include ,
197+ Path (tmp_folder_path ),
198+ # TODO: seems it won't work as current ignore file implementation is based on absolute path
199+ ignore_file = ignore_file ,
200+ )
201+ elif src_path .is_dir ():
202+ # support ignore file in additional includes
203+ self ._copy (src_path , dst_path , ignore_file = get_ignore_file (src_path ))
193204 else :
194- dst_path = (tmp_folder_path / src_path .name ).resolve ()
195- self ._copy (src_path , dst_path , ignore_file = IgnoreFile () if self ._is_artifact_includes else None )
205+ # do not apply ignore file for files
206+ self ._copy (src_path , dst_path )
207+
208+ # Remove ignored files copied from additional includes
209+ rebased_ignore_file = InternalComponentIgnoreFile (
210+ directory_path = tmp_folder_path ,
211+ additional_include_file_name = self ._additional_includes_file_path .name ,
212+ )
213+ for base , dirs , files in os .walk (tmp_folder_path ):
214+ for name in files + dirs :
215+ path = os .path .join (base , name )
216+ if rebased_ignore_file .is_file_excluded (path ):
217+ if os .path .isdir (path ):
218+ shutil .rmtree (path )
219+ if os .path .isfile (path ):
220+ os .remove (path )
196221 self ._tmp_code_path = tmp_folder_path # point code path to tmp folder
197222 return
198223
def _resolve_folder_to_compress(self, include: str, dst_path: Path, ignore_file: IgnoreFile) -> None:
    """Compress the folder behind a zip additional include into ``dst_path``.

    ``include`` is resolved relative to the directory holding the additional
    includes file. The folder to compress is the sibling of that resolved
    path whose name is the include's stem, and the archive is written to
    ``dst_path`` under the include's own file name.

    :param include: Additional include entry that names the zip file.
    :param dst_path: Directory in which the zip archive is created.
    :param ignore_file: Ignore rules forwarded to ``get_local_paths`` when
        collecting the files to add to the archive.
    """
    archive_target = (self._additional_includes_file_path.parent / include).resolve()
    source_folder = archive_target.parent / archive_target.stem
    archive_base = source_folder.parent

    with zipfile.ZipFile(dst_path / archive_target.name, "w") as archive:
        # The folder itself goes in first so the archive has it as its root entry.
        archive.write(source_folder, os.path.relpath(source_folder, archive_base))
        files_to_pack, _ = get_local_paths(source_path=str(source_folder), ignore_file=ignore_file)
        for file_path in files_to_pack:
            # Entry names are kept relative to the folder's parent so they sit under the root entry.
            archive.write(file_path, os.path.relpath(file_path, archive_base))
209234
@@ -216,7 +241,8 @@ def cleanup(self) -> None:
216241 shutil .rmtree (self ._tmp_code_path )
217242 self ._tmp_code_path = None # point code path back to real path
218243
219- def _is_yaml_format_additional_includes (self ):
244+ @property
245+ def _is_artifact_includes (self ):
220246 try :
221247 with open (self ._additional_includes_file_path ) as f :
222248 additional_includes_configs = yaml .safe_load (f )
@@ -227,7 +253,16 @@ def _is_yaml_format_additional_includes(self):
227253 except Exception : # pylint: disable=broad-except
228254 return False
229255
230- def _load_yaml_format_additional_includes (self ):
@property
def _artifact_validate_result(self):
    """Validation result recorded while loading artifact additional includes.

    Returns a fresh success result when the additional includes are not in
    artifact format. Otherwise returns the stored result, loading the
    artifact additional includes first if no result has been stored yet.
    """
    if self._is_artifact_includes:
        if self.__artifact_validate_result is None:
            # artifact validation happens as part of loading, so trigger the load here
            self._load_artifact_additional_includes()
        return self.__artifact_validate_result
    return _ValidationResultBuilder.success()
264+
265+ def _load_artifact_additional_includes (self ):
231266 """
232267 Load the additional includes by yaml format.
233268 Addition includes is a list of include files, such as local paths and Azure Devops Artifacts.
@@ -247,7 +282,7 @@ def _load_yaml_format_additional_includes(self):
247282 :rtype additional_includes: List[str]
248283 """
249284 additional_includes , conflict_files = [], {}
250- self ._artifact_validate_result = _ValidationResultBuilder .success ()
285+ self .__artifact_validate_result = _ValidationResultBuilder .success ()
251286
252287 def merge_local_path_to_additional_includes (local_path , config_info ):
253288 additional_includes .append (local_path )
0 commit comments