Skip to content

Commit bfd3e81

Browse files
authored
[Datalake]Batch delete files or empty directories (Azure#21269)
1 parent 4691c52 commit bfd3e81

11 files changed

+3126
-5
lines changed

sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_file_system_client.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# license information.
55
# --------------------------------------------------------------------------
66
import functools
7-
from typing import Optional, Any, Union
7+
from typing import Optional, Any, Union, Iterator
88

99

1010
try:
@@ -14,6 +14,7 @@
1414
from urllib2 import quote, unquote # type: ignore
1515
import six
1616

17+
from azure.core.pipeline.transport import HttpResponse
1718
from azure.core.pipeline import Pipeline
1819
from azure.core.exceptions import HttpResponseError
1920
from azure.core.paging import ItemPaged
@@ -809,6 +810,71 @@ def _get_root_directory_client(self):
809810
"""
810811
return self.get_directory_client('/')
811812

813+
def delete_files(self, *files, **kwargs):
    # type: (...) -> Iterator[HttpResponse]
    """Marks the specified files or empty directories for deletion.

    The files/empty directories are later deleted during garbage collection.

    If a delete retention policy is enabled for the service, then this operation soft deletes the
    files/empty directories and retains the files or snapshots for the specified number of days.
    After the specified number of days, the files' data is removed from the service during garbage collection.
    Soft deleted files/empty directories are accessible through :func:`list_deleted_paths()`.

    :param files:
        The files/empty directories to delete. This can be a single file/empty directory, or multiple values can
        be supplied, where each value is either the name of the file/directory (str), a dict, or
        FileProperties/DirectoryProperties.

        .. note::
            When the file/dir type is dict, here's a list of keys, value rules.

            file/directory name:
                key: 'name', value type: str
            if the file modified or not:
                key: 'if_modified_since', 'if_unmodified_since', value type: datetime
            etag:
                key: 'etag', value type: str
            match the etag or not:
                key: 'match_condition', value type: MatchConditions
            lease:
                key: 'lease_id', value type: Union[str, LeaseClient]
            timeout for subrequest:
                key: 'timeout', value type: int

    :type files: list[str], list[dict],
        or list[Union[~azure.storage.filedatalake.FileProperties, ~azure.storage.filedatalake.DirectoryProperties]]
    :keyword ~datetime.datetime if_modified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only
        if the resource has been modified since the specified time.
    :keyword ~datetime.datetime if_unmodified_since:
        A DateTime value. Azure expects the date value passed in to be UTC.
        If timezone is included, any non-UTC datetimes will be converted to UTC.
        If a date is passed in without timezone info, it is assumed to be UTC.
        Specify this header to perform the operation only if
        the resource has not been modified since the specified date/time.
    :keyword bool raise_on_any_failure:
        This is a boolean param which defaults to True. When this is set, an exception
        is raised even if there is a single operation failure.
    :keyword int timeout:
        The timeout parameter is expressed in seconds.
    :return: An iterator of responses, one for each file or directory in order
    :rtype: Iterator[~azure.core.pipeline.transport.HttpResponse]

    .. admonition:: Example:

        .. literalinclude:: ../samples/datalake_samples_file_system.py
            :start-after: [START batch_delete_files_or_empty_directories]
            :end-before: [END batch_delete_files_or_empty_directories]
            :language: python
            :dedent: 4
            :caption: Deleting multiple files or empty directories.
    """
    # The batch is not built here: every positional and keyword argument is
    # forwarded unchanged to the wrapped container client's delete_blobs.
    return self._container_client.delete_blobs(*files, **kwargs)
877+
812878
def get_directory_client(self, directory # type: Union[DirectoryProperties, str]
813879
):
814880
# type: (...) -> DataLakeDirectoryClient

sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def __init__(self, **kwargs):
186186
self.content_settings = ContentSettings(**kwargs)
187187

188188

189-
class PathProperties(object):
189+
class PathProperties(DictMixin):
190190
"""Path properties listed by get_paths api.
191191
192192
:ivar str name: the full path for a file or directory.

sdk/storage/azure-storage-file-datalake/azure/storage/filedatalake/aio/_file_system_client_async.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
# pylint: disable=invalid-overridden-method
88
import functools
99
from typing import ( # pylint: disable=unused-import
10-
Union, Optional, Any, Dict, TYPE_CHECKING
11-
)
10+
Union, Optional, Any, Dict, TYPE_CHECKING,
11+
AsyncIterator)
12+
13+
from azure.core.pipeline.transport import AsyncHttpResponse
1214

1315
from azure.core.exceptions import HttpResponseError
1416
from azure.core.tracing.decorator import distributed_trace
@@ -714,6 +716,72 @@ async def delete_file(self, file, # type: Union[FileProperties, str]
714716
await file_client.delete_file(**kwargs)
715717
return file_client
716718

719+
@distributed_trace_async
async def delete_files(self, *files, **kwargs):
    # type: (...) -> AsyncIterator[AsyncHttpResponse]
    """Mark a batch of files and/or empty directories for deletion.

    Deletion is deferred: the files/empty directories are removed later, during garbage collection.

    When the service has a delete retention policy enabled, this operation is a soft delete. The
    files/empty directories (or snapshots) are kept for the configured number of days, after which
    the files' data is removed during garbage collection. Until then, soft deleted files/empty
    directories can be found through :func:`list_deleted_paths()`.

    :param files:
        One or more files/empty directories to delete. Each value is either the name of the
        file/directory (str), a dict, or FileProperties/DirectoryProperties.

        .. note::
            A dict value may carry the following keys.

            file/directory name:
                key: 'name', value type: str
            modification-time conditions:
                key: 'if_modified_since', 'if_unmodified_since', value type: datetime
            etag:
                key: 'etag', value type: str
            how the etag must match:
                key: 'match_condition', value type: MatchConditions
            lease:
                key: 'lease_id', value type: Union[str, LeaseClient]
            timeout for the sub-request:
                key: 'timeout', value type: int

    :type files: list[str], list[dict],
        or list[Union[~azure.storage.filedatalake.FileProperties, ~azure.storage.filedatalake.DirectoryProperties]]
    :keyword ~datetime.datetime if_modified_since:
        A DateTime value, expected by Azure to be UTC.
        Non-UTC datetimes that carry a timezone are converted to UTC;
        a value without timezone info is assumed to be UTC.
        When given, the operation is performed only
        if the resource has been modified since the specified time.
    :keyword ~datetime.datetime if_unmodified_since:
        A DateTime value, expected by Azure to be UTC.
        Non-UTC datetimes that carry a timezone are converted to UTC;
        a value without timezone info is assumed to be UTC.
        When given, the operation is performed only if
        the resource has not been modified since the specified date/time.
    :keyword bool raise_on_any_failure:
        Boolean flag, True by default. When set, an exception
        is raised even if only a single operation fails.
    :keyword int timeout:
        The timeout, expressed in seconds.
    :return: An async iterator of responses, one for each file or directory in order
    :rtype: AsyncIterator[~azure.core.pipeline.transport.AsyncHttpResponse]

    .. admonition:: Example:

        .. literalinclude:: ../samples/datalake_samples_file_system_async.py
            :start-after: [START batch_delete_files_or_empty_directories]
            :end-before: [END batch_delete_files_or_empty_directories]
            :language: python
            :dedent: 4
            :caption: Deleting multiple files or empty directories.
    """
    # All arguments are handed over untouched to the wrapped container client.
    container_client = self._container_client
    batch_responses = await container_client.delete_blobs(*files, **kwargs)
    return batch_responses
784+
717785
@distributed_trace_async
718786
async def _undelete_path(self, deleted_path_name, deletion_id, **kwargs):
719787
# type: (str, str, **Any) -> Union[DataLakeDirectoryClient, DataLakeFileClient]

sdk/storage/azure-storage-file-datalake/samples/datalake_samples_file_system.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,55 @@ def create_file_from_file_system(self):
209209

210210
file_system_client.delete_file_system()
211211

212+
# [START batch_delete_files_or_empty_directories]
213+
def batch_delete_files_or_empty_directories(self):
    """Create three files and two empty directories, then delete them in one batch.

    The batch mixes the accepted ways of naming a path: a plain name, a
    properties object, and a dict that also carries an etag condition.
    """
    from azure.storage.filedatalake import FileSystemClient
    file_system_client = FileSystemClient.from_connection_string(self.connection_string, "filesystem")

    file_system_client.create_file_system()

    data = b'hello world'

    # Setup errors are allowed to propagate. Swallowing them would only
    # resurface later as a NameError on the variables assigned below.

    # create file1
    file_system_client.get_file_client('file1').upload_data(data, overwrite=True)

    # create file2, then pass file properties in batch delete later
    file2 = file_system_client.get_file_client('file2')
    file2.upload_data(data, overwrite=True)
    file2_properties = file2.get_file_properties()

    # create file3 and batch delete it later only if the etag matches this file3 etag
    file3 = file_system_client.get_file_client('file3')
    file3.upload_data(data, overwrite=True)
    file3_etag = file3.get_file_properties().etag

    # create dir1. Empty directory can be deleted using delete_files
    file_system_client.get_directory_client('dir1').create_directory()

    # create dir2, then pass directory properties in batch delete later
    dir2 = file_system_client.get_directory_client('dir2')
    dir2.create_directory()
    dir2_properties = dir2.get_directory_properties()

    # Act
    # delete_files is typed as returning an iterator, so materialize it
    # before calling len() or indexing into the sub-responses.
    response = list(file_system_client.delete_files(
        'file1',
        file2_properties,
        {'name': 'file3', 'etag': file3_etag},
        'dir1',
        dir2_properties,
        raise_on_any_failure=False
    ))
    # len() is an int and cannot be concatenated to a str directly.
    print("total number of sub-responses:" + str(len(response)))
    print(response[0].status_code)
    print(response[2].status_code)
    print(response[3].status_code)
258+
# [END batch_delete_files_or_empty_directories]
259+
260+
212261
if __name__ == '__main__':
213262
sample = FileSystemSamples()
214263
sample.file_system_sample()
@@ -217,3 +266,4 @@ def create_file_from_file_system(self):
217266
sample.list_paths_in_file_system()
218267
sample.get_file_client_from_file_system()
219268
sample.create_file_from_file_system()
269+
sample.batch_delete_files_or_empty_directories()

sdk/storage/azure-storage-file-datalake/samples/datalake_samples_file_system_async.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,64 @@ async def create_file_from_file_system(self):
217217

218218
await file_system_client.delete_file_system()
219219

220+
# [START batch_delete_files_or_empty_directories]
221+
async def batch_delete_files_or_empty_directories(self):
    """Create three files and two empty directories, then delete them in one batch.

    The batch mixes the accepted ways of naming a path: a plain name, a
    properties object, and a dict that also carries an etag condition.
    """
    from azure.storage.filedatalake.aio import FileSystemClient
    file_system_client = FileSystemClient.from_connection_string(self.connection_string, "filesystemforcreate")

    async with file_system_client:
        await file_system_client.create_file_system()

        data = b'hello world'

        # Setup errors are allowed to propagate. Swallowing them would only
        # resurface later as a NameError on the variables assigned below.

        # create file1
        await file_system_client.get_file_client('file1').upload_data(data, overwrite=True)

        # create file2, then pass file properties in batch delete later
        file2 = file_system_client.get_file_client('file2')
        await file2.upload_data(data, overwrite=True)
        file2_properties = await file2.get_file_properties()

        # create file3 and batch delete it later only if the etag matches this file3 etag
        file3 = file_system_client.get_file_client('file3')
        await file3.upload_data(data, overwrite=True)
        file3_props = await file3.get_file_properties()
        file3_etag = file3_props.etag

        # create dir1
        # empty directory can be deleted using delete_files
        await file_system_client.get_directory_client('dir1').create_directory()

        # create dir2, then pass directory properties in batch delete later
        dir2 = file_system_client.get_directory_client('dir2')
        await dir2.create_directory()
        dir2_properties = await dir2.get_directory_properties()

        # Act
        # The awaited delete_files result is an async iterator; _to_list drains
        # it into a list so that len() and indexing work on the sub-responses.
        response = await self._to_list(await file_system_client.delete_files(
            'file1',
            file2_properties,
            {'name': 'file3', 'etag': file3_etag},
            'dir1',
            dir2_properties,
            raise_on_any_failure=False
        ))
        # len() is an int and cannot be concatenated to a str directly.
        print("total number of sub-responses:" + str(len(response)))
        print(response[0].status_code)
        print(response[2].status_code)
        print(response[3].status_code)
270+
271+
async def _to_list(self, async_iterator):
    """Drain ``async_iterator`` and return everything it yielded as a list, in order."""
    return [entry async for entry in async_iterator]
276+
# [END batch_delete_files_or_empty_directories]
277+
220278

221279
async def run():
222280
sample = FileSystemSamplesAsync()
@@ -226,6 +284,7 @@ async def run():
226284
await sample.list_paths_in_file_system()
227285
await sample.get_file_client_from_file_system()
228286
await sample.create_file_from_file_system()
287+
await sample.batch_delete_files_or_empty_directories()
229288

230289
if __name__ == '__main__':
231290
loop = asyncio.get_event_loop()

0 commit comments

Comments
 (0)