Skip to content

Commit 6a83ffa

Browse files
[Storage] Add progress callback to download_blob methods (Azure#24276)
1 parent c59eb64 commit 6a83ffa

12 files changed

+1517
-182
lines changed

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -840,6 +840,11 @@ def download_blob(self, offset=None, length=None, **kwargs):
840840
The number of parallel connections with which to download.
841841
:keyword str encoding:
842842
Encoding to decode the downloaded bytes. Default is None, i.e. no decoding.
843+
:keyword progress_hook:
844+
A callback to track the progress of a long running download. The signature is
845+
function(current: int, total: int) where current is the number of bytes transferred
846+
so far, and total is the total size of the download.
847+
:paramtype progress_hook: Callable[[int, int], None]
843848
:keyword int timeout:
844849
The timeout parameter is expressed in seconds. This method may make
845850
multiple calls to the Azure service and the timeout will apply to

sdk/storage/azure-storage-blob/azure/storage/blob/_container_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,11 @@ def download_blob(self, blob, offset=None, length=None, **kwargs):
11341134
The number of parallel connections with which to download.
11351135
:keyword str encoding:
11361136
Encoding to decode the downloaded bytes. Default is None, i.e. no decoding.
1137+
:keyword progress_hook:
1138+
A callback to track the progress of a long running download. The signature is
1139+
function(current: int, total: int) where current is the number of bytes transferred
1140+
so far, and total is the total size of the download.
1141+
:paramtype progress_hook: Callable[[int, int], None]
11371142
:keyword int timeout:
11381143
The timeout parameter is expressed in seconds. This method may make
11391144
multiple calls to the Azure service and the timeout will apply to

sdk/storage/azure-storage-blob/azure/storage/blob/_download.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import warnings
1212
from io import BytesIO
13-
from typing import Iterator
13+
from typing import Iterator, Union
1414

1515
import requests
1616
from azure.core.exceptions import HttpResponseError, ServiceResponseError
@@ -81,6 +81,7 @@ def __init__(
8181
parallel=None,
8282
validate_content=None,
8383
encryption_options=None,
84+
progress_hook=None,
8485
**kwargs
8586
):
8687
self.client = client
@@ -96,6 +97,7 @@ def __init__(
9697
self.stream = stream
9798
self.stream_lock = threading.Lock() if parallel else None
9899
self.progress_lock = threading.Lock() if parallel else None
100+
self.progress_hook = progress_hook
99101

100102
# For a parallel download, the stream is always seekable, so we note down the current position
101103
# in order to seek to the right place when out-of-order chunks come in
@@ -143,6 +145,9 @@ def _update_progress(self, length):
143145
else:
144146
self.progress_total += length
145147

148+
if self.progress_hook:
149+
self.progress_hook(self.progress_total, self.total_size)
150+
146151
def _write_to_stream(self, chunk_data, chunk_start):
147152
if self.stream_lock:
148153
with self.stream_lock: # pylint: disable=not-context-manager
@@ -322,6 +327,7 @@ def __init__(
322327
self._encoding = encoding
323328
self._validate_content = validate_content
324329
self._encryption_options = encryption_options or {}
330+
self._progress_hook = kwargs.pop('progress_hook', None)
325331
self._request_options = kwargs
326332
self._location_mode = None
327333
self._download_complete = False
@@ -514,7 +520,6 @@ def readall(self):
514520
"""Download the contents of this blob.
515521
516522
This operation is blocking until all data is downloaded.
517-
518523
:rtype: bytes or str
519524
"""
520525
stream = BytesIO()
@@ -583,6 +588,9 @@ def readinto(self, stream):
583588

584589
# Write the content to the user stream
585590
stream.write(self._current_content)
591+
if self._progress_hook:
592+
self._progress_hook(len(self._current_content), self.size)
593+
586594
if self._download_complete:
587595
return self.size
588596

@@ -604,6 +612,7 @@ def readinto(self, stream):
604612
validate_content=self._validate_content,
605613
encryption_options=self._encryption_options,
606614
use_location=self._location_mode,
615+
progress_hook=self._progress_hook,
607616
**self._request_options
608617
)
609618
if parallel:

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_blob_client_async.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,11 @@ async def download_blob(self, offset=None, length=None, **kwargs):
465465
The number of parallel connections with which to download.
466466
:keyword str encoding:
467467
Encoding to decode the downloaded bytes. Default is None, i.e. no decoding.
468+
:keyword progress_hook:
469+
An async callback to track the progress of a long running download. The signature is
470+
function(current: int, total: int) where current is the number of bytes transferred
471+
so far, and total is the total size of the download.
472+
:paramtype progress_hook: Callable[[int, int], Awaitable[None]]
468473
:keyword int timeout:
469474
The timeout parameter is expressed in seconds. This method may make
470475
multiple calls to the Azure service and the timeout will apply to

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_container_client_async.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,11 @@ async def download_blob(self, blob, offset=None, length=None, **kwargs):
994994
The number of parallel connections with which to download.
995995
:keyword str encoding:
996996
Encoding to decode the downloaded bytes. Default is None, i.e. no decoding.
997+
:keyword progress_hook:
998+
An async callback to track the progress of a long running download. The signature is
999+
function(current: int, total: int) where current is the number of bytes transferred
1000+
so far, and total is the total size of the download.
1001+
:paramtype progress_hook: Callable[[int, int], Awaitable[None]]
9971002
:keyword int timeout:
9981003
The timeout parameter is expressed in seconds. This method may make
9991004
multiple calls to the Azure service and the timeout will apply to

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_download_async.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ async def _update_progress(self, length):
6767
else:
6868
self.progress_total += length
6969

70+
if self.progress_hook:
71+
await self.progress_hook(self.progress_total, self.total_size)
72+
7073
async def _write_to_stream(self, chunk_data, chunk_start):
7174
if self.stream_lock:
7275
async with self.stream_lock: # pylint: disable=not-async-context-manager
@@ -220,6 +223,7 @@ def __init__(
220223
self._encoding = encoding
221224
self._validate_content = validate_content
222225
self._encryption_options = encryption_options or {}
226+
self._progress_hook = kwargs.pop('progress_hook', None)
223227
self._request_options = kwargs
224228
self._location_mode = None
225229
self._download_complete = False
@@ -472,6 +476,9 @@ async def readinto(self, stream):
472476

473477
# Write the content to the user stream
474478
stream.write(self._current_content)
479+
if self._progress_hook:
480+
await self._progress_hook(len(self._current_content), self.size)
481+
475482
if self._download_complete:
476483
return self.size
477484

@@ -493,6 +500,7 @@ async def readinto(self, stream):
493500
validate_content=self._validate_content,
494501
encryption_options=self._encryption_options,
495502
use_location=self._location_mode,
503+
progress_hook=self._progress_hook,
496504
**self._request_options)
497505

498506
dl_tasks = downloader.get_chunk_offsets()

sdk/storage/azure-storage-blob/tests/recordings/test_get_blob.test_get_blob_progress_chunked.yaml

Lines changed: 434 additions & 0 deletions
Large diffs are not rendered by default.

sdk/storage/azure-storage-blob/tests/recordings/test_get_blob.test_get_blob_progress_single_get.yaml

Lines changed: 196 additions & 0 deletions
Large diffs are not rendered by default.

sdk/storage/azure-storage-blob/tests/recordings/test_get_blob_async.test_get_blob_progress_chunked.yaml

Lines changed: 304 additions & 0 deletions
Large diffs are not rendered by default.

sdk/storage/azure-storage-blob/tests/recordings/test_get_blob_async.test_get_blob_progress_single_get.yaml

Lines changed: 140 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)