Skip to content

Commit a7c8aa5

Browse files
authored
Add large file download warning and add AzCopy example to upload warning (Azure#28125)
* Fix bug by replacing 'working_dir' with 'path'
* Add file size warning for Gen2 uploads
* Add file size warning for Gen2 uploads
* Add upload/download warning with AzCopy example for files > 100 MB
* Update file size warning message
* Revert unintended change
* Address pylint and cspell errors
* Address cspell errors
* Make sure proper cloud endpoint is used in the warning URL
1 parent 016b4e2 commit a7c8aa5

File tree

3 files changed: +46 additions, −7 deletions

sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
44

5-
# pylint: disable=client-accepts-api-version-keyword,too-many-instance-attributes,client-method-missing-type-annotations,missing-client-constructor-parameter-kwargs
5+
# pylint: disable=client-accepts-api-version-keyword,too-many-instance-attributes,client-method-missing-type-annotations,missing-client-constructor-parameter-kwargs,logging-format-interpolation
66

77
import logging
88
import os
@@ -31,6 +31,7 @@
3131
upload_directory,
3232
upload_file,
3333
)
34+
from azure.ai.ml._azure_environments import _get_cloud_details
3435
from azure.ai.ml.constants._common import STORAGE_AUTH_MISMATCH_ERROR
3536
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, MlException, ValidationException
3637
from azure.core.exceptions import ResourceNotFoundError
@@ -44,6 +45,7 @@
4445

4546
class BlobStorageClient:
4647
def __init__(self, credential: str, account_url: str, container_name: Optional[str] = None):
48+
self.account_name = account_url.split(".")[0].split("//")[1]
4749
self.service_client = BlobServiceClient(account_url=account_url, credential=credential)
4850
self.upload_to_root_container = None
4951
if container_name:
@@ -90,8 +92,11 @@ def upload(
9092
# warn if large file (> 100 MB)
9193
file_size, _ = get_directory_size(source)
9294
file_size_in_mb = file_size / 10**6
95+
cloud = _get_cloud_details()
96+
cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used
97+
full_storage_url = f"https://{self.account_name}.blob.{cloud_endpoint}/{self.container}/{dest}"
9398
if file_size_in_mb > 100:
94-
module_logger.warning(FILE_SIZE_WARNING)
99+
module_logger.warning(FILE_SIZE_WARNING.format(source=source, destination=full_storage_url))
95100

96101
# start upload
97102
if os.path.isdir(source):
@@ -212,6 +217,7 @@ def download(
212217
"""
213218
try:
214219
my_list = list(self.container_client.list_blobs(name_starts_with=starts_with, include="metadata"))
220+
download_size_in_mb = 0
215221
for item in my_list:
216222
blob_name = item.name[len(starts_with) :].lstrip("/") or Path(starts_with).name
217223
target_path = Path(destination, blob_name).resolve()
@@ -221,6 +227,16 @@ def download(
221227
continue
222228

223229
blob_content = self.container_client.download_blob(item)
230+
231+
# check if total size of download has exceeded 100 MB
232+
# make sure proper cloud endpoint is used
233+
cloud = _get_cloud_details()
234+
cloud_endpoint = cloud['storage_endpoint']
235+
full_storage_url = f"https://{self.account_name}.blob.{cloud_endpoint}/{self.container}/{starts_with}"
236+
download_size_in_mb += (blob_content.size / 10**6)
237+
if download_size_in_mb > 100:
238+
module_logger.warning(FILE_SIZE_WARNING.format(source=full_storage_url, destination=destination))
239+
224240
blob_content = blob_content.content_as_bytes(max_concurrency)
225241
target_path.parent.mkdir(parents=True, exist_ok=True)
226242
with target_path.open("wb") as file:

sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_constants.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@
2828
CHANGED_ASSET_PATH_MSG_NO_PERSONAL_DATA = "The code asset is already linked to an asset."
2929
EMPTY_DIRECTORY_ERROR = "Directory {0} is empty. path or local_path must be a non-empty directory."
3030
FILE_SIZE_WARNING = (
31-
"Your file exceeds 100 MB. If you experience low upload speeds or latency, "
32-
"we recommend using the AzCopy tool for this file transfer. "
33-
"See https://docs.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information."
31+
"Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using "
32+
"the AzCopyv10 tool for this file transfer.\n\nExample: azcopy copy '{source}' '{destination}' " # cspell:disable-line
33+
"\n\nSee https://docs.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information."
3434
)
3535
INVALID_MLTABLE_METADATA_SCHEMA_MSG = "Invalid MLTable metadata schema"
3636
INVALID_MLTABLE_METADATA_SCHEMA_ERROR = (

sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_gen2_storage_helper.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# ---------------------------------------------------------
44

5-
# pylint: disable=client-accepts-api-version-keyword,too-many-instance-attributes,client-method-missing-type-annotations,missing-client-constructor-parameter-kwargs
5+
# pylint: disable=client-accepts-api-version-keyword,too-many-instance-attributes,client-method-missing-type-annotations,missing-client-constructor-parameter-kwargs,logging-format-interpolation
66

77
import logging
88
import os
@@ -14,15 +14,17 @@
1414

1515
from colorama import Fore
1616

17-
from azure.ai.ml._artifacts._constants import UPLOAD_CONFIRMATION
17+
from azure.ai.ml._artifacts._constants import UPLOAD_CONFIRMATION, FILE_SIZE_WARNING
1818
from azure.ai.ml._utils._asset_utils import (
1919
AssetNotChangedError,
2020
IgnoreFile,
2121
_build_metadata_dict,
2222
generate_asset_id,
23+
get_directory_size,
2324
upload_directory,
2425
upload_file,
2526
)
27+
from azure.ai.ml._azure_environments import _get_cloud_details
2628
from azure.ai.ml.constants._common import STORAGE_AUTH_MISMATCH_ERROR
2729
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, MlException, ValidationException
2830
from azure.core.exceptions import ResourceExistsError
@@ -34,6 +36,7 @@
3436
class Gen2StorageClient:
3537
def __init__(self, credential: str, file_system: str, account_url: str):
3638
service_client = DataLakeServiceClient(account_url=account_url, credential=credential)
39+
self.account_name = account_url.split(".")[0].split("//")[1]
3740
self.file_system = file_system
3841
self.file_system_client = service_client.get_file_system_client(file_system=file_system)
3942
try:
@@ -77,6 +80,16 @@ def upload(
7780
# configure progress bar description
7881
msg = Fore.GREEN + f"Uploading {formatted_path}"
7982

83+
# warn if large file (> 100 MB)
84+
file_size, _ = get_directory_size(source)
85+
file_size_in_mb = file_size / 10**6
86+
87+
cloud = _get_cloud_details()
88+
cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used
89+
full_storage_url = f"https://{self.account_name}.dfs.{cloud_endpoint}/{self.file_system}/{dest}"
90+
if file_size_in_mb > 100:
91+
module_logger.warning(FILE_SIZE_WARNING.format(source=source, destination=full_storage_url))
92+
8093
# start upload
8194
self.directory_client = self.file_system_client.get_directory_client(asset_id)
8295
self.check_blob_exists()
@@ -159,6 +172,7 @@ def download(self, starts_with: str, destination: str = Path.home()) -> None:
159172
prefix `starts_with` to the destination folder."""
160173
try:
161174
mylist = self.file_system_client.get_paths(path=starts_with)
175+
download_size_in_mb = 0
162176
for item in mylist:
163177
file_name = item.name[len(starts_with) :].lstrip("/") or Path(starts_with).name
164178

@@ -168,6 +182,15 @@ def download(self, starts_with: str, destination: str = Path.home()) -> None:
168182

169183
target_path = Path(destination, file_name)
170184
file_client = self.file_system_client.get_file_client(item.name)
185+
186+
# check if total size of download has exceeded 100 MB
187+
cloud = _get_cloud_details()
188+
cloud_endpoint = cloud['storage_endpoint'] # make sure proper cloud endpoint is used
189+
full_storage_url = f"https://{self.account_name}.dfs.{cloud_endpoint}/{self.file_system}/{starts_with}"
190+
download_size_in_mb += (file_client.get_file_properties().size / 10**6)
191+
if download_size_in_mb > 100:
192+
module_logger.warning(FILE_SIZE_WARNING.format(source=full_storage_url, destination=destination))
193+
171194
file_content = file_client.download_file().readall()
172195
try:
173196
os.makedirs(str(target_path.parent), exist_ok=True)

Comments (0) — no commit comments.