Skip to content

Commit fce66e6

Browse files
[Storage] Add client-side encryption version 2.0, supporting AES-GCM-256 (Azure#24798)
1 parent 4674d97 commit fce66e6

File tree

119 files changed

+24540
-9113
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+24540
-9113
lines changed

sdk/storage/azure-storage-blob/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
### Features Added
66
- Added support for service version 2021-08-06.
7+
- Added a new version of client-side encryption for blobs (version 2.0) which utilizes AES-GCM-256 encryption.
8+
If you are currently using client-side encryption, it is **highly recommended** to switch to a form of server-side
9+
encryption (Customer-Provided Key, Encryption Scope, etc.) or version 2.0 of client-side encryption. The encryption
10+
version can be specified on any client constructor via the `encryption_version` keyword (`encryption_version='2.0'`).
711

812
## 12.12.0 (2022-05-09)
913

sdk/storage/azure-storage-blob/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ Defaults to `False`.
331331
Use the following keyword arguments when instantiating a client to configure encryption:
332332

333333
* __require_encryption__ (bool): If set to True, will enforce that objects are encrypted and decrypt them.
334+
* __encryption_version__ (str): Specifies the version of encryption to use. Current options are `'2.0'` or `'1.0'` and
335+
the default value is `'1.0'`. Version 1.0 is deprecated, and it is **highly recommended** to use version 2.0.
334336
* __key_encryption_key__ (object): The user-provided key-encryption-key. The instance must implement the following methods:
335337
- `wrap_key(key)`--wraps the specified key using an algorithm of the user's choice.
336338
- `get_key_wrap_algorithm()`--returns the algorithm used to wrap the specified symmetric key.

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,11 +355,14 @@ def _upload_blob_options( # pylint:disable=too-many-statements
355355
raise ValueError("Encryption required but no key was provided.")
356356
encryption_options = {
357357
'required': self.require_encryption,
358+
'version': self.encryption_version,
358359
'key': self.key_encryption_key,
359360
'resolver': self.key_resolver_function,
360361
}
361362
if self.key_encryption_key is not None:
362-
cek, iv, encryption_data = generate_blob_encryption_data(self.key_encryption_key)
363+
cek, iv, encryption_data = generate_blob_encryption_data(
364+
self.key_encryption_key,
365+
self.encryption_version)
363366
encryption_options['cek'] = cek
364367
encryption_options['vector'] = iv
365368
encryption_options['data'] = encryption_data
@@ -422,6 +425,8 @@ def _upload_blob_options( # pylint:disable=too-many-statements
422425
kwargs['client'] = self._client.block_blob
423426
kwargs['data'] = data
424427
elif blob_type == BlobType.PageBlob:
428+
if self.encryption_version == '2.0' and (self.require_encryption or self.key_encryption_key is not None):
429+
raise ValueError("Encryption version 2.0 does not currently support page blobs.")
425430
kwargs['client'] = self._client.page_blob
426431
elif blob_type == BlobType.AppendBlob:
427432
if self.require_encryption or (self.key_encryption_key is not None):
@@ -4126,5 +4131,5 @@ def _get_container_client(self): # pylint: disable=client-method-missing-kwargs
41264131
"{}://{}".format(self.scheme, self.primary_hostname), container_name=self.container_name,
41274132
credential=self._raw_credential, api_version=self.api_version, _configuration=self._config,
41284133
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
4129-
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
4130-
key_resolver_function=self.key_resolver_function)
4134+
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
4135+
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_service_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -683,8 +683,8 @@ def get_container_client(self, container):
683683
self.url, container_name=container_name,
684684
credential=self.credential, api_version=self.api_version, _configuration=self._config,
685685
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
686-
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
687-
key_resolver_function=self.key_resolver_function)
686+
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
687+
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
688688

689689
def get_blob_client(
690690
self, container, # type: Union[ContainerProperties, str]
@@ -736,5 +736,5 @@ def get_blob_client(
736736
self.url, container_name=container_name, blob_name=blob_name, snapshot=snapshot,
737737
credential=self.credential, api_version=self.api_version, _configuration=self._config,
738738
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
739-
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
740-
key_resolver_function=self.key_resolver_function)
739+
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
740+
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)

sdk/storage/azure-storage-blob/azure/storage/blob/_container_client.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -323,16 +323,16 @@ def _rename_container(self, new_name, **kwargs):
323323
"""
324324
lease = kwargs.pop('lease', None)
325325
try:
326-
kwargs['source_lease_id'] = lease.id # type: str
326+
kwargs['source_lease_id'] = lease.id
327327
except AttributeError:
328328
kwargs['source_lease_id'] = lease
329329
try:
330330
renamed_container = ContainerClient(
331331
"{}://{}".format(self.scheme, self.primary_hostname), container_name=new_name,
332332
credential=self.credential, api_version=self.api_version, _configuration=self._config,
333333
_pipeline=self._pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
334-
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
335-
key_resolver_function=self.key_resolver_function)
334+
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
335+
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)
336336
renamed_container._client.container.rename(self.container_name, **kwargs) # pylint: disable = protected-access
337337
return renamed_container
338338
except HttpResponseError as error:
@@ -619,8 +619,8 @@ def _get_blob_service_client(self): # pylint: disable=client-method-missing-kwa
619619
"{}://{}".format(self.scheme, self.primary_hostname),
620620
credential=self._raw_credential, api_version=self.api_version, _configuration=self._config,
621621
_location_mode=self._location_mode, _hosts=self._hosts, require_encryption=self.require_encryption,
622-
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function,
623-
_pipeline=_pipeline)
622+
encryption_version=self.encryption_version, key_encryption_key=self.key_encryption_key,
623+
key_resolver_function=self.key_resolver_function, _pipeline=_pipeline)
624624

625625
@distributed_trace
626626
def get_container_access_policy(self, **kwargs):
@@ -1608,5 +1608,5 @@ def get_blob_client(
16081608
self.url, container_name=self.container_name, blob_name=blob_name, snapshot=snapshot,
16091609
credential=self.credential, api_version=self.api_version, _configuration=self._config,
16101610
_pipeline=_pipeline, _location_mode=self._location_mode, _hosts=self._hosts,
1611-
require_encryption=self.require_encryption, key_encryption_key=self.key_encryption_key,
1612-
key_resolver_function=self.key_resolver_function)
1611+
require_encryption=self.require_encryption, encryption_version=self.encryption_version,
1612+
key_encryption_key=self.key_encryption_key, key_resolver_function=self.key_resolver_function)

sdk/storage/azure-storage-blob/azure/storage/blob/_download.py

Lines changed: 62 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,26 @@
1616
from azure.core.exceptions import HttpResponseError, ServiceResponseError
1717

1818
from azure.core.tracing.common import with_current_context
19-
from ._shared.encryption import decrypt_blob
19+
from ._shared.encryption import (
20+
adjust_blob_size_for_encryption,
21+
decrypt_blob,
22+
get_adjusted_download_range_and_offset,
23+
is_encryption_v2,
24+
parse_encryption_data
25+
)
2026
from ._shared.request_handlers import validate_and_format_range_headers
2127
from ._shared.response_handlers import process_storage_error, parse_length_from_content_range
22-
from ._deserialize import get_page_ranges_result
28+
from ._deserialize import deserialize_blob_properties, get_page_ranges_result
2329

2430

25-
def process_range_and_offset(start_range, end_range, length, encryption):
31+
def process_range_and_offset(start_range, end_range, length, encryption_options, encryption_data):
2632
start_offset, end_offset = 0, 0
27-
if encryption.get("key") is not None or encryption.get("resolver") is not None:
28-
if start_range is not None:
29-
# Align the start of the range along a 16 byte block
30-
start_offset = start_range % 16
31-
start_range -= start_offset
32-
33-
# Include an extra 16 bytes for the IV if necessary
34-
# Because of the previous offsetting, start_range will always
35-
# be a multiple of 16.
36-
if start_range > 0:
37-
start_offset += 16
38-
start_range -= 16
39-
40-
if length is not None:
41-
# Align the end of the range along a 16 byte block
42-
end_offset = 15 - (end_range % 16)
43-
end_range += end_offset
33+
if encryption_options.get("key") is not None or encryption_options.get("resolver") is not None:
34+
return get_adjusted_download_range_and_offset(
35+
start_range,
36+
end_range,
37+
length,
38+
encryption_data)
4439

4540
return (start_range, end_range), (start_offset, end_offset)
4641

@@ -81,6 +76,7 @@ def __init__(
8176
parallel=None,
8277
validate_content=None,
8378
encryption_options=None,
79+
encryption_data=None,
8480
progress_hook=None,
8581
**kwargs
8682
):
@@ -108,6 +104,7 @@ def __init__(
108104

109105
# Encryption
110106
self.encryption_options = encryption_options
107+
self.encryption_data = encryption_data
111108

112109
# Parameters for each get operation
113110
self.validate_content = validate_content
@@ -183,7 +180,7 @@ def _do_optimize(self, given_range_start, given_range_end):
183180

184181
def _download_chunk(self, chunk_start, chunk_end):
185182
download_range, offset = process_range_and_offset(
186-
chunk_start, chunk_end, chunk_end, self.encryption_options
183+
chunk_start, chunk_end, chunk_end, self.encryption_options, self.encryption_data
187184
)
188185

189186
# No need to download the empty chunk from server if there's no data in the chunk to be downloaded.
@@ -335,6 +332,10 @@ def __init__(
335332
self._file_size = None
336333
self._non_empty_ranges = None
337334
self._response = None
335+
self._encryption_data = None
336+
337+
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
338+
self._get_encryption_data_request()
338339

339340
# The service only provides transactional MD5s for chunks under 4MB.
340341
# If validate_content is on, get only self.MAX_CHUNK_GET_SIZE for the first
@@ -349,7 +350,11 @@ def __init__(
349350
initial_request_end = initial_request_start + self._first_get_size - 1
350351

351352
self._initial_range, self._initial_offset = process_range_and_offset(
352-
initial_request_start, initial_request_end, self._end_range, self._encryption_options
353+
initial_request_start,
354+
initial_request_end,
355+
self._end_range,
356+
self._encryption_options,
357+
self._encryption_data
353358
)
354359

355360
self._response = self._initial_request()
@@ -376,6 +381,21 @@ def __init__(
376381
def __len__(self):
377382
return self.size
378383

384+
def _get_encryption_data_request(self):
    """Fetch the blob's properties and store its parsed encryption metadata.

    Temporarily replaces the ``cls`` entry of ``self._request_options`` with
    ``deserialize_blob_properties`` for the ``get_properties`` call, then puts
    the previous (download) ``cls`` back. The result of
    ``parse_encryption_data(properties.metadata)`` is stored on
    ``self._encryption_data``.
    """
    # Save current request cls
    download_cls = self._request_options.pop('cls', None)
    # Adjust cls for get_properties
    self._request_options['cls'] = deserialize_blob_properties
    try:
        properties = self._clients.blob.get_properties(**self._request_options)
    finally:
        # Restore cls for download. Doing this in ``finally`` guarantees the
        # request options are never left pointing at the properties
        # deserializer if the get_properties call raises.
        self._request_options['cls'] = download_cls

    # This will return None if there is no encryption metadata or there are parsing errors.
    # That is acceptable here, the proper error will be caught and surfaced when attempting
    # to decrypt the blob.
    self._encryption_data = parse_encryption_data(properties.metadata)
398+
379399
def _initial_request(self):
380400
range_header, range_validation = validate_and_format_range_headers(
381401
self._initial_range[0],
@@ -405,6 +425,9 @@ def _initial_request(self):
405425
# Parse the total file size and adjust the download size if ranges
406426
# were specified
407427
self._file_size = parse_length_from_content_range(response.properties.content_range)
428+
# Remove any extra encryption data size from blob size
429+
self._file_size = adjust_blob_size_for_encryption(self._file_size, self._encryption_data)
430+
408431
if self._end_range is not None:
409432
# Use the end range index unless it is over the end of the file
410433
self.size = min(self._file_size, self._end_range - self._start_range + 1)
@@ -465,7 +488,8 @@ def _initial_request(self):
465488

466489
# If the file is small, the download is complete at this point.
467490
# If file size is large, download the rest of the file in chunks.
468-
if response.properties.size != self.size:
491+
# Use less than here for encryption.
492+
if response.properties.size < self.size:
469493
if self._request_options.get("modified_access_conditions"):
470494
self._request_options["modified_access_conditions"].if_match = response.properties.etag
471495
else:
@@ -494,18 +518,25 @@ def chunks(self):
494518
if self._end_range is not None:
495519
# Use the end range index unless it is over the end of the file
496520
data_end = min(self._file_size, self._end_range + 1)
521+
522+
data_start = self._initial_range[1] + 1 # Start where the first download ended
523+
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
524+
if is_encryption_v2(self._encryption_data):
525+
data_start = (self._start_range or 0) + len(self._current_content)
526+
497527
iter_downloader = _ChunkDownloader(
498528
client=self._clients.blob,
499529
non_empty_ranges=self._non_empty_ranges,
500530
total_size=self.size,
501531
chunk_size=self._config.max_chunk_get_size,
502532
current_progress=self._first_get_size,
503-
start_range=self._initial_range[1] + 1, # start where the first download ended
533+
start_range=data_start,
504534
end_range=data_end,
505535
stream=None,
506536
parallel=False,
507537
validate_content=self._validate_content,
508538
encryption_options=self._encryption_options,
539+
encryption_data=self._encryption_data,
509540
use_location=self._location_mode,
510541
**self._request_options
511542
)
@@ -599,18 +630,24 @@ def readinto(self, stream):
599630
# Use the length unless it is over the end of the file
600631
data_end = min(self._file_size, self._end_range + 1)
601632

633+
data_start = self._initial_range[1] + 1 # Start where the first download ended
634+
# For encryption V2 only, adjust start to the end of the fetched data rather than download size
635+
if is_encryption_v2(self._encryption_data):
636+
data_start = (self._start_range or 0) + len(self._current_content)
637+
602638
downloader = _ChunkDownloader(
603639
client=self._clients.blob,
604640
non_empty_ranges=self._non_empty_ranges,
605641
total_size=self.size,
606642
chunk_size=self._config.max_chunk_get_size,
607643
current_progress=self._first_get_size,
608-
start_range=self._initial_range[1] + 1, # Start where the first download ended
644+
start_range=data_start,
609645
end_range=data_end,
610646
stream=stream,
611647
parallel=parallel,
612648
validate_content=self._validate_content,
613649
encryption_options=self._encryption_options,
650+
encryption_data=self._encryption_data,
614651
use_location=self._location_mode,
615652
progress_hook=self._progress_hook,
616653
**self._request_options

sdk/storage/azure-storage-blob/azure/storage/blob/_shared/base_client.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# --------------------------------------------------------------------------
66
import logging
77
import uuid
8+
import warnings
89
from typing import ( # pylint: disable=unused-import
910
Optional,
1011
Any,
@@ -105,8 +106,15 @@ def __init__(
105106
self._hosts = {LocationMode.PRIMARY: primary_hostname, LocationMode.SECONDARY: secondary_hostname}
106107

107108
self.require_encryption = kwargs.get("require_encryption", False)
109+
self.encryption_version = kwargs.get("encryption_version", "1.0")
108110
self.key_encryption_key = kwargs.get("key_encryption_key")
109111
self.key_resolver_function = kwargs.get("key_resolver_function")
112+
if self.key_encryption_key and self.encryption_version == '1.0':
113+
warnings.warn("This client has been configured to use encryption with version 1.0. \
114+
Version 1.0 is deprecated and no longer considered secure. It is highly \
115+
recommended that you switch to using version 2.0. The version can be \
116+
specified using the 'encryption_version' keyword.")
117+
110118
self._config, self._pipeline = self._create_pipeline(self.credential, storage_sdk=service, **kwargs)
111119

112120
def __enter__(self):

0 commit comments

Comments
 (0)