Skip to content

Commit bffbffd

Browse files
Blob hash migration guide and samples. (Azure#17759)
Co-authored-by: jschrepp-MSFT <41338290+jschrepp-MSFT@users.noreply.github.com>
1 parent 1a95f19 commit bffbffd

File tree

2 files changed

+214
-0
lines changed

2 files changed

+214
-0
lines changed

sdk/storage/Azure.Storage.Blobs/AzureStorageNetMigrationV12.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Familiarity with the legacy client library is assumed. For those new to the Azur
2323
- [Downloading Blobs from a Container](#downloading-blobs-from-a-container)
2424
- [Listing Blobs in a Container](#listing-blobs-in-a-container)
2525
- [Generate a SAS](#generate-a-sas)
26+
- [Content Hashes](#content-hashes)
2627
- [Additional information](#additional-information)
2728

2829
## Migration benefits
@@ -470,6 +471,93 @@ BlobSasBuilder sasBuilder = new BlobSasBuilder()
470471
};
471472
```
472473

474+
### Content Hashes
475+
476+
#### Blob Content MD5
477+
478+
V11 calculated blob content MD5 for validation on download by default, assuming there was a stored MD5 in the blob properties. Calculation and storage on upload were opt-in. Note that this value is not generated or validated by the service; it is only retained for the client to validate against.
479+
480+
v11
481+
482+
```csharp
483+
BlobRequestOptions options = new BlobRequestOptions
484+
{
485+
ChecksumOptions = new ChecksumOptions()
486+
{
487+
DisableContentMD5Validation = false, // true to disable download content validation
488+
StoreContentMD5 = false // true to calculate content MD5 on upload and store property
489+
}
490+
};
491+
```
492+
493+
V12 does not have an automated mechanism for blob content validation. It must be done per-request by the user.
494+
495+
v12
496+
497+
```C# Snippet:SampleSnippetsBlobMigration_BlobContentMD5
498+
// upload with blob content hash
499+
await blobClient.UploadAsync(
500+
contentStream,
501+
new BlobUploadOptions()
502+
{
503+
HttpHeaders = new BlobHttpHeaders()
504+
{
505+
ContentHash = precalculatedContentHash
506+
}
507+
});
508+
509+
// download whole blob and validate against stored blob content hash
510+
Response<BlobDownloadInfo> response = await blobClient.DownloadAsync();
511+
512+
Stream downloadStream = response.Value.Content;
513+
byte[] blobContentMD5 = response.Value.Details.BlobContentHash ?? response.Value.ContentHash;
514+
// validate stream against hash in your workflow
515+
```
516+
517+
#### Transactional MD5 and CRC64
518+
519+
Transactional hashes are not stored; they exist only for the lifetime of the request they are calculated for. Transactional hashes are verified by the service on upload.
520+
521+
V11 provided transactional hashing on uploads and downloads through opt-in request options. MD5 and Storage's custom CRC64 were supported. The SDK calculated and validated these hashes automatically when enabled. The calculation worked on any upload or download method.
522+
523+
v11
524+
525+
```csharp
526+
BlobRequestOptions options = new BlobRequestOptions
527+
{
528+
ChecksumOptions = new ChecksumOptions()
529+
{
530+
// request fails if both are true
531+
UseTransactionalMD5 = false, // true to use MD5 on all blob content transactions
532+
UseTransactionalCRC64 = false // true to use CRC64 on all blob content transactions
533+
}
534+
};
535+
```
536+
537+
V12 does not currently provide this functionality. Users who manage their own individual upload and download HTTP requests can provide a precalculated MD5 on upload and access the MD5 in the response object. V12 currently offers no API to request a transactional CRC64.
538+
539+
```C# Snippet:SampleSnippetsBlobMigration_TransactionalMD5
540+
// upload a block with transactional hash calculated by user
541+
await blockBlobClient.StageBlockAsync(
542+
blockId,
543+
blockContentStream,
544+
transactionalContentHash: precalculatedBlockHash);
545+
546+
// upload more blocks as needed
547+
548+
// commit block list
549+
await blockBlobClient.CommitBlockListAsync(blockList);
550+
551+
// download any range of blob with transactional MD5 requested (maximum 4 MB for downloads)
552+
Response<BlobDownloadInfo> response = await blockBlobClient.DownloadAsync(
553+
range: new HttpRange(length: 4 * Constants.MB), // a range must be provided; here we use transactional download max size
554+
rangeGetContentHash: true);
555+
556+
Stream downloadStream = response.Value.Content;
557+
byte[] transactionalMD5 = response.Value.ContentHash;
558+
// validate stream against hash in your workflow
559+
```
560+
473561
## Additional information
474562

475563
### Samples

sdk/storage/Azure.Storage.Blobs/samples/Sample03_Migrations.cs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
using Azure.Identity;
99
using Azure.Storage;
1010
using Azure.Storage.Blobs;
11+
using Azure.Storage.Blobs.Specialized;
1112
using Azure.Storage.Blobs.Models;
1213
using Azure.Storage.Sas;
1314
using NUnit.Framework;
1415
using System.Text;
1516
using System.Threading.Tasks;
17+
using System.Security.Cryptography;
1618

1719
namespace Azure.Storage.Blobs.Samples
1820
{
@@ -608,5 +610,129 @@ public async Task SasBuilderIdentifier()
608610
await container.DeleteIfExistsAsync();
609611
}
610612
}
613+
614+
/// <summary>
/// Migration sample: uploads a small blob with a caller-computed MD5 set as the
/// blob content hash header, downloads the whole blob, and checks that both the
/// content and the hash returned with the download match what was uploaded.
/// </summary>
[Test]
public async Task BlobContentHash()
{
    string data = "hello world";
    using Stream contentStream = new MemoryStream(Encoding.UTF8.GetBytes(data));

    // precalculate hash for sample
    byte[] precalculatedContentHash;
    using (var md5 = MD5.Create())
    {
        precalculatedContentHash = md5.ComputeHash(contentStream);
    }
    // ComputeHash read the stream to its end; rewind so the upload sends the full content
    contentStream.Position = 0;

    // setup blob
    string containerName = Randomize("sample-container");
    string blobName = Randomize("sample-file");
    var containerClient = new BlobContainerClient(ConnectionString, containerName);

    try
    {
        // stay asynchronous end to end rather than blocking on the sync Create()
        await containerClient.CreateAsync();
        var blobClient = containerClient.GetBlobClient(blobName);

        #region Snippet:SampleSnippetsBlobMigration_BlobContentMD5
        // upload with blob content hash
        await blobClient.UploadAsync(
            contentStream,
            new BlobUploadOptions()
            {
                HttpHeaders = new BlobHttpHeaders()
                {
                    ContentHash = precalculatedContentHash
                }
            });

        // download whole blob and validate against stored blob content hash
        Response<BlobDownloadInfo> response = await blobClient.DownloadAsync();

        Stream downloadStream = response.Value.Content;
        byte[] blobContentMD5 = response.Value.Details.BlobContentHash ?? response.Value.ContentHash;
        // validate stream against hash in your workflow
        #endregion

        byte[] downloadedBytes;
        using (downloadStream) // release the downloaded content stream once it has been read
        using (var memStream = new MemoryStream())
        {
            await downloadStream.CopyToAsync(memStream);
            downloadedBytes = memStream.ToArray();
        }

        Assert.AreEqual(data, Encoding.UTF8.GetString(downloadedBytes));
        // element-wise comparison; reports the differing index on failure and
        // fails as an assertion (not an exception) if no hash was returned
        CollectionAssert.AreEqual(precalculatedContentHash, blobContentMD5);
    }
    finally
    {
        await containerClient.DeleteIfExistsAsync();
    }
}
673+
674+
/// <summary>
/// Migration sample: stages a single block with a caller-computed transactional MD5,
/// commits the block list, then downloads a range with a transactional MD5 requested
/// and checks that the content and the returned hash match what was uploaded.
/// </summary>
[Test]
public async Task TransactionalMD5()
{
    string data = "hello world";
    string blockId = Convert.ToBase64String(Guid.NewGuid().ToByteArray());
    List<string> blockList = new List<string> { blockId };
    using Stream blockContentStream = new MemoryStream(Encoding.UTF8.GetBytes(data));

    // precalculate hash for sample
    byte[] precalculatedBlockHash;
    using (var md5 = MD5.Create())
    {
        precalculatedBlockHash = md5.ComputeHash(blockContentStream);
    }
    // ComputeHash read the stream to its end; rewind so the block upload sends the full content
    blockContentStream.Position = 0;

    // setup blob
    string containerName = Randomize("sample-container");
    string blobName = Randomize("sample-file");
    var containerClient = new BlobContainerClient(ConnectionString, containerName);

    try
    {
        // stay asynchronous end to end rather than blocking on the sync Create()
        await containerClient.CreateAsync();
        var blockBlobClient = containerClient.GetBlockBlobClient(blobName);

        #region Snippet:SampleSnippetsBlobMigration_TransactionalMD5
        // upload a block with transactional hash calculated by user
        await blockBlobClient.StageBlockAsync(
            blockId,
            blockContentStream,
            transactionalContentHash: precalculatedBlockHash);

        // upload more blocks as needed

        // commit block list
        await blockBlobClient.CommitBlockListAsync(blockList);

        // download any range of blob with transactional MD5 requested (maximum 4 MB for downloads)
        Response<BlobDownloadInfo> response = await blockBlobClient.DownloadAsync(
            range: new HttpRange(length: 4 * Constants.MB), // a range must be provided; here we use transactional download max size
            rangeGetContentHash: true);

        Stream downloadStream = response.Value.Content;
        byte[] transactionalMD5 = response.Value.ContentHash;
        // validate stream against hash in your workflow
        #endregion

        byte[] downloadedBytes;
        using (downloadStream) // release the downloaded content stream once it has been read
        using (var memStream = new MemoryStream())
        {
            await downloadStream.CopyToAsync(memStream);
            downloadedBytes = memStream.ToArray();
        }

        Assert.AreEqual(data, Encoding.UTF8.GetString(downloadedBytes));
        // the blob holds only this one block and the requested range starts at offset 0,
        // so the hash returned for the range is compared against the block's own MD5
        CollectionAssert.AreEqual(precalculatedBlockHash, transactionalMD5);
    }
    finally
    {
        await containerClient.DeleteIfExistsAsync();
    }
}
611737
}
612738
}

0 commit comments

Comments
 (0)