Commit 1f7ea0d

Fixed concurrency issue in buffered upload that caused large files to… (Azure#20964)

1 parent 9af2f3f commit 1f7ea0d

5 files changed: +25 -110 lines changed

sdk/storage/azure-storage-blob/src/main/java/com/azure/storage/blob/BlobAsyncClient.java
Lines changed: 7 additions & 10 deletions

@@ -33,7 +33,7 @@
 import com.azure.storage.common.Utility;
 import com.azure.storage.common.implementation.Constants;
 import com.azure.storage.common.implementation.StorageImplUtils;
-import com.azure.storage.common.implementation.UploadBufferPool;
+import com.azure.storage.common.implementation.BufferStagingArea;
 import com.azure.storage.common.implementation.UploadUtils;
 import reactor.core.publisher.Flux;
 import reactor.core.publisher.Mono;

@@ -573,21 +573,19 @@ private Mono<Response<BlockBlobItem>> uploadInChunks(BlockBlobAsyncClient blockB
         Lock progressLock = new ReentrantLock();

         // Validation done in the constructor.
-        /*
-        We use maxConcurrency + 1 for the number of buffers because one buffer will typically be being filled while the
-        others are being sent.
-        */
-        UploadBufferPool pool = new UploadBufferPool(parallelTransferOptions.getMaxConcurrency() + 1,
-            parallelTransferOptions.getBlockSizeLong(), BlockBlobClient.MAX_STAGE_BLOCK_BYTES_LONG);
+        BufferStagingArea pool = new BufferStagingArea(parallelTransferOptions.getBlockSizeLong(),
+            BlockBlobClient.MAX_STAGE_BLOCK_BYTES_LONG);

         Flux<ByteBuffer> chunkedSource = UploadUtils.chunkSource(data,
             ModelHelper.wrapBlobOptions(parallelTransferOptions));

         /*
         Write to the pool and upload the output.
+        maxConcurrency = 1 when writing means only 1 BufferAggregator will be accumulating at a time.
+        parallelTransferOptions.getMaxConcurrency() appends will be happening at once, so we guarantee buffering of
+        only concurrency + 1 chunks at a time.
         */
-        return chunkedSource.concatMap(pool::write)
-            .limitRate(parallelTransferOptions.getMaxConcurrency()) // This guarantees that concatMap will only buffer maxConcurrency * chunkSize data
+        return chunkedSource.flatMapSequential(pool::write, 1)
             .concatWith(Flux.defer(pool::flush))
             .flatMapSequential(bufferAggregator -> {
                 // Report progress as necessary.

@@ -605,7 +603,6 @@ private Mono<Response<BlockBlobItem>> uploadInChunks(BlockBlobAsyncClient blockB
                     // We only care about the stageBlock insofar as it was successful,
                     // but we need to collect the ids.
                     .map(x -> blockId)
-                    .doFinally(x -> pool.returnBuffer(bufferAggregator))
                     .flux();
             }, parallelTransferOptions.getMaxConcurrency())
             .collect(Collectors.toList())
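Note: the new comment block carries the reasoning behind the fix. Staging writes with a concurrency of 1 means only one BufferAggregator is being filled at a time, while the downstream flatMapSequential runs up to getMaxConcurrency() stageBlock calls, so only about concurrency + 1 chunks are buffered at once. Below is a minimal Reactor sketch of that operator shape (the class name, chunk count, and delay are illustrative stand-ins, not the SDK code):

import java.time.Duration;
import java.util.concurrent.atomic.AtomicInteger;

import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

// Sketch only: shows how an upstream flatMapSequential with concurrency 1 plus a
// downstream flatMapSequential with maxConcurrency bounds the amount of in-flight
// work, mirroring the shape of the new upload chain.
public class BoundedBufferingSketch {
    public static void main(String[] args) {
        int maxConcurrency = 4;                               // stand-in for parallelTransferOptions.getMaxConcurrency()
        AtomicInteger inFlight = new AtomicInteger();

        Flux.range(0, 32)                                     // stand-in for the chunked source
            .flatMapSequential(chunk -> Flux.just(chunk), 1)  // "stage" one chunk at a time
            .flatMapSequential(chunk ->                       // "upload" staged chunks with bounded parallelism
                Mono.delay(Duration.ofMillis(50))             // stand-in for the stageBlock call
                    .doOnSubscribe(s -> System.out.println("in flight: " + inFlight.incrementAndGet()))
                    .doOnTerminate(inFlight::decrementAndGet)
                    .thenReturn(chunk),
                maxConcurrency)
            .blockLast();                                     // printed in-flight count stays at or below maxConcurrency
    }
}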

sdk/storage/azure-storage-blob/src/test/java/com/azure/storage/blob/specialized/BlockBlobAPITest.groovy
Lines changed: 0 additions & 2 deletions

@@ -1230,7 +1230,6 @@ class BlockBlobAPITest extends APISpec {
     // Only run these tests in live mode as they use variables that can't be captured.
     @Unroll
     @Requires({ liveMode() })
-    @Ignore("Timeouts")
     def "Async buffered upload"() {
         setup:
         def blobAsyncClient = getPrimaryServiceClientForWrites(bufferSize)

@@ -1379,7 +1378,6 @@ class BlockBlobAPITest extends APISpec {
     // Only run these tests in live mode as they use variables that can't be captured.
     @Unroll
     @Requires({ liveMode() })
-    @Ignore("Timeouts")
     def "Buffered upload chunked source"() {
         /*
         This test should validate that the upload should work regardless of what format the passed data is in because

sdk/storage/azure-storage-common/src/main/java/com/azure/storage/common/implementation/UploadBufferPool.java renamed to sdk/storage/azure-storage-common/src/main/java/com/azure/storage/common/implementation/BufferStagingArea.java
Lines changed: 6 additions & 78 deletions

@@ -3,12 +3,9 @@

 package com.azure.storage.common.implementation;

-import com.azure.core.util.logging.ClientLogger;
 import reactor.core.publisher.Flux;

 import java.nio.ByteBuffer;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.LinkedBlockingQueue;

 /**
  * This type is to support the implementation of buffered upload only. It is mandatory that the caller has broken the

@@ -32,49 +29,21 @@
  *
  * RESERVED FOR INTERNAL USE ONLY
  */
-public final class UploadBufferPool {
-    private final ClientLogger logger = new ClientLogger(UploadBufferPool.class);
-
-    /*
-    Note that a blocking on a synchronized object is not the same as blocking on a reactive operation; blocking on this
-    queue will not compromise the async nature of this workflow. Fluxes themselves are internally synchronized to ensure
-    only one call to onNext happens at a time.
-    */
-    private final BlockingQueue<BufferAggregator> buffers;
-
-    private final int maxBuffs;
-
-    // The number of buffs we have allocated. We can query the queue for how many are available.
-    private int numBuffs;
+public final class BufferStagingArea {

     private final long buffSize;

     private BufferAggregator currentBuf;

     /**
      * Creates a new instance of UploadBufferPool
-     * @param numBuffs The number of buffers in the buffer pool.
      * @param buffSize The size of the buffers
+     * @param maxBuffSize The max size of the buffers
      */
-    public UploadBufferPool(final int numBuffs, final long buffSize, long maxBuffSize) {
-        /*
-        We require at least two buffers because it is possible that a given write will spill over into a second buffer.
-        We only need one overflow buffer because the max size of a ByteBuffer is assumed to be the size as a buffer in
-        the pool.
-        */
-        StorageImplUtils.assertInBounds("numBuffs", numBuffs, 2, Integer.MAX_VALUE);
-        this.maxBuffs = numBuffs;
-        buffers = new LinkedBlockingQueue<>(numBuffs);
-
-
+    public BufferStagingArea(final long buffSize, long maxBuffSize) {
         // These buffers will be used in calls to stageBlock, so they must be no greater than block size.
         StorageImplUtils.assertInBounds("buffSize", buffSize, 1, maxBuffSize);
         this.buffSize = buffSize;
-
-        // We prep the queue with two buffers in case there is overflow.
-        buffers.add(new BufferAggregator(this.buffSize));
-        buffers.add(new BufferAggregator(this.buffSize));
-        this.numBuffs = 2;
     }

     /*

@@ -88,9 +57,10 @@ public UploadBufferPool(final int numBuffs, final long buffSize, long maxBuffSiz
      * @return The {@code Flux<BufferAggregator>}
      */
     public Flux<BufferAggregator> write(ByteBuffer buf) {
+
         // Check if there's a buffer holding any data from a previous call to write. If not, get a new one.
         if (this.currentBuf == null) {
-            this.currentBuf = this.getBuffer();
+            this.currentBuf = new BufferAggregator(this.buffSize);
         }

         Flux<BufferAggregator> result;

@@ -126,37 +96,10 @@ public Flux<BufferAggregator> write(ByteBuffer buf) {
             means we'll only have to over flow once, and the buffer we overflow into will not be filled. This is the
             buffer we will write to on the next call to write().
             */
-            this.currentBuf = this.getBuffer();
+            this.currentBuf = new BufferAggregator(this.buffSize);
             this.currentBuf.append(buf);
         }
-        return result;
-    }
-
-    /*
-    Note that the upload method will be calling write sequentially as there is only one worker reading from the source
-    and calling write. Hence there is only one worker calling getBuffer at any time.
-    */
-    private BufferAggregator getBuffer() {
-        BufferAggregator result;
-        /*
-        There are no buffers in the queue and we have space to allocate one. We do not add the new buffer to the queue
-        because we want to make immediate use of it. This is effectively equivalent to a buffers.add(newBuffer) and
-        then result = buffers.pop()--because we only get here when the queue is empty, the buffer returned is the one
-        we just created. The new buffer will be added to buffers when it is returned to the pool.
-        */
-        if (this.buffers.isEmpty() && this.numBuffs < this.maxBuffs) {
-            result = new BufferAggregator(this.buffSize);
-            this.numBuffs++;
-        } else {
-            try {
-                // If empty, this will wait for an upload to finish and return a buffer.
-                result = this.buffers.take();

-            } catch (InterruptedException e) {
-                throw logger.logExceptionAsError(new IllegalStateException("BufferedUpload thread interrupted. Thread:"
-                    + Thread.currentThread().getId()));
-            }
-        }
         return result;
     }

@@ -177,19 +120,4 @@ public Flux<BufferAggregator> flush() {
         }
         return Flux.empty();
     }
-
-    /**
-     * Returns the ByteBuffer
-     * @param b The ByteBuffer to reset and return
-     */
-    public void returnBuffer(BufferAggregator b) {
-        // Reset the buffer aggregator.
-        b.reset();
-
-        try {
-            this.buffers.put(new BufferAggregator(this.buffSize));
-        } catch (InterruptedException e) {
-            throw logger.logExceptionAsError(new IllegalStateException("UploadFromStream thread interrupted."));
-        }
-    }
 }
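For illustration, a much-simplified, self-contained staging area with the same write/flush contract (SimpleStagingArea and its List<ByteBuffer> aggregates are hypothetical; the SDK's BufferStagingArea delegates to BufferAggregator and validates the chunk size against the service's block limit):

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import reactor.core.publisher.Flux;

// Sketch only: ByteBuffers are appended to the current aggregate; when it reaches
// chunkSize it is emitted and a fresh aggregate is started, so no pooling or
// returnBuffer bookkeeping is needed. flush() emits the trailing partial aggregate.
final class SimpleStagingArea {
    private final long chunkSize;
    private List<ByteBuffer> current = new ArrayList<>();
    private long currentLength = 0;

    SimpleStagingArea(long chunkSize) {
        this.chunkSize = chunkSize;
    }

    // Append one buffer; emit zero or more full aggregates. Called by a single writer,
    // as guaranteed by flatMapSequential(pool::write, 1) upstream.
    Flux<List<ByteBuffer>> write(ByteBuffer buf) {
        List<List<ByteBuffer>> full = new ArrayList<>();
        while (buf.hasRemaining()) {
            int take = (int) Math.min(chunkSize - currentLength, buf.remaining());
            ByteBuffer slice = buf.duplicate();
            slice.limit(slice.position() + take);
            buf.position(buf.position() + take);
            current.add(slice);
            currentLength += take;
            if (currentLength == chunkSize) {   // aggregate is full: emit it, start a new one
                full.add(current);
                current = new ArrayList<>();
                currentLength = 0;
            }
        }
        return Flux.fromIterable(full);
    }

    // Emit whatever is left after the source completes.
    Flux<List<ByteBuffer>> flush() {
        return currentLength == 0 ? Flux.empty() : Flux.just(current);
    }
}

Because every emitted aggregate is a fresh object owned solely by its downstream upload, there is nothing to reset or return, which appears to be what lets the diff delete returnBuffer and the doFinally hooks that called it.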

sdk/storage/azure-storage-file-datalake/src/main/java/com/azure/storage/file/datalake/DataLakeFileAsyncClient.java
Lines changed: 10 additions & 13 deletions

@@ -23,7 +23,7 @@
 import com.azure.storage.common.implementation.BufferAggregator;
 import com.azure.storage.common.implementation.Constants;
 import com.azure.storage.common.implementation.StorageImplUtils;
-import com.azure.storage.common.implementation.UploadBufferPool;
+import com.azure.storage.common.implementation.BufferStagingArea;
 import com.azure.storage.common.implementation.UploadUtils;
 import com.azure.storage.file.datalake.implementation.models.LeaseAccessConditions;
 import com.azure.storage.file.datalake.implementation.models.ModifiedAccessConditions;

@@ -35,13 +35,13 @@
 import com.azure.storage.file.datalake.models.DownloadRetryOptions;
 import com.azure.storage.file.datalake.models.FileExpirationOffset;
 import com.azure.storage.file.datalake.models.FileQueryAsyncResponse;
-import com.azure.storage.file.datalake.options.FileParallelUploadOptions;
-import com.azure.storage.file.datalake.options.FileQueryOptions;
 import com.azure.storage.file.datalake.models.FileRange;
 import com.azure.storage.file.datalake.models.FileReadAsyncResponse;
 import com.azure.storage.file.datalake.models.PathHttpHeaders;
 import com.azure.storage.file.datalake.models.PathInfo;
 import com.azure.storage.file.datalake.models.PathProperties;
+import com.azure.storage.file.datalake.options.FileParallelUploadOptions;
+import com.azure.storage.file.datalake.options.FileQueryOptions;
 import com.azure.storage.file.datalake.options.FileScheduleDeletionOptions;
 import reactor.core.publisher.Flux;
 import reactor.core.publisher.Mono;

@@ -349,20 +349,17 @@ private Mono<Response<PathInfo>> uploadInChunks(Flux<ByteBuffer> data, long file
         Lock progressLock = new ReentrantLock();

         // Validation done in the constructor.
-        /*
-        We use maxConcurrency + 1 for the number of buffers because one buffer will typically be being filled while the
-        others are being sent.
-        */
-        UploadBufferPool pool = new UploadBufferPool(parallelTransferOptions.getMaxConcurrency() + 1,
-            parallelTransferOptions.getBlockSizeLong(), MAX_APPEND_FILE_BYTES);
+        BufferStagingArea pool = new BufferStagingArea(parallelTransferOptions.getBlockSizeLong(), MAX_APPEND_FILE_BYTES);

         Flux<ByteBuffer> chunkedSource = UploadUtils.chunkSource(data, parallelTransferOptions);

         /*
         Write to the pool and upload the output.
+        maxConcurrency = 1 when writing means only 1 BufferAggregator will be accumulating at a time.
+        parallelTransferOptions.getMaxConcurrency() appends will be happening at once, so we guarantee buffering of
+        only concurrency + 1 chunks at a time.
         */
-        return chunkedSource.concatMap(pool::write)
-            .limitRate(parallelTransferOptions.getMaxConcurrency()) // This guarantees that concatMap will only buffer maxConcurrency * chunkSize data
+        return chunkedSource.flatMapSequential(pool::write, 1)
             .concatWith(Flux.defer(pool::flush))
             /* Map the data to a tuple 3, of buffer, buffer length, buffer offset */
             .map(bufferAggregator -> Tuples.of(bufferAggregator, bufferAggregator.length(), 0L))

@@ -389,10 +386,10 @@ private Mono<Response<PathInfo>> uploadInChunks(Flux<ByteBuffer> data, long file
                 Flux<ByteBuffer> progressData = ProgressReporter.addParallelProgressReporting(
                     bufferAggregator.asFlux(), parallelTransferOptions.getProgressReceiver(),
                     progressLock, totalProgress);
+                final long offset = currentBufferLength + currentOffset;
                 return appendWithResponse(progressData, currentOffset, currentBufferLength, null,
                     requestConditions.getLeaseId())
-                    .doFinally(x -> pool.returnBuffer(bufferAggregator))
-                    .map(resp -> currentBufferLength + currentOffset) /* End of file after append to pass to flush. */
+                    .map(resp -> offset) /* End of file after append to pass to flush. */
                     .flux();
             }, parallelTransferOptions.getMaxConcurrency())
             .last()
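The last hunk hoists the end-of-file computation into a final local before the asynchronous call and maps each append to that value, so .last() hands the final file length to flush without re-reading any upload-time state. A rough sketch of that append-then-flush shape (appendWithResponse and flush are hypothetical stand-ins here, and the real chain's parallel flatMapSequential is replaced by concatMap for brevity):

import java.util.Arrays;
import java.util.List;

import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

// Sketch only: each chunk is appended at its offset, every append is mapped to the
// end-of-file position it produced, and the last such position is passed to flush.
public class AppendThenFlushSketch {
    public static void main(String[] args) {
        List<Long> chunkLengths = Arrays.asList(4L * 1024 * 1024, 4L * 1024 * 1024, 1024L * 1024);

        long[] runningOffset = {0L};               // sequential here, so a simple counter suffices
        Flux.fromIterable(chunkLengths)
            .concatMap(length -> {
                long currentOffset = runningOffset[0];
                runningOffset[0] += length;
                // Capture the end-of-file position eagerly, before the async call,
                // mirroring "final long offset = currentBufferLength + currentOffset".
                final long offset = currentOffset + length;
                return appendWithResponse(currentOffset, length)
                    .map(resp -> offset);          // end of file after this append
            })
            .last()
            .flatMap(AppendThenFlushSketch::flush) // flush at the final file length
            .block();
    }

    static Mono<String> appendWithResponse(long offset, long length) {
        return Mono.just("appended [" + offset + ", " + (offset + length) + ")");
    }

    static Mono<Void> flush(long fileLength) {
        System.out.println("flush at length " + fileLength);
        return Mono.empty();
    }
}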

sdk/storage/azure-storage-file-datalake/src/test/java/com/azure/storage/file/datalake/FileAPITest.groovy
Lines changed: 2 additions & 7 deletions

@@ -7,14 +7,13 @@ import com.azure.identity.DefaultAzureCredentialBuilder
 import com.azure.storage.blob.BlobUrlParts
 import com.azure.storage.blob.models.BlobErrorCode
 import com.azure.storage.blob.models.BlobStorageException
-import com.azure.storage.blob.models.BlockListType
-import com.azure.storage.file.datalake.models.DownloadRetryOptions
 import com.azure.storage.common.ParallelTransferOptions
 import com.azure.storage.common.ProgressReceiver
 import com.azure.storage.common.implementation.Constants
 import com.azure.storage.file.datalake.models.AccessTier
 import com.azure.storage.file.datalake.models.DataLakeRequestConditions
 import com.azure.storage.file.datalake.models.DataLakeStorageException
+import com.azure.storage.file.datalake.models.DownloadRetryOptions
 import com.azure.storage.file.datalake.models.FileExpirationOffset
 import com.azure.storage.file.datalake.models.FileQueryArrowField
 import com.azure.storage.file.datalake.models.FileQueryArrowFieldType

@@ -36,11 +35,11 @@ import com.azure.storage.file.datalake.models.RolePermissions
 import com.azure.storage.file.datalake.options.FileParallelUploadOptions
 import com.azure.storage.file.datalake.options.FileQueryOptions
 import com.azure.storage.file.datalake.options.FileScheduleDeletionOptions
-import spock.lang.Ignore
 import reactor.core.Exceptions
 import reactor.core.publisher.Flux
 import reactor.core.publisher.Hooks
 import reactor.test.StepVerifier
+import spock.lang.Ignore
 import spock.lang.Requires
 import spock.lang.Unroll

@@ -2296,7 +2295,6 @@ class FileAPITest extends APISpec {

     @Unroll
     @Requires({ liveMode() }) // Test uploads large amount of data
-    @Ignore("Timeouts")
     def "Async buffered upload"() {
         setup:
         DataLakeFileAsyncClient facWrite = getPrimaryServiceClientForWrites(bufferSize)

@@ -2368,7 +2366,6 @@ class FileAPITest extends APISpec {

     @Unroll
     @Requires({ liveMode() })
-    @Ignore // Hanging in pipeline
     def "Buffered upload with reporter"() {
         setup:
         DataLakeFileAsyncClient fac = fscAsync.getFileAsyncClient(generatePathName())

@@ -2402,7 +2399,6 @@ class FileAPITest extends APISpec {

     @Unroll
     @Requires({liveMode()}) // Test uploads large amount of data
-    @Ignore("Timeouts")
     def "Buffered upload chunked source"() {
         setup:
         DataLakeFileAsyncClient facWrite = getPrimaryServiceClientForWrites(bufferSize)

@@ -2624,7 +2620,6 @@ class FileAPITest extends APISpec {

     @Unroll
     @Requires({ liveMode() })
-    // @Ignore("failing in ci")
     def "Buffered upload options"() {
         setup:
         DataLakeFileAsyncClient fac = fscAsync.getFileAsyncClient(generatePathName())
