Skip to content

Commit 21bd526

Browse files
Fix retry policy for GoneException when writes fail with 410/0 from Service (Azure#17172)
1 parent c1a8768 commit 21bd526

File tree

5 files changed

+77
-6
lines changed

5 files changed

+77
-6
lines changed

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GoneException.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
*/
1919
public class GoneException extends CosmosException {
2020

21+
private boolean basedOn410ResponseFromService = false;
22+
2123
/**
2224
* Instantiates a new Gone exception.
2325
*
@@ -159,4 +161,12 @@ public GoneException(String message,
159161
String requestUriString) {
160162
super(message, innerException, headers, HttpConstants.StatusCodes.GONE, requestUriString);
161163
}
164+
165+
public boolean isBasedOn410ResponseFromService() {
166+
return this.basedOn410ResponseFromService;
167+
}
168+
169+
public void setIsBasedOn410ResponseFromService() {
170+
this.basedOn410ResponseFromService = true;
171+
}
162172
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,12 @@ public Mono<ShouldRetryResult> shouldRetry(Exception exception) {
163163
return Mono.just(ShouldRetryResult.noRetry());
164164
} else if (exception instanceof GoneException &&
165165
!request.isReadOnly() &&
166-
BridgeInternal.hasSendingRequestStarted((CosmosException)exception)) {
166+
BridgeInternal.hasSendingRequestStarted((CosmosException)exception) &&
167+
!((GoneException)exception).isBasedOn410ResponseFromService()) {
167168

168169
logger.warn(
169-
"Operation will NOT be retried. Write operations can not be retried safely when sending the request " +
170+
"Operation will NOT be retried. Write operations which failed due to transient transport errors " +
171+
"can not be retried safely when sending the request " +
170172
"to the service because they aren't idempotent. Current attempt {}, Exception: ",
171173
this.attemptCount,
172174
exception);

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/HttpTransportClient.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -863,15 +863,19 @@ private Mono<StoreResponse> createErrorResponseFromHttpResponse(String resourceA
863863
break;
864864
} else {
865865
// Have the request URL in the exception message for debugging purposes.
866-
exception = new GoneException(
866+
GoneException goneExceptionFromService = new GoneException(
867867
String.format(
868868
RMResources.ExceptionMessage,
869869
RMResources.Gone),
870870
response.headers(),
871871
request.uri());
872+
goneExceptionFromService.setIsBasedOn410ResponseFromService();
872873

873-
exception.getResponseHeaders().put(HttpConstants.HttpHeaders.ACTIVITY_ID,
874-
activityId);
874+
goneExceptionFromService.getResponseHeaders().put(
875+
HttpConstants.HttpHeaders.ACTIVITY_ID,
876+
activityId);
877+
878+
exception = goneExceptionFromService;
875879
break;
876880
}
877881
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -791,7 +791,10 @@ private void messageReceived(final ChannelHandlerContext context, final RntbdRes
791791
cause = new PartitionKeyRangeGoneException(error, lsn, partitionKeyRangeId, responseHeaders);
792792
break;
793793
default:
794-
cause = new GoneException(error, lsn, partitionKeyRangeId, responseHeaders);
794+
GoneException goneExceptionFromService =
795+
new GoneException(error, lsn, partitionKeyRangeId, responseHeaders);
796+
goneExceptionFromService.setIsBasedOn410ResponseFromService();
797+
cause = goneExceptionFromService;
795798
break;
796799
}
797800
break;

sdk/cosmos/azure-cosmos/src/test/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicyTest.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,58 @@ public void shouldNotRetryFlushedWriteWithGoneExceptionButForceAddressRefresh()
151151
assertThat(shouldRetryResult.backOffTime).isNull();
152152
}
153153

154+
/**
155+
* GoneException for write which is already sent to the wire but based on receiving
156+
* an actual response from the Service with 410 Status Code and SubStatusCode 0
157+
* should result in retry
158+
* shouldRetryResult. ShouldRetryResult
159+
*/
160+
@Test(groups = { "unit" }, timeOut = TIMEOUT)
161+
public void shouldRetryFlushedWriteWithGoneExceptionFromService() {
162+
RxDocumentServiceRequest request = RxDocumentServiceRequest.create(
163+
mockDiagnosticsClientContext(),
164+
OperationType.Create,
165+
ResourceType.Document);
166+
GoneAndRetryWithRetryPolicy goneAndRetryWithRetryPolicy =
167+
new GoneAndRetryWithRetryPolicy(request, 30);
168+
169+
Supplier<GoneException> goneExceptionForFlushedRequestSupplier = () -> {
170+
GoneException goneExceptionForFlushedRequest = new GoneException();
171+
BridgeInternal.setSendingRequestStarted(goneExceptionForFlushedRequest, true);
172+
goneExceptionForFlushedRequest.setIsBasedOn410ResponseFromService();
173+
return goneExceptionForFlushedRequest;
174+
};
175+
176+
Mono<IRetryPolicy.ShouldRetryResult> singleShouldRetry = goneAndRetryWithRetryPolicy
177+
.shouldRetry(goneExceptionForFlushedRequestSupplier.get());
178+
IRetryPolicy.ShouldRetryResult shouldRetryResult = singleShouldRetry.block();
179+
assertThat(shouldRetryResult.shouldRetry).isTrue();
180+
assertThat(shouldRetryResult.policyArg.getValue0()).isTrue();
181+
assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(1);
182+
assertThat(shouldRetryResult.backOffTime.getSeconds()).isEqualTo(0);
183+
184+
singleShouldRetry = goneAndRetryWithRetryPolicy.shouldRetry(goneExceptionForFlushedRequestSupplier.get());
185+
shouldRetryResult = singleShouldRetry.block();
186+
assertThat(shouldRetryResult.shouldRetry).isTrue();
187+
assertThat(shouldRetryResult.policyArg.getValue0()).isTrue();
188+
assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(2);
189+
assertThat(shouldRetryResult.backOffTime.getSeconds()).isEqualTo(1);
190+
191+
singleShouldRetry = goneAndRetryWithRetryPolicy.shouldRetry(goneExceptionForFlushedRequestSupplier.get());
192+
shouldRetryResult = singleShouldRetry.block();
193+
assertThat(shouldRetryResult.shouldRetry).isTrue();
194+
assertThat(shouldRetryResult.policyArg.getValue0()).isTrue();
195+
assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(3);
196+
assertThat(shouldRetryResult.backOffTime.getSeconds()).isEqualTo(2);
197+
198+
singleShouldRetry = goneAndRetryWithRetryPolicy.shouldRetry(goneExceptionForFlushedRequestSupplier.get());
199+
shouldRetryResult = singleShouldRetry.block();
200+
assertThat(shouldRetryResult.shouldRetry).isTrue();
201+
assertThat(shouldRetryResult.policyArg.getValue0()).isTrue();
202+
assertThat(shouldRetryResult.policyArg.getValue3()).isEqualTo(4);
203+
assertThat(shouldRetryResult.backOffTime.getSeconds()).isEqualTo(4);
204+
}
205+
154206
/**
155207
* RequestTimeoutExceptions should not be retried for read or write - no address cache refresh expected
156208
* shouldRetryResult. ShouldRetryResult

0 commit comments

Comments
 (0)