Skip to content

Commit 02608fe

Browse files
Adding retry context on query diagnostics by redesigning retry context logic (Azure#20746)
* Adding request level diagnostic information for queries * Removing unused variable * Fixing spot bugs Refactoring spotbug suppressions as the class ClientSideRequestStatistics is moved to implementation * test fixes * PR comments and minor improvements * Fixing tests * Retry context refactor and adding on query diagnostics * fixing null pointer * incremental changes * adding test cases * fixing warning * cleaning code for PR * fixing test case * resolving comments and fixing test cases * resolving comments * resolving comments * adding session failure test * fixing session test * adding null check again in BridgeInternal.java for non document resources * adding null check for readablity uniformity Co-authored-by: Bhaskar Mallapragada <bhaskar.mallapragada@microsoft.com>
1 parent 5280a5c commit 02608fe

38 files changed

+1097
-328
lines changed

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/BridgeInternal.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import com.azure.cosmos.implementation.RequestTimeline;
2222
import com.azure.cosmos.implementation.Resource;
2323
import com.azure.cosmos.implementation.ResourceResponse;
24+
import com.azure.cosmos.implementation.RetryContext;
2425
import com.azure.cosmos.implementation.RxDocumentServiceRequest;
2526
import com.azure.cosmos.implementation.RxDocumentServiceResponse;
2627
import com.azure.cosmos.implementation.SerializationDiagnosticsContext;
@@ -159,7 +160,6 @@ public static <T> FeedResponse<T> createFeedResponseWithQueryMetrics(
159160
boolean useEtagAsContinuation,
160161
boolean isNoChangesResponse,
161162
CosmosDiagnostics cosmosDiagnostics) {
162-
163163
FeedResponse<T> feedResponseWithQueryMetrics = ModelBridgeInternal.createFeedResponseWithQueryMetrics(
164164
results,
165165
headers,
@@ -551,9 +551,8 @@ public static void recordResponse(CosmosDiagnostics cosmosDiagnostics,
551551
}
552552

553553
@Warning(value = INTERNAL_USE_ONLY_WARNING)
554-
public static void recordRetryContext(CosmosDiagnostics cosmosDiagnostics,
555-
RxDocumentServiceRequest request) {
556-
cosmosDiagnostics.clientSideRequestStatistics().recordRetryContext(request);
554+
public static void recordRetryContextEndTime(CosmosDiagnostics cosmosDiagnostics) {
555+
cosmosDiagnostics.clientSideRequestStatistics().recordRetryContextEndTime();
557556
}
558557

559558
@Warning(value = INTERNAL_USE_ONLY_WARNING)
@@ -809,4 +808,13 @@ public static SqlQuerySpec getOfferQuerySpecFromResourceId(CosmosAsyncContainer
809808
public static CosmosAsyncContainer getControlContainerFromThroughputGlobalControlConfig(GlobalThroughputControlConfig globalControlConfig) {
810809
return globalControlConfig.getControlContainer();
811810
}
811+
812+
@Warning(value = INTERNAL_USE_ONLY_WARNING)
813+
public static RetryContext getRetryContext(CosmosDiagnostics cosmosDiagnostics) {
814+
if(cosmosDiagnostics != null && cosmosDiagnostics.clientSideRequestStatistics() != null) {
815+
return cosmosDiagnostics.clientSideRequestStatistics().getRetryContext();
816+
} else {
817+
return null;
818+
}
819+
}
812820
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClearingSessionContainerClientRetryPolicy.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) {
3737
this.retryPolicy.onBeforeSendRequest(request);
3838
}
3939

40+
@Override
41+
public RetryContext getRetryContext() {
42+
return this.retryPolicy.getRetryContext();
43+
}
44+
4045
@Override
4146
public Mono<ShouldRetryResult> shouldRetry(Exception e) {
4247

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Licensed under the MIT License.
33
package com.azure.cosmos.implementation;
44

5+
import com.azure.cosmos.BridgeInternal;
56
import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList;
67
import com.azure.cosmos.implementation.directconnectivity.WebExceptionUtility;
78
import com.azure.cosmos.CosmosException;
@@ -52,15 +53,16 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext,
5253
boolean enableEndpointDiscovery,
5354
ThrottlingRetryOptions throttlingRetryOptions) {
5455

55-
this.throttlingRetry = new ResourceThrottleRetryPolicy(
56-
throttlingRetryOptions.getMaxRetryAttemptsOnThrottledRequests(),
57-
throttlingRetryOptions.getMaxRetryWaitTime());
5856
this.globalEndpointManager = globalEndpointManager;
5957
this.failoverRetryCount = 0;
6058
this.enableEndpointDiscovery = enableEndpointDiscovery;
6159
this.sessionTokenRetryCount = 0;
6260
this.canUseMultipleWriteLocations = false;
6361
this.cosmosDiagnostics = diagnosticsClientContext.createDiagnostics();
62+
this.throttlingRetry = new ResourceThrottleRetryPolicy(
63+
throttlingRetryOptions.getMaxRetryAttemptsOnThrottledRequests(),
64+
throttlingRetryOptions.getMaxRetryWaitTime(),
65+
BridgeInternal.getRetryContext(this.getCosmosDiagnostics()));
6466
}
6567

6668
@Override
@@ -289,6 +291,16 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) {
289291
request.requestContext.routeToLocation(this.locationEndpoint);
290292
}
291293
}
294+
295+
@Override
296+
public com.azure.cosmos.implementation.RetryContext getRetryContext() {
297+
return BridgeInternal.getRetryContext(this.getCosmosDiagnostics());
298+
}
299+
300+
CosmosDiagnostics getCosmosDiagnostics() {
301+
return cosmosDiagnostics;
302+
}
303+
292304
private static class RetryContext {
293305

294306
public int retryCount;

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public ClientSideRequestStatistics(DiagnosticsClientContext diagnosticsClientCon
6363
this.connectionMode = ConnectionMode.DIRECT;
6464
this.metadataDiagnosticsContext = new MetadataDiagnosticsContext();
6565
this.serializationDiagnosticsContext = new SerializationDiagnosticsContext();
66+
this.retryContext = new RetryContext();
6667
}
6768

6869
public Duration getDuration() {
@@ -82,7 +83,6 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResult storeRe
8283

8384
URI locationEndPoint = null;
8485
if (request.requestContext != null) {
85-
this.retryContext = new RetryContext(request.requestContext.retryContext);
8686
if (request.requestContext.locationEndpointToRoute != null) {
8787
locationEndPoint = request.requestContext.locationEndpointToRoute;
8888
}
@@ -121,11 +121,8 @@ public void recordGatewayResponse(
121121
URI locationEndPoint = null;
122122
if (rxDocumentServiceRequest != null && rxDocumentServiceRequest.requestContext != null) {
123123
locationEndPoint = rxDocumentServiceRequest.requestContext.locationEndpointToRoute;
124-
if (rxDocumentServiceRequest.requestContext.retryContext != null) {
125-
rxDocumentServiceRequest.requestContext.retryContext.retryEndTime = Instant.now();
126-
this.retryContext = new RetryContext(rxDocumentServiceRequest.requestContext.retryContext);
127-
}
128124
}
125+
this.recordRetryContextEndTime();
129126

130127
if (locationEndPoint != null) {
131128
this.regionsContacted.add(locationEndPoint);
@@ -224,11 +221,12 @@ public SerializationDiagnosticsContext getSerializationDiagnosticsContext(){
224221
return this.serializationDiagnosticsContext;
225222
}
226223

227-
public void recordRetryContext(RxDocumentServiceRequest request) {
228-
if(request.requestContext.retryContext != null) {
229-
request.requestContext.retryContext.retryEndTime = Instant.now();
230-
this.retryContext = new RetryContext(request.requestContext.retryContext);
231-
}
224+
public void recordRetryContextEndTime() {
225+
this.retryContext.updateEndTime();
226+
}
227+
228+
public RetryContext getRetryContext() {
229+
return retryContext;
232230
}
233231

234232
public static class StoreResponseStatistics {

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentClientRetryPolicy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* While this class is public, but it is not part of our published public APIs.
77
* This is meant to be internally used only by our sdk.
88
*/
9-
public abstract class DocumentClientRetryPolicy extends RetryPolicyWithDiagnostics {
9+
public abstract class DocumentClientRetryPolicy implements IRetryPolicy {
1010

1111
// TODO: this is just a place holder for now. As .Net has this method.
1212
// I have to spend more time figure out what's the right pattern for this (if method needed)

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,8 @@ public class DocumentServiceRequestContext implements Cloneable {
3838
public volatile PartitionKeyInternal effectivePartitionKey;
3939
public volatile CosmosDiagnostics cosmosDiagnostics;
4040
public volatile String resourcePhysicalAddress;
41-
public RetryContext retryContext;
4241

4342
public DocumentServiceRequestContext() {
44-
retryContext = new RetryContext();
4543
}
4644

4745
/**
@@ -78,52 +76,6 @@ public void clearRouteToLocation() {
7876
this.usePreferredLocations = null;
7977
}
8078

81-
public void updateRetryContext(IRetryPolicy retryPolicy, boolean isGenericRetry) {
82-
if (isGenericRetry) {
83-
if (this.retryContext.directRetrySpecificStatusAndSubStatusCodes != null && this.retryContext.directRetrySpecificStatusAndSubStatusCodes.size() > 0) {
84-
for (int i = this.retryContext.directRetrySpecificStatusAndSubStatusCodes.size() - 1; i >= 0; i--) {
85-
retryPolicy.incrementRetry();
86-
retryPolicy.addStatusAndSubStatusCode(0, this.retryContext.directRetrySpecificStatusAndSubStatusCodes.get(i)[0],
87-
this.retryContext.directRetrySpecificStatusAndSubStatusCodes.get(i)[1]);
88-
}
89-
this.retryContext.directRetrySpecificStatusAndSubStatusCodes.clear();
90-
}
91-
92-
if (retryPolicy.getStatusAndSubStatusCodes() != null) {
93-
this.retryContext.genericRetrySpecificStatusAndSubStatusCodes = Collections.synchronizedList(new ArrayList<>(retryPolicy.getStatusAndSubStatusCodes()));
94-
} else {
95-
this.retryContext.genericRetrySpecificStatusAndSubStatusCodes = Collections.synchronizedList(new ArrayList<>());
96-
}
97-
this.retryContext.retryCount = retryPolicy.getRetryCount();
98-
this.retryContext.statusAndSubStatusCodes = retryPolicy.getStatusAndSubStatusCodes();
99-
if (this.retryContext.retryStartTime == null) {
100-
this.retryContext.retryStartTime = retryPolicy.getStartTime();
101-
}
102-
this.retryContext.retryEndTime = retryPolicy.getEndTime();
103-
} else {
104-
if (this.retryContext.genericRetrySpecificStatusAndSubStatusCodes != null && this.retryContext.genericRetrySpecificStatusAndSubStatusCodes.size() > 0) {
105-
for (int i = this.retryContext.genericRetrySpecificStatusAndSubStatusCodes.size() - 1; i >= 0; i--) {
106-
retryPolicy.incrementRetry();
107-
retryPolicy.addStatusAndSubStatusCode(0, this.retryContext.genericRetrySpecificStatusAndSubStatusCodes.get(i)[0],
108-
this.retryContext.genericRetrySpecificStatusAndSubStatusCodes.get(i)[1]);
109-
}
110-
this.retryContext.genericRetrySpecificStatusAndSubStatusCodes.clear();
111-
}
112-
113-
if (retryPolicy.getStatusAndSubStatusCodes() != null) {
114-
this.retryContext.directRetrySpecificStatusAndSubStatusCodes = Collections.synchronizedList(new ArrayList<>(retryPolicy.getStatusAndSubStatusCodes()));
115-
} else {
116-
this.retryContext.directRetrySpecificStatusAndSubStatusCodes = Collections.synchronizedList(new ArrayList<>());
117-
}
118-
this.retryContext.retryCount = retryPolicy.getRetryCount();
119-
this.retryContext.statusAndSubStatusCodes = retryPolicy.getStatusAndSubStatusCodes();
120-
if (this.retryContext.retryStartTime == null) {
121-
this.retryContext.retryStartTime = retryPolicy.getStartTime();
122-
}
123-
this.retryContext.retryEndTime = retryPolicy.getEndTime();
124-
}
125-
}
126-
12779
@Override
12880
public DocumentServiceRequestContext clone() {
12981
DocumentServiceRequestContext context = new DocumentServiceRequestContext();
@@ -147,7 +99,6 @@ public DocumentServiceRequestContext clone() {
14799
context.performedBackgroundAddressRefresh = this.performedBackgroundAddressRefresh;
148100
context.cosmosDiagnostics = this.cosmosDiagnostics;
149101
context.resourcePhysicalAddress = this.resourcePhysicalAddress;
150-
context.retryContext = new RetryContext(this.retryContext);
151102

152103
return context;
153104
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicy.java

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,12 @@
55

66
import reactor.core.publisher.Mono;
77

8-
import java.time.Duration;
9-
import java.time.Instant;
10-
import java.util.List;
11-
128
// TODO update documentation
139
/**
1410
* While this class is public, but it is not part of our published public APIs.
1511
* This is meant to be internally used only by our sdk.
1612
*/
17-
public interface IRetryPolicy {
13+
public interface IRetryPolicy {
1814
// this capture all the retry logic
1915
// TODO: design decision should this return a single or an observable?
2016

@@ -26,21 +22,5 @@ public interface IRetryPolicy {
2622
/// <returns>If the retry needs to be attempted or not</returns>
2723
Mono<ShouldRetryResult> shouldRetry(Exception e);
2824

29-
int getRetryCount();
30-
31-
void incrementRetry();
32-
33-
void captureStartTimeIfNotSet();
34-
35-
void updateEndTime();
36-
37-
Duration getRetryLatency();
38-
39-
Instant getStartTime();
40-
41-
Instant getEndTime();
42-
43-
void addStatusAndSubStatusCode(Integer index, int statusCode, int subStatusCode);
44-
45-
List<int[]> getStatusAndSubStatusCodes();
25+
RetryContext getRetryContext();
4626
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IRetryPolicyFactory.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
// Licensed under the MIT License.
33
package com.azure.cosmos.implementation;
44

5+
import com.azure.cosmos.CosmosDiagnostics;
6+
57
/**
68
* While this class is public, but it is not part of our published public APIs.
79
* This is meant to be internally used only by our sdk.
810
*/
911
public interface IRetryPolicyFactory {
1012
DocumentClientRetryPolicy getRequestPolicy();
13+
RetryContext getRetryContext();
1114
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/InvalidPartitionExceptionRetryPolicy.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) {
4545
}
4646
}
4747

48+
@Override
49+
public RetryContext getRetryContext() {
50+
if (this.nextPolicy != null) {
51+
return this.nextPolicy.getRetryContext();
52+
} else {
53+
return null;
54+
}
55+
}
56+
4857
@Override
4958
public Mono<ShouldRetryResult> shouldRetry(Exception e) {
5059
CosmosException clientException = Utils.as(e, CosmosException.class);

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PartitionKeyMismatchRetryPolicy.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,13 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) {
9292
this.request = request;
9393
this.nextRetryPolicy.onBeforeSendRequest(request);
9494
}
95+
96+
@Override
97+
public RetryContext getRetryContext() {
98+
if (this.nextRetryPolicy != null) {
99+
return this.nextRetryPolicy.getRetryContext();
100+
} else {
101+
return null;
102+
}
103+
}
95104
}

0 commit comments

Comments
 (0)