Skip to content

Commit 957740d

Browse files
Improved diagnostics with new models for StoreResponse, StoreResult and CosmosException (Azure#28620)
* Improved diagnostics with new models for StoreResponse, StoreResult and CosmosException * Fixed SpotBugs issues related to storeResult.getException() * Updated query plan cache to ConcurrentHashMap with fixed size of 1000 to start with * Added exception response headers and message to direct and gateway errors. Also added code for rethrowing any java.lang.Error * Added unit tests for StoreReader and ConsistencyWriter * Disabled StoreReader unit test for error since it is causing other tests to fail. Will investigate later * Commented out the broken test * Reverted StoreReaderTest * Removed mockito-inline * Fixed StoreReaderTest static mocking * Fixed ConsistencyWriterTest static mocking * Code review comments and changelog addition * Fixed some test cases
1 parent 60cf956 commit 957740d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+725
-489
lines changed

sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ protected void performWorkload(BaseSubscriber<Document> baseSubscriber, long i)
153153
} catch (Throwable error) {
154154
concurrencyControlSemaphore.release();
155155
logger.error("subscription failed due to ", error);
156+
if (error instanceof Error) {
157+
throw (Error) error;
158+
}
156159
}
157160
}
158161

sdk/cosmos/azure-cosmos/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
#### Breaking Changes
88

99
#### Bugs Fixed
10+
* Fixed error propagation so that any `java.lang.Error` is rethrown (bubbled up) instead of being swallowed - See [PR 28620](https://github.com/Azure/azure-sdk-for-java/pull/28620)
1011

1112
#### Other Changes
13+
* Added `exceptionMessage` and `exceptionResponseHeaders` to `CosmosDiagnostics` in case of any exceptions - See [PR 28620](https://github.com/Azure/azure-sdk-for-java/pull/28620)
14+
* Improved performance of `query plan` cache by using `ConcurrentHashMap` with a fixed size of 1000 - See [PR 28537](https://github.com/Azure/azure-sdk-for-java/pull/28537)
1215
* Changed 429 (Throttling) retry policy to have an upper bound for the back-off time of 5 seconds - See [PR 28764](https://github.com/Azure/azure-sdk-for-java/pull/28764)
1316

1417
### 4.29.1 (2022-04-27)

sdk/cosmos/azure-cosmos/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,12 @@ Licensed under the MIT License.
241241
<version>4.0.0</version> <!-- {x-version-update;org.mockito:mockito-core;external_dependency} -->
242242
<scope>test</scope>
243243
</dependency>
244+
<dependency>
245+
<groupId>org.mockito</groupId>
246+
<artifactId>mockito-inline</artifactId>
247+
<version>4.0.0</version> <!-- {x-version-update;org.mockito:mockito-inline;external_dependency} -->
248+
<scope>test</scope>
249+
</dependency>
244250

245251
</dependencies>
246252

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/BridgeInternal.java

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@
3030
import com.azure.cosmos.implementation.TracerProvider;
3131
import com.azure.cosmos.implementation.Warning;
3232
import com.azure.cosmos.implementation.directconnectivity.StoreResponse;
33+
import com.azure.cosmos.implementation.directconnectivity.StoreResponseDiagnostics;
3334
import com.azure.cosmos.implementation.directconnectivity.StoreResult;
35+
import com.azure.cosmos.implementation.directconnectivity.StoreResultDiagnostics;
3436
import com.azure.cosmos.implementation.directconnectivity.Uri;
3537
import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdChannelAcquisitionTimeline;
3638
import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdEndpointStatistics;
@@ -561,18 +563,13 @@ public static ClientSideRequestStatistics getClientSideRequestStatics(CosmosDiag
561563
return clientSideRequestStatistics;
562564
}
563565

564-
@Warning(value = INTERNAL_USE_ONLY_WARNING)
565-
public static void setGatewayRequestTimelineOnDiagnostics(CosmosDiagnostics cosmosDiagnostics,
566-
RequestTimeline requestTimeline) {
567-
cosmosDiagnostics.clientSideRequestStatistics().setGatewayRequestTimeline(requestTimeline);
568-
}
569-
570566
@Warning(value = INTERNAL_USE_ONLY_WARNING)
571567
public static void recordResponse(CosmosDiagnostics cosmosDiagnostics,
572568
RxDocumentServiceRequest request,
573569
StoreResult storeResult,
574570
GlobalEndpointManager globalEndpointManager) {
575-
cosmosDiagnostics.clientSideRequestStatistics().recordResponse(request, storeResult, globalEndpointManager);
571+
StoreResultDiagnostics storeResultDiagnostics = StoreResultDiagnostics.createStoreResultDiagnostics(storeResult);
572+
cosmosDiagnostics.clientSideRequestStatistics().recordResponse(request, storeResultDiagnostics, globalEndpointManager);
576573
}
577574

578575
@Warning(value = INTERNAL_USE_ONLY_WARNING)
@@ -602,9 +599,19 @@ public static SerializationDiagnosticsContext getSerializationDiagnosticsContext
602599
public static void recordGatewayResponse(CosmosDiagnostics cosmosDiagnostics,
603600
RxDocumentServiceRequest rxDocumentServiceRequest,
604601
StoreResponse storeResponse,
605-
CosmosException exception,
606602
GlobalEndpointManager globalEndpointManager) {
607-
cosmosDiagnostics.clientSideRequestStatistics().recordGatewayResponse(rxDocumentServiceRequest, storeResponse, exception, globalEndpointManager);
603+
StoreResponseDiagnostics storeResponseDiagnostics = StoreResponseDiagnostics.createStoreResponseDiagnostics(storeResponse);
604+
cosmosDiagnostics.clientSideRequestStatistics().recordGatewayResponse(rxDocumentServiceRequest, storeResponseDiagnostics, globalEndpointManager);
605+
}
606+
607+
@Warning(value = INTERNAL_USE_ONLY_WARNING)
608+
public static void recordGatewayResponse(CosmosDiagnostics cosmosDiagnostics,
609+
RxDocumentServiceRequest rxDocumentServiceRequest,
610+
CosmosException cosmosException,
611+
GlobalEndpointManager globalEndpointManager) {
612+
StoreResponseDiagnostics storeResponseDiagnostics = StoreResponseDiagnostics.createStoreResponseDiagnostics(cosmosException);
613+
cosmosDiagnostics.clientSideRequestStatistics().recordGatewayResponse(rxDocumentServiceRequest, storeResponseDiagnostics, globalEndpointManager);
614+
cosmosException.setDiagnostics(cosmosDiagnostics);
608615
}
609616

610617
@Warning(value = INTERNAL_USE_ONLY_WARNING)

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosException.java

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ protected CosmosException(int statusCode, String message, Map<String, String> re
154154
this.statusCode = statusCode;
155155
this.responseHeaders = new ConcurrentHashMap<>();
156156

157+
// ConcurrentHashMap does not accept null keys or values, so filter out such entries before inserting them.
157158
if (responseHeaders != null) {
158159
for (Map.Entry<String, String> entry: responseHeaders.entrySet()) {
159160
if (entry.getKey() != null && entry.getValue() != null) {
@@ -553,31 +554,6 @@ void setRntbdPendingRequestQueueSize(int rntbdPendingRequestQueueSize) {
553554
public CosmosException createCosmosException(int statusCode, Exception innerException) {
554555
return new CosmosException(statusCode, innerException);
555556
}
556-
557-
@Override
558-
public CosmosException createSerializableCosmosException(CosmosException cosmosException) {
559-
if (cosmosException == null) {
560-
return null;
561-
}
562-
CosmosException exception = new CosmosException(cosmosException.statusCode,
563-
cosmosException.cosmosError, cosmosException.getResponseHeaders());
564-
exception.requestTimeline = cosmosException.requestTimeline;
565-
exception.channelAcquisitionTimeline = cosmosException.channelAcquisitionTimeline;
566-
exception.rntbdChannelTaskQueueSize = cosmosException.rntbdChannelTaskQueueSize;
567-
exception.rntbdEndpointStatistics = cosmosException.rntbdEndpointStatistics;
568-
exception.lsn = cosmosException.lsn;
569-
exception.partitionKeyRangeId = cosmosException.partitionKeyRangeId;
570-
exception.requestUri = cosmosException.requestUri;
571-
exception.resourceAddress = cosmosException.resourceAddress;
572-
exception.requestPayloadLength = cosmosException.requestPayloadLength;
573-
exception.rntbdPendingRequestQueueSize = cosmosException.rntbdPendingRequestQueueSize;
574-
exception.rntbdRequestLength = cosmosException.rntbdRequestLength;
575-
exception.rntbdResponseLength = cosmosException.rntbdResponseLength;
576-
exception.sendingRequestHasStarted = cosmosException.sendingRequestHasStarted;
577-
exception.requestHeaders = null;
578-
exception.cosmosDiagnostics = null;
579-
return exception;
580-
}
581557
});
582558
}
583559
}

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java

Lines changed: 36 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22
// Licensed under the MIT License.
33
package com.azure.cosmos.implementation;
44

5-
import com.azure.cosmos.CosmosException;
65
import com.azure.cosmos.implementation.apachecommons.lang.StringUtils;
76
import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor;
8-
import com.azure.cosmos.implementation.directconnectivity.DirectBridgeInternal;
9-
import com.azure.cosmos.implementation.directconnectivity.StoreResponse;
10-
import com.azure.cosmos.implementation.directconnectivity.StoreResult;
7+
import com.azure.cosmos.implementation.directconnectivity.StoreResponseDiagnostics;
8+
import com.azure.cosmos.implementation.directconnectivity.StoreResultDiagnostics;
119
import com.fasterxml.jackson.core.JsonGenerator;
1210
import com.fasterxml.jackson.databind.SerializerProvider;
1311
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
@@ -43,7 +41,6 @@ public class ClientSideRequestStatistics {
4341
private Set<URI> locationEndpointsContacted;
4442
private RetryContext retryContext;
4543
private GatewayStatistics gatewayStatistics;
46-
private RequestTimeline gatewayRequestTimeline;
4744
private MetadataDiagnosticsContext metadataDiagnosticsContext;
4845
private SerializationDiagnosticsContext serializationDiagnosticsContext;
4946

@@ -93,13 +90,13 @@ public DiagnosticsClientContext.DiagnosticsClientConfig getDiagnosticsClientConf
9390
return diagnosticsClientConfig;
9491
}
9592

96-
public void recordResponse(RxDocumentServiceRequest request, StoreResult storeResult, GlobalEndpointManager globalEndpointManager) {
93+
public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnostics storeResultDiagnostics, GlobalEndpointManager globalEndpointManager) {
9794
Objects.requireNonNull(request, "request is required and cannot be null.");
9895
Instant responseTime = Instant.now();
9996

10097
StoreResponseStatistics storeResponseStatistics = new StoreResponseStatistics();
10198
storeResponseStatistics.requestResponseTimeUTC = responseTime;
102-
storeResponseStatistics.storeResult = StoreResult.createSerializableStoreResult(storeResult);
99+
storeResponseStatistics.storeResult = storeResultDiagnostics;
103100
storeResponseStatistics.requestOperationType = request.getOperationType();
104101
storeResponseStatistics.requestResourceType = request.getResourceType();
105102
activityId = request.getActivityId().toString();
@@ -117,8 +114,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResult storeRe
117114
this.requestEndTimeUTC = responseTime;
118115
}
119116

120-
// TODO (kuthapar): globalEndpointManager != null check is just for safety for hotfix. Remove it after further investigation
121-
if (locationEndPoint != null && globalEndpointManager != null) {
117+
if (locationEndPoint != null) {
122118
this.regionsContacted.add(globalEndpointManager.getRegionName(locationEndPoint, request.getOperationType()));
123119
this.locationEndpointsContacted.add(locationEndPoint);
124120
}
@@ -133,8 +129,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResult storeRe
133129
}
134130

135131
public void recordGatewayResponse(
136-
RxDocumentServiceRequest rxDocumentServiceRequest, StoreResponse storeResponse,
137-
CosmosException exception, GlobalEndpointManager globalEndpointManager) {
132+
RxDocumentServiceRequest rxDocumentServiceRequest, StoreResponseDiagnostics storeResponseDiagnostics, GlobalEndpointManager globalEndpointManager) {
138133
Instant responseTime = Instant.now();
139134

140135
synchronized (this) {
@@ -148,8 +143,7 @@ public void recordGatewayResponse(
148143
}
149144
this.recordRetryContextEndTime();
150145

151-
// TODO (kuthapar): globalEndpointManager != null check is just for safety for hotfix. Remove it after further investigation
152-
if (locationEndPoint != null && globalEndpointManager != null) {
146+
if (locationEndPoint != null) {
153147
this.regionsContacted.add(globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType()));
154148
this.locationEndpointsContacted.add(locationEndPoint);
155149
}
@@ -159,34 +153,18 @@ public void recordGatewayResponse(
159153
this.gatewayStatistics.operationType = rxDocumentServiceRequest.getOperationType();
160154
this.gatewayStatistics.resourceType = rxDocumentServiceRequest.getResourceType();
161155
}
162-
if (storeResponse != null) {
163-
this.gatewayStatistics.statusCode = storeResponse.getStatus();
164-
this.gatewayStatistics.subStatusCode = DirectBridgeInternal.getSubStatusCode(storeResponse);
165-
this.gatewayStatistics.sessionToken = storeResponse
166-
.getHeaderValue(HttpConstants.HttpHeaders.SESSION_TOKEN);
167-
this.gatewayStatistics.requestCharge = storeResponse
168-
.getHeaderValue(HttpConstants.HttpHeaders.REQUEST_CHARGE);
169-
this.gatewayStatistics.requestTimeline = DirectBridgeInternal.getRequestTimeline(storeResponse);
170-
this.gatewayStatistics.partitionKeyRangeId = storeResponse.getPartitionKeyRangeId();
171-
this.activityId= storeResponse.getHeaderValue(HttpConstants.HttpHeaders.ACTIVITY_ID);
172-
} else if (exception != null) {
173-
this.gatewayStatistics.statusCode = exception.getStatusCode();
174-
this.gatewayStatistics.subStatusCode = exception.getSubStatusCode();
175-
this.gatewayStatistics.requestTimeline = this.gatewayRequestTimeline;
176-
this.gatewayStatistics.requestCharge= String.valueOf(exception.getRequestCharge());
177-
this.activityId=exception.getActivityId();
178-
}
156+
this.gatewayStatistics.statusCode = storeResponseDiagnostics.getStatusCode();
157+
this.gatewayStatistics.subStatusCode = storeResponseDiagnostics.getSubStatusCode();
158+
this.gatewayStatistics.sessionToken = storeResponseDiagnostics.getSessionTokenAsString();
159+
this.gatewayStatistics.requestCharge = storeResponseDiagnostics.getRequestCharge();
160+
this.gatewayStatistics.requestTimeline = storeResponseDiagnostics.getRequestTimeline();
161+
this.gatewayStatistics.partitionKeyRangeId = storeResponseDiagnostics.getPartitionKeyRangeId();
162+
this.gatewayStatistics.exceptionMessage = storeResponseDiagnostics.getExceptionMessage();
163+
this.gatewayStatistics.exceptionResponseHeaders = storeResponseDiagnostics.getExceptionResponseHeaders();
164+
this.activityId = storeResponseDiagnostics.getActivityId();
179165
}
180166
}
181167

182-
public void setGatewayRequestTimeline(RequestTimeline transportRequestTimeline) {
183-
this.gatewayRequestTimeline = transportRequestTimeline;
184-
}
185-
186-
public RequestTimeline getGatewayRequestTimeline() {
187-
return this.gatewayRequestTimeline;
188-
}
189-
190168
public String recordAddressResolutionStart(
191169
URI targetEndpoint,
192170
boolean forceRefresh,
@@ -209,7 +187,7 @@ public String recordAddressResolutionStart(
209187
return identifier;
210188
}
211189

212-
public void recordAddressResolutionEnd(String identifier, String errorMessage) {
190+
public void recordAddressResolutionEnd(String identifier, String exceptionMessage) {
213191
if (StringUtils.isEmpty(identifier)) {
214192
return;
215193
}
@@ -227,7 +205,7 @@ public void recordAddressResolutionEnd(String identifier, String errorMessage) {
227205

228206
AddressResolutionStatistics resolutionStatistics = this.addressResolutionStatistics.get(identifier);
229207
resolutionStatistics.endTimeUTC = responseTime;
230-
resolutionStatistics.errorMessage = errorMessage;
208+
resolutionStatistics.exceptionMessage = exceptionMessage;
231209
resolutionStatistics.inflightRequest = false;
232210
}
233211
}
@@ -297,16 +275,16 @@ public GatewayStatistics getGatewayStatistics() {
297275
}
298276

299277
public static class StoreResponseStatistics {
300-
@JsonSerialize(using = StoreResult.StoreResultSerializer.class)
301-
private StoreResult storeResult;
278+
@JsonSerialize(using = StoreResultDiagnostics.StoreResultDiagnosticsSerializer.class)
279+
private StoreResultDiagnostics storeResult;
302280
@JsonSerialize(using = DiagnosticsInstantSerializer.class)
303281
private Instant requestResponseTimeUTC;
304282
@JsonSerialize
305283
private ResourceType requestResourceType;
306284
@JsonSerialize
307285
private OperationType requestOperationType;
308286

309-
public StoreResult getStoreResult() {
287+
public StoreResultDiagnostics getStoreResult() {
310288
return storeResult;
311289
}
312290

@@ -409,7 +387,7 @@ public static class AddressResolutionStatistics {
409387
@JsonSerialize
410388
private String targetEndpoint;
411389
@JsonSerialize
412-
private String errorMessage;
390+
private String exceptionMessage;
413391
@JsonSerialize
414392
private boolean forceRefresh;
415393
@JsonSerialize
@@ -433,8 +411,8 @@ public String getTargetEndpoint() {
433411
return targetEndpoint;
434412
}
435413

436-
public String getErrorMessage() {
437-
return errorMessage;
414+
public String getExceptionMessage() {
415+
return exceptionMessage;
438416
}
439417

440418
public boolean isInflightRequest() {
@@ -456,9 +434,11 @@ public static class GatewayStatistics {
456434
private ResourceType resourceType;
457435
private int statusCode;
458436
private int subStatusCode;
459-
private String requestCharge;
437+
private double requestCharge;
460438
private RequestTimeline requestTimeline;
461439
private String partitionKeyRangeId;
440+
private String exceptionMessage;
441+
private String exceptionResponseHeaders;
462442

463443
public String getSessionToken() {
464444
return sessionToken;
@@ -476,7 +456,7 @@ public int getSubStatusCode() {
476456
return subStatusCode;
477457
}
478458

479-
public String getRequestCharge() {
459+
public double getRequestCharge() {
480460
return requestCharge;
481461
}
482462

@@ -491,6 +471,14 @@ public ResourceType getResourceType() {
491471
public String getPartitionKeyRangeId() {
492472
return partitionKeyRangeId;
493473
}
474+
475+
public String getExceptionMessage() {
476+
return exceptionMessage;
477+
}
478+
479+
public String getExceptionResponseHeaders() {
480+
return exceptionResponseHeaders;
481+
}
494482
}
495483

496484
public static SystemInformation fetchSystemInformation() {

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Exceptions.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ public static boolean isSubStatusCode(CosmosException e, int subStatus) {
1818
return subStatus == e.getSubStatusCode();
1919
}
2020

21+
public static boolean isGone(CosmosException e) {
22+
return isStatusCode(e, HttpConstants.StatusCodes.GONE);
23+
}
24+
25+
public static boolean isNotFound(CosmosException e) {
26+
return isStatusCode(e, HttpConstants.StatusCodes.NOTFOUND);
27+
}
28+
2129
public static boolean isPartitionSplit(CosmosException e) {
2230
return isStatusCode(e, HttpConstants.StatusCodes.GONE)
2331
&& isSubStatusCode(e, HttpConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE);

sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,6 @@ public static void setCosmosExceptionAccessor(final CosmosExceptionAccessor newA
855855

856856
public interface CosmosExceptionAccessor {
857857
CosmosException createCosmosException(int statusCode, Exception innerException);
858-
CosmosException createSerializableCosmosException(CosmosException cosmosException);
859858
}
860859
}
861860

0 commit comments

Comments
 (0)