Skip to content

Commit a157593

Browse files
rpanackalJonas-Isr
andauthored
docs: [DevOps] ADR logging strategy (#651)
* Apply additional review suggestion * First draft * Remove low value section and improve formatting * cleaning up * Paraphrasing * Review suggestions - adding . at the end - reqId explanation - 'size' renaming in log for clarity - MDc examplee - "object creation" with example instead of vague computaiton - remove mention of "placeholder" warnings * Apply suggestions from code review content improvment Co-authored-by: Jonas-Isr <jonas.israel@sap.com> * Include review suggestion - add formatting rules - caution MDC use on thread switch - logging framework information - clarify what "request" means * fix test * clarify MDC context propagation * remove MDC consumption example * Apply suggestion from @Jonas-Isr Co-authored-by: Jonas-Isr <jonas.israel@sap.com> --------- Co-authored-by: Jonas-Isr <jonas.israel@sap.com>
1 parent e94b1bc commit a157593

File tree

13 files changed

+205
-25
lines changed

13 files changed

+205
-25
lines changed

core/src/main/java/com/sap/ai/sdk/core/AiCoreServiceKeyAccessor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public List<ServiceBinding> getServiceBindings() throws ServiceBindingAccessExce
5252
throw new ServiceBindingAccessException("Failed to load service key from environment", e);
5353
}
5454
if (serviceKey == null) {
55-
log.debug("No service key found in environment variable {}", ENV_VAR_KEY);
55+
log.debug("No service key found in environment variable {}.", ENV_VAR_KEY);
5656
return List.of();
5757
}
5858
final String logMessage =

core/src/main/java/com/sap/ai/sdk/core/DeploymentResolver.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void reloadDeployments(@Nonnull final String resourceGroup) {
4646
try {
4747
val apiClient = new DeploymentApi(service);
4848
val deployments = new HashSet<>(apiClient.query(resourceGroup).getResources());
49-
log.info("Found {} deployments in resource group '{}'", deployments.size(), resourceGroup);
49+
log.info("Found {} deployments in resource group '{}'.", deployments.size(), resourceGroup);
5050
cache.put(resourceGroup, deployments);
5151
} catch (final RuntimeException e) {
5252
throw new DeploymentResolutionException(

core/src/main/java/com/sap/ai/sdk/core/common/ClientResponseHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ private T parseSuccess(@Nonnull final ClassicHttpResponse response) throws E {
9191
try {
9292
return objectMapper.readValue(content, successType);
9393
} catch (final JsonProcessingException e) {
94-
log.error("Failed to parse response to type {}", successType);
94+
log.error("Failed to parse response to type {}.", successType);
9595
throw exceptionFactory.build("Failed to parse response", e).setHttpResponse(response);
9696
}
9797
}

core/src/main/java/com/sap/ai/sdk/core/common/ClientStreamingHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public Stream<D> handleStreamingResponse(@Nonnull final ClassicHttpResponse resp
8585
}
8686
return objectMapper.treeToValue(jsonNode, successType);
8787
} catch (final IOException e) {
88-
log.error("Failed to parse delta chunk to type {}", successType);
88+
log.error("Failed to parse delta chunk to type {}.", successType);
8989
final String message = "Failed to parse delta chunk";
9090
throw exceptionFactory.build(message, e).setHttpResponse(response);
9191
}

core/src/main/java/com/sap/ai/sdk/core/common/IterableStreamConverter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ class IterableStreamConverter<T> implements Iterator<T> {
3535
static final int BUFFER_SIZE = 8192;
3636

3737
private static final String ERR_CONTENT = "Failed to read response content.";
38-
private static final String ERR_INTERRUPTED = "Parsing response content was interrupted";
39-
private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored)";
38+
private static final String ERR_INTERRUPTED = "Parsing response content was interrupted.";
39+
private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored).";
4040

4141
/** Read next entry for Stream or {@code null} when no further entry can be read. */
4242
private final Callable<T> readHandler;
@@ -70,7 +70,7 @@ public boolean hasNext() {
7070
} catch (final Exception e) {
7171
isDone = true;
7272
stopHandler.run();
73-
log.debug("Reading next element failed with error {})", e.getClass().getSimpleName());
73+
log.debug("Reading next element failed with error {}.", e.getClass().getSimpleName());
7474
throw errorHandler.apply(e);
7575
}
7676
return !isDone;

core/src/main/java/com/sap/ai/sdk/core/common/RequestLogContext.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
@Beta
2424
public class RequestLogContext {
2525

26-
private static void setRequestId(@Nonnull final String requestId) {
27-
MDC.put(MdcKeys.REQUEST_ID, requestId);
26+
private static void setCallId(@Nonnull final String callId) {
27+
MDC.put(MdcKeys.CALL_ID, callId);
2828
}
2929

3030
/**
@@ -65,7 +65,7 @@ public static void setService(@Nonnull final Service service) {
6565

6666
/** Clear all MDC request context information. */
6767
public static void clear() {
68-
MDC.remove(MdcKeys.REQUEST_ID);
68+
MDC.remove(MdcKeys.CALL_ID);
6969
MDC.remove(MdcKeys.ENDPOINT);
7070
MDC.remove(MdcKeys.DESTINATION);
7171
MDC.remove(MdcKeys.MODE);
@@ -74,13 +74,13 @@ public static void clear() {
7474

7575
/** Log the start of a request with generated request ID. */
7676
public static void logRequestStart() {
77-
val reqId = UUID.randomUUID().toString().substring(0, 8);
78-
RequestLogContext.setRequestId(reqId);
77+
val callId = UUID.randomUUID().toString().substring(0, 8);
78+
RequestLogContext.setCallId(callId);
7979

80-
val message = "[reqId={}] Starting {} {} request to {}, destination={}";
80+
val message = "[callId={}] Starting {} {} request to {}, destination={}.";
8181
log.debug(
8282
message,
83-
reqId,
83+
callId,
8484
MDC.get(MdcKeys.SERVICE),
8585
MDC.get(MdcKeys.MODE),
8686
MDC.get(MdcKeys.ENDPOINT),
@@ -105,13 +105,13 @@ public static void logResponseSuccess(@Nonnull final ClassicHttpResponse respons
105105
.filter(length -> length >= 0)
106106
.map(length -> "%.1fKB".formatted(length / 1024.0))
107107
.orElse("unknown");
108-
val message = "[reqId={}] {} request completed successfully with duration={}, size={}.";
109-
log.debug(message, MDC.get(MdcKeys.REQUEST_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
108+
val message = "[callId={}] {} request completed successfully with duration={}, size={}.";
109+
log.debug(message, MDC.get(MdcKeys.CALL_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
110110
}
111111

112112
@UtilityClass
113113
private static class MdcKeys {
114-
private static final String REQUEST_ID = "reqId";
114+
private static final String CALL_ID = "callId";
115115
private static final String ENDPOINT = "endpoint";
116116
private static final String DESTINATION = "destination";
117117
private static final String MODE = "mode";

core/src/test/java/com/sap/ai/sdk/core/common/IterableStreamConverterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ void testLinesThrows() {
106106
final var sut = IterableStreamConverter.lines(response, new TestClientExceptionFactory());
107107
assertThatThrownBy(sut::count)
108108
.isInstanceOf(TestClientException.class)
109-
.hasMessage("Parsing response content was interrupted")
109+
.hasMessage("Parsing response content was interrupted.")
110110
.cause()
111111
.isInstanceOf(IOException.class)
112112
.hasMessage("Ups!");

core/src/test/java/com/sap/ai/sdk/core/common/RequestLogContextTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ void testRequestLogContextLifecycle() {
2525
assertThat(MDC.get("destination")).isEqualTo("http://localhost:8000");
2626

2727
RequestLogContext.logRequestStart();
28-
assertThat(MDC.get("reqId")).isNotNull().hasSize(8);
28+
assertThat(MDC.get("callId")).isNotNull().hasSize(8);
2929

3030
RequestLogContext.clear();
3131
assertThat(MDC.get("service")).isNull();
3232
assertThat(MDC.get("mode")).isNull();
3333
assertThat(MDC.get("endpoint")).isNull();
3434
assertThat(MDC.get("destination")).isNull();
35-
assertThat(MDC.get("reqId")).isNull();
35+
assertThat(MDC.get("callId")).isNull();
3636
assertThat(MDC.get("consumer-key")).isEqualTo("consumer-value");
3737
}
3838
}

docs/adrs/006-logging-strategy.md

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# Logging Strategy
2+
3+
## Status
4+
5+
Accepted
6+
7+
## Context
8+
9+
The AI SDK identified some challenges with debugging and problem resolution that can be addressed with better logging.
10+
Typically, users have to enable wire logs for troubleshooting which, along with helpful logs, surfaces a large volume of unrelated noise.
11+
Additionally, we identified the need to improve visibility into the application progress for the user.
12+
13+
Key drivers for this decision include:
14+
15+
- **Debugging difficulties**: Limited visibility into request flows.
16+
- **Security risks**: Accidental logging of sensitive data.
17+
- **Poor user experience**: Lack of insight into application progress.
18+
- **Troubleshooting dependency**: Reliance on external parties for critical details.
19+
20+
## Decision
21+
22+
We will implement and enforce comprehensive logging guidelines that prioritize **debugging capability** and **user visibility**.
23+
This approach mandates descriptive, human-readable logs with structured request tracking through Mapped Diagnostic Context (MDC).
24+
The SDK uses SLF4J API for all logging statements.
25+
26+
## Guidelines
27+
28+
### 1. Log Content and Security
29+
30+
* **Do not log sensitive information.**
31+
Never log full request or response bodies.
32+
Ensure that personally identifiable or confidential data — such as names, IDs, tokens, or payload content — is always excluded from logs.
33+
34+
* **Write concise and relevant logs.**
35+
Every log must convey meaningful information.
36+
Avoid verbose, repetitive, or purely cosmetic details.
37+
38+
* **Use descriptive, human-readable and standard formats.**
39+
- Logs must be clear enough for a developer to understand what happened without checking the code.
40+
- Start log messages with a capital letter (exceptions include case-sensitive identifiers) and end with a period (`.`).
41+
- Avoid newlines (`\n`) and emojis within log messages to prevent parsing and encoding concerns.
42+
- Use the `metric=value` pattern to include structured details with extensibility in mind.
43+
44+
```
45+
[callId=e3eaa45c] OpenAI request completed successfully with duration=1628ms, responseSize=1.2KB.
46+
```
47+
48+
* **Correlate logs.**
49+
Include a call identifier (e.g., `callId`) in outgoing per-request logs to assist with correlation and debugging.
50+
Throughout this document, the term 'request' refers to a single SDK operation that calls an AI service (e.g., `OrchestrationClient.chatCompletion()`, `OpenAiClient.embed()`), distinct from HTTP requests to the user's application.
51+
52+
* **Exception logging.**
53+
When logging exceptions, use standard logging methods (e.g., `log.debug("Connection lost.", exception)`) rather than serializing exception objects.
54+
Exception objects may contain custom fields with sensitive data that could be exposed through JSON serialization or custom `toString()` implementations.
55+
56+
* **Logging framework**
57+
Write all logging statements against the SLF4J API.
58+
The AI SDK relies on `logback-classic` as the provided runtime implementation.
59+
60+
---
61+
62+
### 2. Log Levels and Scope
63+
64+
* **Per-request logs.**
65+
Keep per-request logs **below INFO level** (e.g., DEBUG or TRACE) to prevent cluttering normal application output.
66+
67+
* **Application runtime logs.**
68+
Prefer **INFO level** only for one-time or startup/shutdown logs that occur once per application run.
69+
70+
* **Avoid unnecessary warnings.**
71+
Use the WARN level only for actionable or genuinely concerning conditions.
72+
Expected transient states (retries, fallbacks, cache misses) should not generate a warning.
73+
74+
* **Explicit request logging.**
75+
Always log at **request start** to provide immediate visibility that an operation has begun.
76+
This helps users understand that their request is being processed even before a result is available.
77+
Do not rely solely on response-time logging — requests may fail, hang, or take long durations.
78+
This approach also avoids the need for stack-trace investigation when surface error responses are ambiguous.
79+
80+
```
81+
[callId=e3eaa45c] Starting OpenAI synchronous request to /v1/chat/completions, destination=<some-uri>.
82+
```
83+
84+
* **Performance-aware logging.**
85+
If a log statement requires computation (such as object creation or method invocation), guard it with a log-level check to prevent performance degradation.
86+
87+
``` java
88+
Optional<Destination> maybeDestination;
89+
Destination fallbackDestination = /* existing instance */;
90+
91+
// Bad: Creates objects or invokes methods even when logging is disabled
92+
log.debug("Destination: {}", maybeDestination.toString());
93+
log.debug("Destination: {}", maybeDestination.orElseGet(() -> new Destination()));
94+
log.debug("Destination: {}", maybeDestination.orElseGet(this::getFallback));
95+
log.debug("Destination: {}", maybeDestination.orElse(getFallback()));
96+
97+
// Good: No additional computation
98+
log.debug("Destination: {}", maybeDestination);
99+
log.debug("Destination: {}", maybeDestination.orElse(fallbackDestination));
100+
101+
// Good: Guard object creation
102+
if (log.isDebugEnabled()) {
103+
log.debug("Destination: {}", maybeDestination.orElse(getFallback()));
104+
}
105+
106+
// Exception: Singletons require no guarding (no object creation)
107+
Optional<List<Destination>> maybeDestinations;
108+
log.debug("Destinations: {}", maybeDestinations.orElse(Collections.emptyList()));
109+
```
110+
111+
---
112+
113+
### 3. MDC (Mapped Diagnostic Context)
114+
115+
* **Purpose and usage.**
116+
MDC is used to carry contextual information (e.g., `callId`, `endpoint`, `service`) across execution blocks within the same thread.
117+
118+
* **Setting and clearing context.**
119+
Set MDC values deliberately and close to their scope of relevance.
120+
Per-request MDC context must be cleared appropriately when the response completes.
121+
Avoid setting per-request values in long-lived objects that outlive the request lifecycle, as this can result in corrupted or incomplete log context.
122+
123+
* **Centralized MDC management.**
124+
Avoid using magic strings for MDC keys or values.
125+
Define them in a dedicated structure or utility (e.g., `RequestLogContext` class) to ensure discoverability and prevent errors during refactoring.
126+
```java
127+
// Bad: Magic strings scattered in code
128+
MDC.put("service", "OpenAI");
129+
log.debug("Service {}", MDC.get("service"));
130+
131+
// Good: Centralized in utility class
132+
RequestLogContext.setService(Service.OPENAI);
133+
log.debug("Service {}", RequestLogContext.get(MdcKeys.SERVICE));
134+
```
135+
136+
* **Granular clearing only.**
137+
Never clear the entire MDC context, as this will remove entries set by the application developer or other libraries.
138+
Instead, remove entries key-by-key to preserve unrelated context items that may remain valid for longer periods.
139+
```java
140+
// Bad: Risk clearing useful context entries from other components
141+
MDC.clear();
142+
143+
// Good: Clear only your own entries, centralized in utility class
144+
class RequestLogContext {
145+
//...
146+
static void clear(){
147+
MDC.remove(MdcKeys.CALL_ID);
148+
MDC.remove(MdcKeys.SERVICE);
149+
}
150+
}
151+
```
152+
153+
* **Responsibility and ownership.**
154+
The component or class that sets MDC context values is also responsible for clearing them.
155+
This maintains clarity and ensures proper lifecycle management.
156+
157+
* **Safe consumption.**
158+
Since MDC uses `ThreadLocal` storage, any new thread (created implicitly or explicitly) will not have access to the parent thread's MDC context.
159+
160+
```java
161+
// Thread A
162+
RequestLogContext.setCallId("abc123");
163+
log.debug("[callId={}] Starting request", RequestLogContext.get(MdcKeys.CALL_ID));
164+
165+
// Problem: Async callback runs in Thread B without original MDC context
166+
client.executeAsync(() -> {
167+
// Thread B: RequestLogContext.get(MdcKeys.CALL_ID) returns null
168+
log.debug("[callId={}] Processing", RequestLogContext.get(MdcKeys.CALL_ID));
169+
});
170+
```
171+
Be vigilant about thread switches through async operations and resilience patterns, as you may have to manually propagate MDC context to maintain logging continuity.
172+
Refer to the following resource for more information on [MDC handling with thread pools](https://stackoverflow.com/questions/6073019/how-to-use-mdc-with-thread-pools).
173+
174+
---
175+
176+
### 4. Logging Boundaries and Generation
177+
178+
* **Deliberate logging boundaries.**
179+
Generated code (such as those in modules like *document-grounding* or *prompt-registry*) should log minimally or preferably be avoided entirely.
180+
Logging should be centralized in higher-level components to maintain consistency and reduce noise.

foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/OpenAiChatCompletionResponse.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ public List<OpenAiToolMessage> executeTools() {
116116
final var tools = Optional.ofNullable(originalRequest.getToolsExecutable()).orElseGet(List::of);
117117
if (log.isDebugEnabled() && !tools.isEmpty()) {
118118
final var toolNames = tools.stream().map(OpenAiTool::getName).toList();
119-
log.debug("Executing {} tool call(s) - {}", toolNames.size(), toolNames);
119+
log.debug("Executing {} tool call(s) - {}.", toolNames.size(), toolNames);
120120
}
121121
return OpenAiTool.execute(tools, getMessage());
122122
}

0 commit comments

Comments
 (0)