docs: [DevOps] ADR logging strategy (#651)

rpanackal · Jonas-Isr · web-flow · commit a1575930929c · 2025-11-24T17:28:21.000+01:00
* Apply additional review suggestion * First draft * Remove low value section and improve formatting * cleaning up * Paraphrasing * Review suggestions - adding . at the end - reqId explanation - 'size' renaming in log for clarity - MDc examplee - "object creation" with example instead of vague computaiton - remove mention of "placeholder" warnings * Apply suggestions from code review content improvment Co-authored-by: Jonas-Isr <jonas.israel@sap.com> * Include review suggestion - add formatting rules - caution MDC use on thread switch - logging framework information - clarify what "request" means * fix test * clarify MDC context propagation * remove MDC consumption example * Apply suggestion from @Jonas-Isr Co-authored-by: Jonas-Isr <jonas.israel@sap.com> --------- Co-authored-by: Jonas-Isr <jonas.israel@sap.com>
diff --git a/core/src/main/java/com/sap/ai/sdk/core/AiCoreServiceKeyAccessor.java b/core/src/main/java/com/sap/ai/sdk/core/AiCoreServiceKeyAccessor.java
@@ -52,7 +52,7 @@ public List<ServiceBinding> getServiceBindings() throws ServiceBindingAccessExce
       throw new ServiceBindingAccessException("Failed to load service key from environment", e);
     }
     if (serviceKey == null) {
-      log.debug("No service key found in environment variable {}", ENV_VAR_KEY);
+      log.debug("No service key found in environment variable {}.", ENV_VAR_KEY);
       return List.of();
     }
     final String logMessage =
diff --git a/core/src/main/java/com/sap/ai/sdk/core/DeploymentResolver.java b/core/src/main/java/com/sap/ai/sdk/core/DeploymentResolver.java
@@ -46,7 +46,7 @@ void reloadDeployments(@Nonnull final String resourceGroup) {
     try {
       val apiClient = new DeploymentApi(service);
       val deployments = new HashSet<>(apiClient.query(resourceGroup).getResources());
-      log.info("Found {} deployments in resource group '{}'", deployments.size(), resourceGroup);
+      log.info("Found {} deployments in resource group '{}'.", deployments.size(), resourceGroup);
       cache.put(resourceGroup, deployments);
     } catch (final RuntimeException e) {
       throw new DeploymentResolutionException(
diff --git a/core/src/main/java/com/sap/ai/sdk/core/common/ClientResponseHandler.java b/core/src/main/java/com/sap/ai/sdk/core/common/ClientResponseHandler.java
@@ -91,7 +91,7 @@ private T parseSuccess(@Nonnull final ClassicHttpResponse response) throws E {
     try {
       return objectMapper.readValue(content, successType);
     } catch (final JsonProcessingException e) {
-      log.error("Failed to parse response to type {}", successType);
+      log.error("Failed to parse response to type {}.", successType);
       throw exceptionFactory.build("Failed to parse response", e).setHttpResponse(response);
     }
   }
diff --git a/core/src/main/java/com/sap/ai/sdk/core/common/ClientStreamingHandler.java b/core/src/main/java/com/sap/ai/sdk/core/common/ClientStreamingHandler.java
@@ -85,7 +85,7 @@ public Stream<D> handleStreamingResponse(@Nonnull final ClassicHttpResponse resp
                 }
                 return objectMapper.treeToValue(jsonNode, successType);
               } catch (final IOException e) {
-                log.error("Failed to parse delta chunk to type {}", successType);
+                log.error("Failed to parse delta chunk to type {}.", successType);
                 final String message = "Failed to parse delta chunk";
                 throw exceptionFactory.build(message, e).setHttpResponse(response);
               }
diff --git a/core/src/main/java/com/sap/ai/sdk/core/common/IterableStreamConverter.java b/core/src/main/java/com/sap/ai/sdk/core/common/IterableStreamConverter.java
@@ -35,8 +35,8 @@ class IterableStreamConverter<T> implements Iterator<T> {
   static final int BUFFER_SIZE = 8192;
 
   private static final String ERR_CONTENT = "Failed to read response content.";
-  private static final String ERR_INTERRUPTED = "Parsing response content was interrupted";
-  private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored)";
+  private static final String ERR_INTERRUPTED = "Parsing response content was interrupted.";
+  private static final String ERR_CLOSE = "Could not close input stream with error: {} (ignored).";
 
   /** Read next entry for Stream or {@code null} when no further entry can be read. */
   private final Callable<T> readHandler;
@@ -70,7 +70,7 @@ public boolean hasNext() {
     } catch (final Exception e) {
       isDone = true;
       stopHandler.run();
-      log.debug("Reading next element failed with error {})", e.getClass().getSimpleName());
+      log.debug("Reading next element failed with error {}.", e.getClass().getSimpleName());
       throw errorHandler.apply(e);
     }
     return !isDone;
diff --git a/core/src/main/java/com/sap/ai/sdk/core/common/RequestLogContext.java b/core/src/main/java/com/sap/ai/sdk/core/common/RequestLogContext.java
@@ -23,8 +23,8 @@
 @Beta
 public class RequestLogContext {
 
-  private static void setRequestId(@Nonnull final String requestId) {
-    MDC.put(MdcKeys.REQUEST_ID, requestId);
+  private static void setCallId(@Nonnull final String callId) {
+    MDC.put(MdcKeys.CALL_ID, callId);
   }
 
   /**
@@ -65,7 +65,7 @@ public static void setService(@Nonnull final Service service) {
 
   /** Clear all MDC request context information. */
   public static void clear() {
-    MDC.remove(MdcKeys.REQUEST_ID);
+    MDC.remove(MdcKeys.CALL_ID);
     MDC.remove(MdcKeys.ENDPOINT);
     MDC.remove(MdcKeys.DESTINATION);
     MDC.remove(MdcKeys.MODE);
@@ -74,13 +74,13 @@ public static void clear() {
 
   /** Log the start of a request with generated request ID. */
   public static void logRequestStart() {
-    val reqId = UUID.randomUUID().toString().substring(0, 8);
-    RequestLogContext.setRequestId(reqId);
+    val callId = UUID.randomUUID().toString().substring(0, 8);
+    RequestLogContext.setCallId(callId);
 
-    val message = "[reqId={}] Starting {} {} request to {}, destination={}";
+    val message = "[callId={}] Starting {} {} request to {}, destination={}.";
     log.debug(
         message,
-        reqId,
+        callId,
         MDC.get(MdcKeys.SERVICE),
         MDC.get(MdcKeys.MODE),
         MDC.get(MdcKeys.ENDPOINT),
@@ -105,13 +105,13 @@ public static void logResponseSuccess(@Nonnull final ClassicHttpResponse respons
             .filter(length -> length >= 0)
             .map(length -> "%.1fKB".formatted(length / 1024.0))
             .orElse("unknown");
-    val message = "[reqId={}] {} request completed successfully with duration={}, size={}.";
-    log.debug(message, MDC.get(MdcKeys.REQUEST_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
+    val message = "[callId={}] {} request completed successfully with duration={}, size={}.";
+    log.debug(message, MDC.get(MdcKeys.CALL_ID), MDC.get(MdcKeys.SERVICE), duration, sizeInfo);
   }
 
   @UtilityClass
   private static class MdcKeys {
-    private static final String REQUEST_ID = "reqId";
+    private static final String CALL_ID = "callId";
     private static final String ENDPOINT = "endpoint";
     private static final String DESTINATION = "destination";
     private static final String MODE = "mode";
diff --git a/core/src/test/java/com/sap/ai/sdk/core/common/IterableStreamConverterTest.java b/core/src/test/java/com/sap/ai/sdk/core/common/IterableStreamConverterTest.java
@@ -106,7 +106,7 @@ void testLinesThrows() {
     final var sut = IterableStreamConverter.lines(response, new TestClientExceptionFactory());
     assertThatThrownBy(sut::count)
         .isInstanceOf(TestClientException.class)
-        .hasMessage("Parsing response content was interrupted")
+        .hasMessage("Parsing response content was interrupted.")
         .cause()
         .isInstanceOf(IOException.class)
         .hasMessage("Ups!");
diff --git a/core/src/test/java/com/sap/ai/sdk/core/common/RequestLogContextTest.java b/core/src/test/java/com/sap/ai/sdk/core/common/RequestLogContextTest.java
@@ -25,14 +25,14 @@ void testRequestLogContextLifecycle() {
     assertThat(MDC.get("destination")).isEqualTo("http://localhost:8000");
 
     RequestLogContext.logRequestStart();
-    assertThat(MDC.get("reqId")).isNotNull().hasSize(8);
+    assertThat(MDC.get("callId")).isNotNull().hasSize(8);
 
     RequestLogContext.clear();
     assertThat(MDC.get("service")).isNull();
     assertThat(MDC.get("mode")).isNull();
     assertThat(MDC.get("endpoint")).isNull();
     assertThat(MDC.get("destination")).isNull();
-    assertThat(MDC.get("reqId")).isNull();
+    assertThat(MDC.get("callId")).isNull();
     assertThat(MDC.get("consumer-key")).isEqualTo("consumer-value");
   }
 }
diff --git a/docs/adrs/006-logging-strategy.md b/docs/adrs/006-logging-strategy.md
@@ -0,0 +1,180 @@
+# Logging Strategy
+
+## Status
+
+Accepted
+
+## Context
+
+The AI SDK identified some challenges with debugging and problem resolution that can be addressed with better logging.
+Typically, users have to enable wire logs for troubleshooting which, along with helpful logs, surfaces a large volume of unrelated noise.
+Additionally, we identified the need to improve visibility into the application progress for the user.
+
+Key drivers for this decision include:
+
+- **Debugging difficulties**: Limited visibility into request flows.
+- **Security risks**: Accidental logging of sensitive data.
+- **Poor user experience**: Lack of insight into application progress.
+- **Troubleshooting dependency**: Reliance on external parties for critical details.
+
+## Decision
+
+We will implement and enforce comprehensive logging guidelines that prioritize **debugging capability** and **user visibility**.
+This approach mandates descriptive, human-readable logs with structured request tracking through Mapped Diagnostic Context (MDC).
+The SDK uses SLF4J API for all logging statements.
+
+## Guidelines
+
+### 1. Log Content and Security
+
+* **Do not log sensitive information.**
+  Never log full request or response bodies.
+  Ensure that personally identifiable or confidential data — such as names, IDs, tokens, or payload content — is always excluded from logs.
+
+* **Write concise and relevant logs.**
+  Every log must convey meaningful information.
+  Avoid verbose, repetitive, or purely cosmetic details.
+
+* **Use descriptive, human-readable and standard formats.**
+  - Logs must be clear enough for a developer to understand what happened without checking the code.
+  - Start log messages with a capital letter (exceptions include case-sensitive identifiers) and end with a period (`.`).
+  - Avoid newlines (`\n`) and emojis within log messages to prevent parsing and encoding concerns.
+  - Use the `metric=value` pattern to include structured details with extensibility in mind.
+
+    ```
+    [callId=e3eaa45c] OpenAI request completed successfully with duration=1628ms, responseSize=1.2KB.
+    ```
+
+* **Correlate logs.**
+  Include a call identifier (e.g., `callId`) in outgoing per-request logs to assist with correlation and debugging.
+  Throughout this document, the term 'request' refers to a single SDK operation that calls an AI service (e.g., `OrchestrationClient.chatCompletion()`, `OpenAiClient.embed()`), distinct from HTTP requests to the user's application.
+
+* **Exception logging.**
+  When logging exceptions, use standard logging methods (e.g., `log.debug("Connection lost.", exception)`) rather than serializing exception objects.
+  Exception objects may contain custom fields with sensitive data that could be exposed through JSON serialization or custom `toString()` implementations.
+
+* **Logging framework**
+  Write all logging statements against the SLF4J API.
+  The AI SDK relies on `logback-classic` as the provided runtime implementation.
+
+---
+
+### 2. Log Levels and Scope
+
+* **Per-request logs.**
+  Keep per-request logs **below INFO level** (e.g., DEBUG or TRACE) to prevent cluttering normal application output.
+
+* **Application runtime logs.**
+  Prefer **INFO level** only for one-time or startup/shutdown logs that occur once per application run.
+
+* **Avoid unnecessary warnings.**
+  Use the WARN level only for actionable or genuinely concerning conditions.
+  Expected transient states (retries, fallbacks, cache misses) should not generate a warning.
+
+* **Explicit request logging.**
+  Always log at **request start** to provide immediate visibility that an operation has begun.
+  This helps users understand that their request is being processed even before a result is available.
+  Do not rely solely on response-time logging — requests may fail, hang, or take long durations.
+  This approach also avoids the need for stack-trace investigation when surface error responses are ambiguous.
+
+  ```
+  [callId=e3eaa45c] Starting OpenAI synchronous request to /v1/chat/completions, destination=<some-uri>.
+  ```
+
+* **Performance-aware logging.**
+  If a log statement requires computation (such as object creation or method invocation), guard it with a log-level check to prevent performance degradation.
+
+  ``` java
+  Optional<Destination> maybeDestination;
+  Destination fallbackDestination = /* existing instance */;
+  
+  // Bad: Creates objects or invokes methods even when logging is disabled
+  log.debug("Destination: {}", maybeDestination.toString());
+  log.debug("Destination: {}", maybeDestination.orElseGet(() -> new Destination()));
+  log.debug("Destination: {}", maybeDestination.orElseGet(this::getFallback));
+  log.debug("Destination: {}", maybeDestination.orElse(getFallback()));
+  
+  // Good: No additional computation
+  log.debug("Destination: {}", maybeDestination);
+  log.debug("Destination: {}", maybeDestination.orElse(fallbackDestination));
+  
+  // Good: Guard object creation
+  if (log.isDebugEnabled()) {
+    log.debug("Destination: {}", maybeDestination.orElse(getFallback()));
+  }
+  
+  // Exception: Singletons require no guarding (no object creation)
+  Optional<List<Destination>> maybeDestinations;
+  log.debug("Destinations: {}", maybeDestinations.orElse(Collections.emptyList()));
+  ```
+
+---
+
+### 3. MDC (Mapped Diagnostic Context)
+
+* **Purpose and usage.**
+  MDC is used to carry contextual information (e.g., `callId`, `endpoint`, `service`) across execution blocks within the same thread.
+
+* **Setting and clearing context.**
+  Set MDC values deliberately and close to their scope of relevance.
+  Per-request MDC context must be cleared appropriately when the response completes.
+  Avoid setting per-request values in long-lived objects that outlive the request lifecycle, as this can result in corrupted or incomplete log context.
+
+* **Centralized MDC management.**
+  Avoid using magic strings for MDC keys or values.
+  Define them in a dedicated structure or utility (e.g., `RequestLogContext` class) to ensure discoverability and prevent errors during refactoring.
+  ```java
+  // Bad: Magic strings scattered in code
+  MDC.put("service", "OpenAI");
+  log.debug("Service {}", MDC.get("service"));
+  
+  // Good: Centralized in utility class
+  RequestLogContext.setService(Service.OPENAI);
+  log.debug("Service {}", RequestLogContext.get(MdcKeys.SERVICE));
+  ```
+
+* **Granular clearing only.**
+  Never clear the entire MDC context, as this will remove entries set by the application developer or other libraries.
+  Instead, remove entries key-by-key to preserve unrelated context items that may remain valid for longer periods.
+  ```java
+  // Bad: Risk clearing useful context entries from other components
+  MDC.clear();
+
+  // Good: Clear only your own entries, centralized in utility class
+  class RequestLogContext { 
+     //...
+     static void clear(){
+      MDC.remove(MdcKeys.CALL_ID);
+      MDC.remove(MdcKeys.SERVICE);
+    }
+  }
+  ```
+
+* **Responsibility and ownership.**
+  The component or class that sets MDC context values is also responsible for clearing them.
+  This maintains clarity and ensures proper lifecycle management.
+
+* **Safe consumption.**
+  Since MDC uses `ThreadLocal` storage, any new thread (created implicitly or explicitly) will not have access to the parent thread's MDC context.
+
+  ```java
+  // Thread A
+  RequestLogContext.setCallId("abc123");
+  log.debug("[callId={}] Starting request", RequestLogContext.get(MdcKeys.CALL_ID)); 
+  
+  // Problem: Async callback runs in Thread B without original MDC context
+  client.executeAsync(() -> {
+      // Thread B: RequestLogContext.get(MdcKeys.CALL_ID) returns null
+      log.debug("[callId={}] Processing", RequestLogContext.get(MdcKeys.CALL_ID)); 
+  });
+  ```
+  Be vigilant about thread switches through async operations and resilience patterns, as you may have to manually propagate MDC context to maintain logging continuity.
+  Refer to the following resource for more information on [MDC handling with thread pools](https://stackoverflow.com/questions/6073019/how-to-use-mdc-with-thread-pools).
+
+---
+
+### 4. Logging Boundaries and Generation
+
+* **Deliberate logging boundaries.**
+  Generated code (such as those in modules like *document-grounding* or *prompt-registry*) should log minimally or preferably be avoided entirely.
+  Logging should be centralized in higher-level components to maintain consistency and reduce noise.
diff --git a/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/OpenAiChatCompletionResponse.java b/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/OpenAiChatCompletionResponse.java
@@ -116,7 +116,7 @@ public List<OpenAiToolMessage> executeTools() {
     final var tools = Optional.ofNullable(originalRequest.getToolsExecutable()).orElseGet(List::of);
     if (log.isDebugEnabled() && !tools.isEmpty()) {
       final var toolNames = tools.stream().map(OpenAiTool::getName).toList();
-      log.debug("Executing {} tool call(s) - {}", toolNames.size(), toolNames);
+      log.debug("Executing {} tool call(s) - {}.", toolNames.size(), toolNames);
     }
     return OpenAiTool.execute(tools, getMessage());
   }
diff --git a/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/OpenAiTool.java b/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/OpenAiTool.java
@@ -193,7 +193,7 @@ private static Map<OpenAiFunctionCall, Object> executeInternal(
       if (toolCall instanceof OpenAiFunctionCall functionCall) {
         final var tool = toolMap.get(functionCall.getName());
         if (tool == null) {
-          log.warn("Tool not found for function call: {}", functionCall.getName());
+          log.warn("Tool not found for function call: {}.", functionCall.getName());
           continue;
         }
         final var toolResult = executeFunction(tool, functionCall);
diff --git a/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/spring/OpenAiChatModel.java b/foundation-models/openai/src/main/java/com/sap/ai/sdk/foundationmodels/openai/spring/OpenAiChatModel.java
@@ -71,7 +71,7 @@ public ChatResponse call(@Nonnull final Prompt prompt) {
     if (options != null && isInternalToolExecutionEnabled(options) && response.hasToolCalls()) {
       val toolCalls =
           response.getResult().getOutput().getToolCalls().stream().map(ToolCall::name).toList();
-      log.info("Executing {} tool call(s) - {}", toolCalls.size(), toolCalls);
+      log.info("Executing {} tool call(s) - {}.", toolCalls.size(), toolCalls);
       val toolExecutionResult = toolCallingManager.executeToolCalls(prompt, response);
       // Send the tool execution result back to the model.
       log.debug("Re-invoking model with tool execution results.");
@@ -219,7 +219,7 @@ private static List<ChatCompletionTool> extractTools(final ToolCallingChatOption
         val tool = new ChatCompletionTool().type(toolType).function(toolFunction);
         tools.add(tool);
       } catch (JsonProcessingException e) {
-        log.warn("Failed to add tool to the chat request: {}", e.getMessage());
+        log.warn("Failed to add tool to the chat request: {}.", e.getMessage());
       }
     }
     return tools;
diff --git a/orchestration/src/main/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModel.java b/orchestration/src/main/java/com/sap/ai/sdk/orchestration/spring/OrchestrationChatModel.java
@@ -77,7 +77,7 @@ public ChatResponse call(@Nonnull final Prompt prompt) {
         if (log.isDebugEnabled()) {
           val tools = response.getResult().getOutput().getToolCalls();
           val toolsStr = tools.stream().map(ToolCall::name).collect(Collectors.joining(", "));
-          log.debug("Executing {} tool call(s) - {}", tools.size(), toolsStr);
+          log.debug("Executing {} tool call(s) - {}.", tools.size(), toolsStr);
         }
 
         val toolExecutionResult = toolCallingManager.executeToolCalls(prompt, response);

Original file line number	Diff line number	Diff line change
`@@ -52,7 +52,7 @@ public List<ServiceBinding> getServiceBindings() throws ServiceBindingAccessExce`
`52`	`52`	`throw new ServiceBindingAccessException("Failed to load service key from environment", e);`
`53`	`53`	`}`
`54`	`54`	`if (serviceKey == null) {`
`55`		`- log.debug("No service key found in environment variable {}", ENV_VAR_KEY);`
	`55`	`+ log.debug("No service key found in environment variable {}.", ENV_VAR_KEY);`
`56`	`56`	`return List.of();`
`57`	`57`	`}`
`58`	`58`	`final String logMessage =`
Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@ private T parseSuccess(@Nonnull final ClassicHttpResponse response) throws E {`
`91`	`91`	`try {`
`92`	`92`	`return objectMapper.readValue(content, successType);`
`93`	`93`	`} catch (final JsonProcessingException e) {`
`94`		`- log.error("Failed to parse response to type {}", successType);`
	`94`	`+ log.error("Failed to parse response to type {}.", successType);`
`95`	`95`	`throw exceptionFactory.build("Failed to parse response", e).setHttpResponse(response);`
`96`	`96`	`}`
`97`	`97`	`}`
Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,7 @@ public Stream<D> handleStreamingResponse(@Nonnull final ClassicHttpResponse resp`
`85`	`85`	`}`
`86`	`86`	`return objectMapper.treeToValue(jsonNode, successType);`
`87`	`87`	`} catch (final IOException e) {`
`88`		`- log.error("Failed to parse delta chunk to type {}", successType);`
	`88`	`+ log.error("Failed to parse delta chunk to type {}.", successType);`
`89`	`89`	`final String message = "Failed to parse delta chunk";`
`90`	`90`	`throw exceptionFactory.build(message, e).setHttpResponse(response);`
`91`	`91`	`}`
Original file line number	Diff line number	Diff line change
`@@ -116,7 +116,7 @@ public List<OpenAiToolMessage> executeTools() {`
`116`	`116`	`final var tools = Optional.ofNullable(originalRequest.getToolsExecutable()).orElseGet(List::of);`
`117`	`117`	`if (log.isDebugEnabled() && !tools.isEmpty()) {`
`118`	`118`	`final var toolNames = tools.stream().map(OpenAiTool::getName).toList();`
`119`		`- log.debug("Executing {} tool call(s) - {}", toolNames.size(), toolNames);`
	`119`	`+ log.debug("Executing {} tool call(s) - {}.", toolNames.size(), toolNames);`
`120`	`120`	`}`
`121`	`121`	`return OpenAiTool.execute(tools, getMessage());`
`122`	`122`	`}`