
Commit f4eb375

markpollack authored and spring-builds committed
feat(vertex-gemini): Add safety ratings to response metadata
Extract safety ratings from Gemini response candidates and include them in AssistantMessage metadata. Update Google Cloud BOM to 26.72.0.

Fixes #687

(cherry picked from commit 2710cab)
1 parent a7e3752 commit f4eb375

File tree

5 files changed: +230 -2 lines changed

models/spring-ai-vertex-ai-gemini/src/main/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModel.java

Lines changed: 46 additions & 1 deletion

@@ -87,6 +87,7 @@
 import org.springframework.ai.tool.definition.ToolDefinition;
 import org.springframework.ai.vertexai.gemini.api.VertexAiGeminiApi;
 import org.springframework.ai.vertexai.gemini.common.VertexAiGeminiConstants;
+import org.springframework.ai.vertexai.gemini.common.VertexAiGeminiSafetyRating;
 import org.springframework.ai.vertexai.gemini.common.VertexAiGeminiSafetySetting;
 import org.springframework.ai.vertexai.gemini.schema.VertexAiSchemaConverter;
 import org.springframework.ai.vertexai.gemini.schema.VertexToolCallingManager;
@@ -594,8 +595,16 @@ protected List<Generation> responseCandidateToGeneration(Candidate candidate) {
         VertexAiGeminiApi.LogProbs logprobs = new VertexAiGeminiApi.LogProbs(candidate.getAvgLogprobs(), topCandidates,
                 chosenCandidates);
 
+        // Extract safety ratings from the candidate
+        List<VertexAiGeminiSafetyRating> safetyRatings = candidate.getSafetyRatingsList()
+            .stream()
+            .map(sr -> new VertexAiGeminiSafetyRating(toSafetyRatingHarmCategory(sr.getCategory()),
+                    toSafetyRatingHarmProbability(sr.getProbability()), sr.getBlocked(), sr.getProbabilityScore(),
+                    toSafetyRatingHarmSeverity(sr.getSeverity()), sr.getSeverityScore()))
+            .toList();
+
         Map<String, Object> messageMetadata = Map.of("candidateIndex", candidateIndex, "finishReason",
-                candidateFinishReason, "logprobs", logprobs);
+                candidateFinishReason, "logprobs", logprobs, "safetyRatings", safetyRatings);
 
         ChatGenerationMetadata chatGenerationMetadata = ChatGenerationMetadata.builder()
             .finishReason(candidateFinishReason.name())
@@ -633,6 +642,42 @@ private DefaultUsage getDefaultUsage(GenerateContentResponse.UsageMetadata usage
             usageMetadata.getTotalTokenCount(), usageMetadata);
     }
 
+    private VertexAiGeminiSafetyRating.HarmCategory toSafetyRatingHarmCategory(
+            com.google.cloud.vertexai.api.HarmCategory category) {
+        return switch (category) {
+            case HARM_CATEGORY_HATE_SPEECH -> VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_HATE_SPEECH;
+            case HARM_CATEGORY_DANGEROUS_CONTENT ->
+                VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT;
+            case HARM_CATEGORY_HARASSMENT -> VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_HARASSMENT;
+            case HARM_CATEGORY_SEXUALLY_EXPLICIT ->
+                VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT;
+            case HARM_CATEGORY_CIVIC_INTEGRITY -> VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY;
+            default -> VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_UNSPECIFIED;
+        };
+    }
+
+    private VertexAiGeminiSafetyRating.HarmProbability toSafetyRatingHarmProbability(
+            com.google.cloud.vertexai.api.SafetyRating.HarmProbability probability) {
+        return switch (probability) {
+            case NEGLIGIBLE -> VertexAiGeminiSafetyRating.HarmProbability.NEGLIGIBLE;
+            case LOW -> VertexAiGeminiSafetyRating.HarmProbability.LOW;
+            case MEDIUM -> VertexAiGeminiSafetyRating.HarmProbability.MEDIUM;
+            case HIGH -> VertexAiGeminiSafetyRating.HarmProbability.HIGH;
+            default -> VertexAiGeminiSafetyRating.HarmProbability.HARM_PROBABILITY_UNSPECIFIED;
+        };
+    }
+
+    private VertexAiGeminiSafetyRating.HarmSeverity toSafetyRatingHarmSeverity(
+            com.google.cloud.vertexai.api.SafetyRating.HarmSeverity severity) {
+        return switch (severity) {
+            case HARM_SEVERITY_NEGLIGIBLE -> VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_NEGLIGIBLE;
+            case HARM_SEVERITY_LOW -> VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_LOW;
+            case HARM_SEVERITY_MEDIUM -> VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_MEDIUM;
+            case HARM_SEVERITY_HIGH -> VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_HIGH;
+            default -> VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_UNSPECIFIED;
+        };
+    }
+
     private VertexAiGeminiChatOptions vertexAiGeminiChatOptions(Prompt prompt) {
         VertexAiGeminiChatOptions updatedRuntimeOptions = VertexAiGeminiChatOptions.builder().build();
         if (prompt.getOptions() != null) {
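
The new `"safetyRatings"` entry rides alongside the existing metadata keys (`candidateIndex`, `finishReason`, `logprobs`). A minimal sketch of how calling code can read it back, assuming a `ChatResponse` named `response` returned by this chat model; the cast is unchecked because the message metadata map is untyped:

[source,java]
----
// Sketch: reading the metadata populated by responseCandidateToGeneration.
// Assumes `response` is a ChatResponse returned by VertexAiGeminiChatModel.
Map<String, Object> metadata = response.getResult().getOutput().getMetadata();

Integer candidateIndex = (Integer) metadata.get("candidateIndex");
VertexAiGeminiApi.LogProbs logprobs = (VertexAiGeminiApi.LogProbs) metadata.get("logprobs");

@SuppressWarnings("unchecked") // the metadata map holds untyped values
List<VertexAiGeminiSafetyRating> safetyRatings =
        (List<VertexAiGeminiSafetyRating>) metadata.get("safetyRatings");
----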
models/spring-ai-vertex-ai-gemini/src/main/java/org/springframework/ai/vertexai/gemini/common/VertexAiGeminiSafetyRating.java

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+/*
+ * Copyright 2024-2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.vertexai.gemini.common;
+
+/**
+ * Represents a safety rating returned by the Vertex AI Gemini API for generated content.
+ * Safety ratings indicate the probability and severity of harmful content in a specific
+ * category.
+ *
+ * @author Mark Pollack
+ * @since 1.1.1
+ * @see VertexAiGeminiSafetySetting
+ */
+public record VertexAiGeminiSafetyRating(HarmCategory category, HarmProbability probability, boolean blocked,
+        float probabilityScore, HarmSeverity severity, float severityScore) {
+
+    /**
+     * Enum representing different categories of harmful content.
+     */
+    public enum HarmCategory {
+
+        HARM_CATEGORY_UNSPECIFIED, HARM_CATEGORY_HATE_SPEECH, HARM_CATEGORY_DANGEROUS_CONTENT, HARM_CATEGORY_HARASSMENT,
+        HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_CIVIC_INTEGRITY
+
+    }
+
+    /**
+     * Enum representing the probability levels of harmful content.
+     */
+    public enum HarmProbability {
+
+        HARM_PROBABILITY_UNSPECIFIED, NEGLIGIBLE, LOW, MEDIUM, HIGH
+
+    }
+
+    /**
+     * Enum representing the severity levels of harmful content.
+     */
+    public enum HarmSeverity {
+
+        HARM_SEVERITY_UNSPECIFIED, HARM_SEVERITY_NEGLIGIBLE, HARM_SEVERITY_LOW, HARM_SEVERITY_MEDIUM, HARM_SEVERITY_HIGH
+
+    }
+
+}
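
A short usage sketch of the record (the values below are made up for illustration, not produced by the commit). Because each nested enum declares its constants in ascending order of concern, `ordinal()` gives a serviceable ranking when you want the single worst rating:

[source,java]
----
import java.util.Comparator;
import java.util.List;

// Hypothetical ratings, constructed directly for illustration.
List<VertexAiGeminiSafetyRating> ratings = List.of(
        new VertexAiGeminiSafetyRating(
                VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_HARASSMENT,
                VertexAiGeminiSafetyRating.HarmProbability.LOW, false, 0.12f,
                VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_NEGLIGIBLE, 0.05f),
        new VertexAiGeminiSafetyRating(
                VertexAiGeminiSafetyRating.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
                VertexAiGeminiSafetyRating.HarmProbability.MEDIUM, false, 0.41f,
                VertexAiGeminiSafetyRating.HarmSeverity.HARM_SEVERITY_LOW, 0.22f));

// Constants run from UNSPECIFIED up to HIGH, so ordinal() ranks them.
VertexAiGeminiSafetyRating worst = ratings.stream()
        .max(Comparator.comparingInt(r -> r.probability().ordinal()))
        .orElseThrow();
----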

models/spring-ai-vertex-ai-gemini/src/test/java/org/springframework/ai/vertexai/gemini/VertexAiGeminiChatModelIT.java

Lines changed: 45 additions & 0 deletions

@@ -47,6 +47,7 @@
 import org.springframework.ai.tool.annotation.Tool;
 import org.springframework.ai.vertexai.gemini.VertexAiGeminiChatModel.ChatModel;
 import org.springframework.ai.vertexai.gemini.api.VertexAiGeminiApi;
+import org.springframework.ai.vertexai.gemini.common.VertexAiGeminiSafetyRating;
 import org.springframework.ai.vertexai.gemini.common.VertexAiGeminiSafetySetting;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
@@ -247,6 +248,50 @@ void logprobs() {
         assertThat(logprobs.chosenCandidates()).isNotEmpty();
     }
 
+    @Test
+    @SuppressWarnings("unchecked")
+    void safetyRatingsMetadataIsPresent() {
+        // Use safety settings with BLOCK_LOW_AND_ABOVE to ensure safety evaluation occurs
+        // and ratings are returned (similar to Python SDK example)
+        List<VertexAiGeminiSafetySetting> safetySettings = List.of(
+                VertexAiGeminiSafetySetting.builder()
+                    .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT)
+                    .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
+                    .build(),
+                VertexAiGeminiSafetySetting.builder()
+                    .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH)
+                    .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
+                    .build(),
+                VertexAiGeminiSafetySetting.builder()
+                    .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT)
+                    .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
+                    .build(),
+                VertexAiGeminiSafetySetting.builder()
+                    .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT)
+                    .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
+                    .build());
+
+        // Use a prompt that should trigger safety evaluation
+        String prompt = "Write a list of 5 disrespectful things that I might say to the universe after stubbing my toe in the dark:";
+
+        ChatResponse response = this.chatModel
+            .call(new Prompt(prompt, VertexAiGeminiChatOptions.builder().safetySettings(safetySettings).build()));
+
+        // Safety ratings should be present in the AssistantMessage metadata
+        var safetyRatings = (List<VertexAiGeminiSafetyRating>) response.getResult()
+            .getOutput()
+            .getMetadata()
+            .get("safetyRatings");
+
+        assertThat(safetyRatings).isNotNull();
+        assertThat(safetyRatings).isNotEmpty();
+
+        // Verify safety rating structure
+        VertexAiGeminiSafetyRating firstRating = safetyRatings.get(0);
+        assertThat(firstRating.category()).isNotNull();
+        assertThat(firstRating.probability()).isNotNull();
+    }
+
     @Test
     void beanStreamOutputConverterRecords() {
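
The test only asserts that ratings are present; application code will more often act on `blocked()`. A sketch of such a guard, using the same unchecked cast as the test (the helper name and exception choice are illustrative, not part of Spring AI):

[source,java]
----
// Illustrative helper (not part of Spring AI): reject a response whose
// first candidate carries a blocked safety rating.
@SuppressWarnings("unchecked")
static void assertNotBlocked(ChatResponse response) {
    List<VertexAiGeminiSafetyRating> ratings = (List<VertexAiGeminiSafetyRating>) response
        .getResult()
        .getOutput()
        .getMetadata()
        .getOrDefault("safetyRatings", List.of());

    ratings.stream()
        .filter(VertexAiGeminiSafetyRating::blocked)
        .findFirst()
        .ifPresent(rating -> {
            throw new IllegalStateException("Content blocked for category " + rating.category());
        });
}
----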

pom.xml

Lines changed: 1 addition & 1 deletion

@@ -286,7 +286,7 @@
         <djl.version>0.32.0</djl.version>
         <onnxruntime.version>1.19.2</onnxruntime.version>
         <oci-sdk-version>3.63.1</oci-sdk-version>
-        <com.google.cloud.version>26.60.0</com.google.cloud.version>
+        <com.google.cloud.version>26.72.0</com.google.cloud.version>
         <com.google.genai.version>1.28.0</com.google.genai.version>
         <ibm.sdk.version>9.20.0</ibm.sdk.version>
         <jsonschema.version>4.38.0</jsonschema.version>
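
For context, `com.google.cloud.version` pins the version of Google's Java client BOM. A hedged sketch of how such a property is typically consumed via a `dependencyManagement` import; the actual import lives elsewhere in the Spring AI build and is not part of this diff:

[source,xml]
----
<!-- Sketch: typical consumption of a Google Cloud BOM version property. -->
<dependencyManagement>
    <dependencies>
        <dependency>
            <groupId>com.google.cloud</groupId>
            <artifactId>libraries-bom</artifactId>
            <version>${com.google.cloud.version}</version>
            <type>pom</type>
            <scope>import</scope>
        </dependency>
    </dependencies>
</dependencyManagement>
----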

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/vertexai-gemini-chat.adoc

Lines changed: 79 additions & 0 deletions

@@ -215,6 +215,85 @@ var response = this.chatModel.call(new Prompt(List.of(userMessage)));
 ----
 
 
+== Safety Settings and Safety Ratings
+
+The Vertex AI Gemini API provides safety filtering capabilities to help you control harmful content in both prompts and responses.
+For more details, see the https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters[Vertex AI Safety Filters documentation].
+
+=== Configuring Safety Settings
+
+You can configure safety settings to control the threshold at which content is blocked for different harm categories.
+The available harm categories are:
+
+* `HARM_CATEGORY_HATE_SPEECH` - Hate speech content
+* `HARM_CATEGORY_DANGEROUS_CONTENT` - Dangerous content
+* `HARM_CATEGORY_HARASSMENT` - Harassment content
+* `HARM_CATEGORY_SEXUALLY_EXPLICIT` - Sexually explicit content
+* `HARM_CATEGORY_CIVIC_INTEGRITY` - Civic integrity content
+
+The available threshold levels are:
+
+* `BLOCK_LOW_AND_ABOVE` - Block when there is a low, medium, or high probability of unsafe content
+* `BLOCK_MEDIUM_AND_ABOVE` - Block when there is a medium or high probability of unsafe content
+* `BLOCK_ONLY_HIGH` - Block only when there is a high probability of unsafe content
+* `BLOCK_NONE` - Never block (use with caution)
+
+[source,java]
+----
+List<VertexAiGeminiSafetySetting> safetySettings = List.of(
+        VertexAiGeminiSafetySetting.builder()
+            .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT)
+            .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE)
+            .build(),
+        VertexAiGeminiSafetySetting.builder()
+            .withCategory(VertexAiGeminiSafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH)
+            .withThreshold(VertexAiGeminiSafetySetting.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE)
+            .build());
+
+ChatResponse response = chatModel.call(new Prompt("Your prompt here",
+        VertexAiGeminiChatOptions.builder()
+            .safetySettings(safetySettings)
+            .build()));
+----
+
+=== Accessing Safety Ratings in Responses
+
+When safety settings are configured, the Gemini API returns safety ratings for each response candidate.
+These ratings indicate the probability and severity of harmful content in each category.
+
+Safety ratings are available in the `AssistantMessage` metadata under the key `"safetyRatings"`:
+
+[source,java]
+----
+ChatResponse response = chatModel.call(new Prompt(prompt,
+        VertexAiGeminiChatOptions.builder()
+            .safetySettings(safetySettings)
+            .build()));
+
+// Access safety ratings from the response
+List<VertexAiGeminiSafetyRating> safetyRatings =
+        (List<VertexAiGeminiSafetyRating>) response.getResult()
+            .getOutput()
+            .getMetadata()
+            .get("safetyRatings");
+
+for (VertexAiGeminiSafetyRating rating : safetyRatings) {
+    System.out.println("Category: " + rating.category());
+    System.out.println("Probability: " + rating.probability());
+    System.out.println("Severity: " + rating.severity());
+    System.out.println("Blocked: " + rating.blocked());
+}
+----
+
+The `VertexAiGeminiSafetyRating` record contains:
+
+* `category` - The harm category (e.g., `HARM_CATEGORY_HARASSMENT`)
+* `probability` - The probability level (`NEGLIGIBLE`, `LOW`, `MEDIUM`, or `HIGH`)
+* `blocked` - Whether the content was blocked due to this rating
+* `probabilityScore` - The raw probability score (0.0 to 1.0)
+* `severity` - The severity level (`HARM_SEVERITY_NEGLIGIBLE`, `HARM_SEVERITY_LOW`, `HARM_SEVERITY_MEDIUM`, or `HARM_SEVERITY_HIGH`)
+* `severityScore` - The raw severity score (0.0 to 1.0)
+
 == Sample Controller
 
 https://start.spring.io/[Create] a new Spring Boot project and add the `spring-ai-starter-model-vertex-ai-gemini` to your pom (or gradle) dependencies.