Skip to content

Commit ef2eb1f

Browse files
OCI GenAI Cohere Chat Model
Signed-off-by: Anders Swanson <anders.swanson@oracle.com>
1 parent 6d2b22f commit ef2eb1f

File tree

16 files changed

+1194
-73
lines changed

16 files changed

+1194
-73
lines changed

models/spring-ai-oci-genai/src/main/java/org/springframework/ai/oci/OCIEmbeddingModel.java

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,8 @@
2222
import java.util.concurrent.atomic.AtomicInteger;
2323

2424
import com.oracle.bmc.generativeaiinference.GenerativeAiInference;
25-
import com.oracle.bmc.generativeaiinference.model.DedicatedServingMode;
2625
import com.oracle.bmc.generativeaiinference.model.EmbedTextDetails;
2726
import com.oracle.bmc.generativeaiinference.model.EmbedTextResult;
28-
import com.oracle.bmc.generativeaiinference.model.OnDemandServingMode;
2927
import com.oracle.bmc.generativeaiinference.model.ServingMode;
3028
import com.oracle.bmc.generativeaiinference.requests.EmbedTextRequest;
3129
import io.micrometer.observation.ObservationRegistry;
@@ -128,15 +126,6 @@ private EmbeddingResponse embedAllWithContext(List<EmbedTextRequest> embedTextRe
128126
return embeddingResponse;
129127
}
130128

131-
private ServingMode servingMode(OCIEmbeddingOptions embeddingOptions) {
132-
return switch (embeddingOptions.getServingMode()) {
133-
case "dedicated" -> DedicatedServingMode.builder().endpointId(embeddingOptions.getModel()).build();
134-
case "on-demand" -> OnDemandServingMode.builder().modelId(embeddingOptions.getModel()).build();
135-
default -> throw new IllegalArgumentException(
136-
"unknown serving mode for OCI embedding model: " + embeddingOptions.getServingMode());
137-
};
138-
}
139-
140129
private List<EmbedTextRequest> createRequests(List<String> inputs, OCIEmbeddingOptions embeddingOptions) {
141130
int size = inputs.size();
142131
List<EmbedTextRequest> requests = new ArrayList<>();
@@ -148,8 +137,9 @@ private List<EmbedTextRequest> createRequests(List<String> inputs, OCIEmbeddingO
148137
}
149138

150139
private EmbedTextRequest createRequest(List<String> inputs, OCIEmbeddingOptions embeddingOptions) {
140+
ServingMode servingMode = ServingModeHelper.get(options.getServingMode(), options.getModel());
151141
EmbedTextDetails embedTextDetails = EmbedTextDetails.builder()
152-
.servingMode(servingMode(embeddingOptions))
142+
.servingMode(servingMode)
153143
.compartmentId(embeddingOptions.getCompartment())
154144
.inputs(inputs)
155145
.truncate(Objects.requireNonNullElse(embeddingOptions.getTruncate(), EmbedTextDetails.Truncate.End))
models/spring-ai-oci-genai/src/main/java/org/springframework/ai/oci/ServingModeHelper.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.springframework.ai.oci;
17+
18+
import com.oracle.bmc.generativeaiinference.model.DedicatedServingMode;
19+
import com.oracle.bmc.generativeaiinference.model.OnDemandServingMode;
20+
import com.oracle.bmc.generativeaiinference.model.ServingMode;
21+
22+
/**
23+
* Helper class to load the OCI Gen AI
24+
* {@link com.oracle.bmc.generativeaiinference.model.ServingMode}
25+
*
26+
* @author Anders Swanson
27+
*/
28+
public final class ServingModeHelper {
29+
30+
public static ServingMode get(String servingMode, String model) {
31+
return switch (servingMode) {
32+
case "dedicated" -> DedicatedServingMode.builder().endpointId(model).build();
33+
case "on-demand" -> OnDemandServingMode.builder().modelId(model).build();
34+
default -> throw new IllegalArgumentException(String.format(
35+
"Unknown serving mode for OCI Gen AI: %s. Supported options are 'dedicated' and 'on-demand'",
36+
servingMode));
37+
};
38+
}
39+
40+
}
models/spring-ai-oci-genai/src/main/java/org/springframework/ai/oci/cohere/OCICohereChatModel.java

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Copyright 2024 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.springframework.ai.oci.cohere;
17+
18+
import java.util.ArrayList;
19+
import java.util.List;
20+
import java.util.Map;
21+
22+
import com.oracle.bmc.generativeaiinference.GenerativeAiInference;
23+
import com.oracle.bmc.generativeaiinference.model.BaseChatRequest;
24+
import com.oracle.bmc.generativeaiinference.model.BaseChatResponse;
25+
import com.oracle.bmc.generativeaiinference.model.ChatDetails;
26+
import com.oracle.bmc.generativeaiinference.model.CohereChatBotMessage;
27+
import com.oracle.bmc.generativeaiinference.model.CohereChatRequest;
28+
import com.oracle.bmc.generativeaiinference.model.CohereChatResponse;
29+
import com.oracle.bmc.generativeaiinference.model.CohereMessage;
30+
import com.oracle.bmc.generativeaiinference.model.CohereSystemMessage;
31+
import com.oracle.bmc.generativeaiinference.model.CohereToolCall;
32+
import com.oracle.bmc.generativeaiinference.model.CohereToolMessage;
33+
import com.oracle.bmc.generativeaiinference.model.CohereToolResult;
34+
import com.oracle.bmc.generativeaiinference.model.CohereUserMessage;
35+
import com.oracle.bmc.generativeaiinference.model.ServingMode;
36+
import com.oracle.bmc.generativeaiinference.requests.ChatRequest;
37+
import io.micrometer.observation.ObservationRegistry;
38+
import org.springframework.ai.chat.messages.AssistantMessage;
39+
import org.springframework.ai.chat.messages.Message;
40+
import org.springframework.ai.chat.messages.ToolResponseMessage;
41+
import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
42+
import org.springframework.ai.chat.metadata.ChatResponseMetadata;
43+
import org.springframework.ai.chat.model.ChatModel;
44+
import org.springframework.ai.chat.model.ChatResponse;
45+
import org.springframework.ai.chat.model.Generation;
46+
import org.springframework.ai.chat.observation.ChatModelObservationContext;
47+
import org.springframework.ai.chat.observation.ChatModelObservationConvention;
48+
import org.springframework.ai.chat.observation.ChatModelObservationDocumentation;
49+
import org.springframework.ai.chat.observation.DefaultChatModelObservationConvention;
50+
import org.springframework.ai.chat.prompt.ChatOptions;
51+
import org.springframework.ai.chat.prompt.Prompt;
52+
import org.springframework.ai.model.ModelOptionsUtils;
53+
import org.springframework.ai.observation.conventions.AiProvider;
54+
import org.springframework.ai.oci.ServingModeHelper;
55+
import org.springframework.util.Assert;
56+
import org.springframework.util.StringUtils;
57+
58+
import static java.util.Objects.requireNonNullElse;
59+
60+
/**
61+
* {@link ChatModel} implementation that uses the OCI GenAI Chat API.
62+
*
63+
* @author Anders Swanson
64+
* @since 1.0.0
65+
*/
66+
public class OCICohereChatModel implements ChatModel {
67+
68+
private static final ChatModelObservationConvention DEFAULT_OBSERVATION_CONVENTION = new DefaultChatModelObservationConvention();
69+
70+
private static final Double DEFAULT_TEMPERATURE = 0.7;
71+
72+
/**
73+
* The {@link GenerativeAiInference} client used to interact with OCI GenAI service.
74+
*/
75+
private final GenerativeAiInference genAi;
76+
77+
/**
78+
* The configuration information for a chat completions request.
79+
*/
80+
private final OCICohereChatOptions defaultOptions;
81+
82+
private final ObservationRegistry observationRegistry;
83+
84+
/**
85+
* Conventions to use for generating observations.
86+
*/
87+
private ChatModelObservationConvention observationConvention = DEFAULT_OBSERVATION_CONVENTION;
88+
89+
public OCICohereChatModel(GenerativeAiInference genAi, OCICohereChatOptions options) {
90+
this(genAi, options, null);
91+
}
92+
93+
public OCICohereChatModel(GenerativeAiInference genAi, OCICohereChatOptions options,
94+
ObservationRegistry observationRegistry) {
95+
Assert.notNull(genAi, "com.oracle.bmc.generativeaiinference.GenerativeAiInference must not be null");
96+
Assert.notNull(options, "OCIChatOptions must not be null");
97+
98+
this.genAi = genAi;
99+
this.defaultOptions = options;
100+
this.observationRegistry = observationRegistry;
101+
}
102+
103+
@Override
104+
public ChatResponse call(Prompt prompt) {
105+
ChatModelObservationContext observationContext = ChatModelObservationContext.builder()
106+
.prompt(prompt)
107+
.provider(AiProvider.OCI_GENAI.value())
108+
.requestOptions(prompt.getOptions() != null ? prompt.getOptions() : this.defaultOptions)
109+
.build();
110+
111+
return ChatModelObservationDocumentation.CHAT_MODEL_OPERATION
112+
.observation(this.observationConvention, DEFAULT_OBSERVATION_CONVENTION, () -> observationContext,
113+
this.observationRegistry)
114+
.observe(() -> {
115+
ChatResponse chatResponse = doChatRequest(prompt);
116+
observationContext.setResponse(chatResponse);
117+
return chatResponse;
118+
});
119+
}
120+
121+
@Override
122+
public ChatOptions getDefaultOptions() {
123+
return OCICohereChatOptions.fromOptions(defaultOptions);
124+
}
125+
126+
/**
127+
* Use the provided convention for reporting observation data
128+
* @param observationConvention The provided convention
129+
*/
130+
public void setObservationConvention(ChatModelObservationConvention observationConvention) {
131+
Assert.notNull(observationConvention, "observationConvention cannot be null");
132+
this.observationConvention = observationConvention;
133+
}
134+
135+
private ChatResponse doChatRequest(Prompt prompt) {
136+
OCICohereChatOptions options = mergeOptions(prompt.getOptions(), this.defaultOptions);
137+
validateChatOptions(options);
138+
139+
ChatResponseMetadata metadata = ChatResponseMetadata.builder()
140+
.withModel(options.getModel())
141+
.withKeyValue("compartment", options.getCompartment())
142+
.build();
143+
return new ChatResponse(getGenerations(prompt, options), metadata);
144+
145+
}
146+
147+
private OCICohereChatOptions mergeOptions(ChatOptions chatOptions, OCICohereChatOptions defaultOptions) {
148+
if (chatOptions instanceof OCICohereChatOptions override) {
149+
OCICohereChatOptions dynamicOptions = ModelOptionsUtils.merge(override, defaultOptions,
150+
OCICohereChatOptions.class);
151+
152+
if (dynamicOptions != null) {
153+
return dynamicOptions;
154+
}
155+
}
156+
return defaultOptions;
157+
}
158+
159+
private void validateChatOptions(OCICohereChatOptions options) {
160+
if (!StringUtils.hasText(options.getModel())) {
161+
throw new IllegalArgumentException("Model is not set!");
162+
}
163+
if (!StringUtils.hasText(options.getCompartment())) {
164+
throw new IllegalArgumentException("Compartment is not set!");
165+
}
166+
if (!StringUtils.hasText(options.getServingMode())) {
167+
throw new IllegalArgumentException("ServingMode is not set!");
168+
}
169+
}
170+
171+
private List<Generation> getGenerations(Prompt prompt, OCICohereChatOptions options) {
172+
com.oracle.bmc.generativeaiinference.responses.ChatResponse cr = genAi
173+
.chat(toCohereChatRequest(prompt, options));
174+
return toGenerations(cr, options);
175+
176+
}
177+
178+
private List<Generation> toGenerations(com.oracle.bmc.generativeaiinference.responses.ChatResponse ociChatResponse,
179+
OCICohereChatOptions options) {
180+
BaseChatResponse cr = ociChatResponse.getChatResult().getChatResponse();
181+
if (cr instanceof CohereChatResponse resp) {
182+
List<Generation> generations = new ArrayList<>();
183+
ChatGenerationMetadata metadata = ChatGenerationMetadata.from(resp.getFinishReason().getValue(), null);
184+
AssistantMessage message = new AssistantMessage(resp.getText(), Map.of());
185+
generations.add(new Generation(message, metadata));
186+
return generations;
187+
}
188+
throw new IllegalStateException(String.format("Unexpected chat response type: %s", cr.getClass().getName()));
189+
}
190+
191+
private ChatRequest toCohereChatRequest(Prompt prompt, OCICohereChatOptions options) {
192+
List<Message> messages = prompt.getInstructions();
193+
Message message = messages.get(0);
194+
List<CohereMessage> chatHistory = getCohereMessages(messages);
195+
return newChatRequest(options, message, chatHistory);
196+
}
197+
198+
private List<CohereMessage> getCohereMessages(List<Message> messages) {
199+
List<CohereMessage> chatHistory = new ArrayList<>();
200+
for (int i = 1; i < messages.size(); i++) {
201+
Message message = messages.get(i);
202+
switch (message.getMessageType()) {
203+
case USER -> chatHistory.add(CohereUserMessage.builder().message(message.getContent()).build());
204+
case ASSISTANT -> chatHistory.add(CohereChatBotMessage.builder().message(message.getContent()).build());
205+
case SYSTEM -> chatHistory.add(CohereSystemMessage.builder().message(message.getContent()).build());
206+
case TOOL -> {
207+
if (message instanceof ToolResponseMessage tm) {
208+
chatHistory.add(toToolMessage(tm));
209+
}
210+
}
211+
}
212+
}
213+
return chatHistory;
214+
}
215+
216+
private CohereToolMessage toToolMessage(ToolResponseMessage tm) {
217+
List<CohereToolResult> results = tm.getResponses().stream().map(r -> {
218+
CohereToolCall call = CohereToolCall.builder().name(r.name()).build();
219+
return CohereToolResult.builder().call(call).outputs(List.of(r.responseData())).build();
220+
}).toList();
221+
return CohereToolMessage.builder().toolResults(results).build();
222+
}
223+
224+
private ChatRequest newChatRequest(OCICohereChatOptions options, Message message, List<CohereMessage> chatHistory) {
225+
BaseChatRequest baseChatRequest = CohereChatRequest.builder()
226+
.frequencyPenalty(options.getFrequencyPenalty())
227+
.presencePenalty(options.getPresencePenalty())
228+
.maxTokens(options.getMaxTokens())
229+
.topK(options.getTopK())
230+
.topP(options.getTopP())
231+
.temperature(requireNonNullElse(options.getTemperature(), DEFAULT_TEMPERATURE))
232+
.preambleOverride(options.getPreambleOverride())
233+
.stopSequences(options.getStopSequences())
234+
.documents(options.getDocuments())
235+
.tools(options.getTools())
236+
.chatHistory(chatHistory)
237+
.message(message.getContent())
238+
.build();
239+
ServingMode servingMode = ServingModeHelper.get(options.getServingMode(), options.getModel());
240+
ChatDetails chatDetails = ChatDetails.builder()
241+
.compartmentId(options.getCompartment())
242+
.servingMode(servingMode)
243+
.chatRequest(baseChatRequest)
244+
.build();
245+
return ChatRequest.builder().body$(chatDetails).build();
246+
}
247+
248+
}

0 commit comments

Comments
 (0)