Skip to content

Commit 1d4c795

Browse files
authored
Introducing a new predefined completion endpoint (.gp-llm-v2-completion) (#138420)
1 parent b382e2c commit 1d4c795

File tree

3 files changed

+36
-5
lines changed

3 files changed

+36
-5
lines changed

x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,23 @@ public static void init() {
4141
public void testGetDefaultEndpoints() throws IOException {
4242
var allModels = getAllModels();
4343
var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);
44+
var completionModels = getModels("_all", TaskType.COMPLETION);
4445

45-
assertThat(allModels, hasSize(8));
46+
assertThat(allModels, hasSize(9));
4647
assertThat(chatCompletionModels, hasSize(2));
48+
assertThat(completionModels, hasSize(1));
4749

4850
for (var model : chatCompletionModels) {
4951
assertEquals("chat_completion", model.get("task_type"));
5052
}
5153

54+
for (var model : completionModels) {
55+
assertEquals("completion", model.get("task_type"));
56+
}
57+
5258
assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION);
5359
assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION);
60+
assertInferenceIdTaskType(allModels, ".gp-llm-v2-completion", TaskType.COMPLETION);
5461
assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING);
5562
assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING);
5663
assertInferenceIdTaskType(allModels, ".jina-reranker-v2", TaskType.RERANK);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class InternalPreconfiguredEndpoints {
3636
// gp-llm-v2
3737
public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2";
3838
public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion";
39+
public static final String GP_LLM_V2_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-completion";
3940

4041
// elser-2
4142
public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2";
@@ -80,8 +81,7 @@ public record MinimalModel(
8081
DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
8182
TaskType.CHAT_COMPLETION,
8283
ElasticInferenceService.NAME,
83-
COMPLETION_SERVICE_SETTINGS,
84-
ChunkingSettingsBuilder.DEFAULT_SETTINGS
84+
COMPLETION_SERVICE_SETTINGS
8585
),
8686
COMPLETION_SERVICE_SETTINGS
8787
)
@@ -93,8 +93,16 @@ public record MinimalModel(
9393
GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
9494
TaskType.CHAT_COMPLETION,
9595
ElasticInferenceService.NAME,
96-
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
97-
ChunkingSettingsBuilder.DEFAULT_SETTINGS
96+
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
97+
),
98+
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
99+
),
100+
new MinimalModel(
101+
new ModelConfigurations(
102+
GP_LLM_V2_COMPLETION_ENDPOINT_ID,
103+
TaskType.COMPLETION,
104+
ElasticInferenceService.NAME,
105+
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
98106
),
99107
GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
100108
)

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
package org.elasticsearch.xpack.inference.services.elastic;
99

10+
import org.elasticsearch.inference.TaskType;
1011
import org.elasticsearch.test.ESTestCase;
1112

1213
import static org.hamcrest.Matchers.hasSize;
14+
import static org.hamcrest.Matchers.is;
1315

1416
public class InternalPreconfiguredEndpointsTests extends ESTestCase {
1517
public void testGetWithModelName_ReturnsAnEmptyList_IfNameDoesNotExist() {
@@ -20,4 +22,18 @@ public void testGetWithModelName_ReturnsChatCompletionModels() {
2022
var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
2123
assertThat(models, hasSize(1));
2224
}
25+
26+
public void testGetWithModelName_ReturnsGpLlmV2Models() {
27+
var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID);
28+
assertThat(models, hasSize(2));
29+
var taskTypes = models.stream().map(m -> m.configurations().getTaskType()).toList();
30+
assertTrue("Should contain CHAT_COMPLETION", taskTypes.contains(TaskType.CHAT_COMPLETION));
31+
assertTrue("Should contain COMPLETION", taskTypes.contains(TaskType.COMPLETION));
32+
}
33+
34+
public void testGetWithInferenceId_ReturnsGpLlmV2CompletionEndpoint() {
35+
var model = InternalPreconfiguredEndpoints.getWithInferenceId(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID);
36+
assertThat(model.configurations().getInferenceEntityId(), is(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID));
37+
assertThat(model.configurations().getTaskType(), is(TaskType.COMPLETION));
38+
}
2339
}

0 commit comments

Comments
 (0)