Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions src/common/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
if (knownArgs.connectionString) {
usedDeprecatedArgument = true;
warn(
"The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
"Warning: The --connectionString argument is deprecated. Prefer using the MDB_MCP_CONNECTION_STRING environment variable or the first positional argument for the connection string."
);
}

Expand All @@ -333,15 +333,15 @@ export function warnAboutDeprecatedOrUnknownCliArgs(
if (!valid) {
usedInvalidArgument = true;
if (suggestion) {
warn(`Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
warn(`Warning: Invalid command line argument '${providedKey}'. Did you mean '${suggestion}'?`);
} else {
warn(`Invalid command line argument '${providedKey}'.`);
warn(`Warning: Invalid command line argument '${providedKey}'.`);
}
}
}

if (usedInvalidArgument || usedDeprecatedArgument) {
warn("Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
warn("- Refer to https://www.mongodb.com/docs/mcp-server/get-started/ for setting up the MCP Server.");
}

if (usedInvalidArgument) {
Expand Down Expand Up @@ -372,6 +372,24 @@ export function registerKnownSecretsInRootKeychain(userConfig: Partial<UserConfi
maybeRegister(userConfig.username, "user");
}

/**
 * Emits a warning when the `vectorSearch` preview feature and the embeddings
 * provider configuration disagree with each other.
 *
 * The feature counts as enabled when `config.previewFeatures` contains
 * `"vectorSearch"`; the provider counts as configured when `config.voyageApiKey`
 * is truthy. Nothing is emitted when both are set or both are unset.
 *
 * @param config - The user configuration to inspect.
 * @param warn - Sink that receives the warning text (called at most once).
 */
export function warnIfVectorSearchNotEnabledCorrectly(config: UserConfig, warn: (message: string) => void): void {
    const featureOn = config.previewFeatures.includes("vectorSearch");
    const hasProvider = Boolean(config.voyageApiKey);

    // Consistent configuration (both on or both off): nothing to report.
    if (featureOn === hasProvider) {
        return;
    }

    // Exactly one of the two flags is set from here on.
    const lines = featureOn
        ? [
              "Warning: Vector search is enabled but no embeddings provider is configured.",
              "- Set an embeddings provider configuration option to enable auto-embeddings during document insertion and text-based queries with $vectorSearch.",
          ]
        : [
              "Warning: An embeddings provider is configured but the 'vectorSearch' preview feature is not enabled.",
              "- Enable vector search by adding 'vectorSearch' to the 'previewFeatures' configuration option, or remove the embeddings provider configuration if not needed.",
          ];

    warn(lines.join("\n"));
}

export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<string, unknown> }): UserConfig {
const rawConfig = {
...parseEnvConfig(env),
Expand All @@ -392,6 +410,7 @@ export function setupUserConfig({ cli, env }: { cli: string[]; env: Record<strin
// We don't have a schema defined for all args-parser arguments, so we need to merge the raw config with the parsed config.
const userConfig = { ...rawConfig, ...parseResult.data } as UserConfig;

warnIfVectorSearchNotEnabledCorrectly(userConfig, (message) => console.warn(message));
registerKnownSecretsInRootKeychain(userConfig);
return userConfig;
}
Expand Down
117 changes: 45 additions & 72 deletions src/common/search/vectorSearchEmbeddingsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ export type VectorFieldIndexDefinition = {
export type VectorFieldValidationError = {
path: string;
expectedNumDimensions: number;
expectedQuantization: Quantization;
actualNumDimensions: number | "unknown";
actualQuantization: Quantization | "unknown";
error: "dimension-mismatch" | "quantization-mismatch" | "not-a-vector" | "not-numeric";
error: "dimension-mismatch" | "not-a-vector" | "not-numeric";
};

export type EmbeddingNamespace = `${string}.${string}`;
Expand Down Expand Up @@ -116,9 +114,9 @@ export class VectorSearchEmbeddingsManager {
if (embeddingValidationResults.length > 0) {
const embeddingValidationMessages = embeddingValidationResults.map(
(validation) =>
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions and ${validation.expectedQuantization}` +
` quantization, and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions}, ` +
`actual quantization: ${validation.actualQuantization}. Error: ${validation.error}`
`- Field ${validation.path} is an embedding with ${validation.expectedNumDimensions} dimensions,` +
` and the provided value is not compatible. Actual dimensions: ${validation.actualNumDimensions},` +
` Error: ${validation.error}`
);

throw new MongoDBError(
Expand Down Expand Up @@ -179,16 +177,36 @@ export class VectorSearchEmbeddingsManager {
let fieldRef: unknown = document;

const constructError = (
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions" | "actualQuantization">>
details: Partial<Pick<VectorFieldValidationError, "error" | "actualNumDimensions">>
): VectorFieldValidationError => ({
path: definition.path,
expectedNumDimensions: definition.numDimensions,
expectedQuantization: definition.quantization,
actualNumDimensions: details.actualNumDimensions ?? "unknown",
actualQuantization: details.actualQuantization ?? "unknown",
error: details.error ?? "not-a-vector",
});

/**
 * Resolves the array-like payload behind a candidate vector field.
 *
 * - A `BSON.Binary` is decoded first via `toFloat32Array()` and, if that
 *   throws, via `toBits()`; if both attempts throw the result is `undefined`.
 * - A plain array is returned as-is.
 * - Anything else yields `undefined`.
 */
const extractUnderlyingVector = (fieldRef: unknown): ArrayLike<unknown> | undefined => {
    if (!(fieldRef instanceof BSON.Binary)) {
        return Array.isArray(fieldRef) ? (fieldRef as Array<unknown>) : undefined;
    }

    const binary = fieldRef;
    // Order matters: float32 decoding is attempted before bit decoding.
    const decoders: Array<() => ArrayLike<unknown>> = [() => binary.toFloat32Array(), () => binary.toBits()];

    for (const decode of decoders) {
        try {
            return decode();
        } catch {
            // this decoding does not apply; try the next one
        }
    }

    return undefined;
};

for (const field of fieldPath) {
if (fieldRef && typeof fieldRef === "object" && field in fieldRef) {
fieldRef = (fieldRef as Record<string, unknown>)[field];
Expand All @@ -197,70 +215,25 @@ export class VectorSearchEmbeddingsManager {
}
}

switch (definition.quantization) {
// Because quantization is not defined by the user
// we have to trust them in the format they use.
case "none":
return undefined;
case "scalar":
case "binary":
if (fieldRef instanceof BSON.Binary) {
try {
const elements = fieldRef.toFloat32Array();
if (elements.length !== definition.numDimensions) {
return constructError({
actualNumDimensions: elements.length,
actualQuantization: "binary",
error: "dimension-mismatch",
});
}

return undefined;
} catch {
// bits are also supported
try {
const bits = fieldRef.toBits();
if (bits.length !== definition.numDimensions) {
return constructError({
actualNumDimensions: bits.length,
actualQuantization: "binary",
error: "dimension-mismatch",
});
}

return undefined;
} catch {
return constructError({
actualQuantization: "binary",
error: "not-a-vector",
});
}
}
} else {
if (!Array.isArray(fieldRef)) {
return constructError({
error: "not-a-vector",
});
}

if (fieldRef.length !== definition.numDimensions) {
return constructError({
actualNumDimensions: fieldRef.length,
actualQuantization: "scalar",
error: "dimension-mismatch",
});
}

if (!fieldRef.every((e) => this.isANumber(e))) {
return constructError({
actualNumDimensions: fieldRef.length,
actualQuantization: "scalar",
error: "not-numeric",
});
}
}
const maybeVector = extractUnderlyingVector(fieldRef);
if (!maybeVector) {
return constructError({
error: "not-a-vector",
});
}

break;
if (maybeVector.length !== definition.numDimensions) {
return constructError({
actualNumDimensions: maybeVector.length,
error: "dimension-mismatch",
});
}

if (Array.isArray(maybeVector) && maybeVector.some((e) => !this.isANumber(e))) {
return constructError({
actualNumDimensions: maybeVector.length,
error: "not-numeric",
});
}

return undefined;
Expand Down
2 changes: 1 addition & 1 deletion src/tools/mongodb/create/createIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ export class CreateIndexTool extends MongoDBToolBase {
])
)
.describe(
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes"
`The index definition. Use 'classic' for standard indexes${this.isFeatureEnabled("vectorSearch") ? " and 'vectorSearch' for vector search indexes" : ""}.`
),
};

Expand Down
4 changes: 2 additions & 2 deletions src/tools/mongodb/metadata/explain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { ToolArgs, OperationType } from "../../tool.js";
import { formatUntrustedData } from "../../tool.js";
import { z } from "zod";
import type { Document } from "mongodb";
import { AggregateArgs } from "../read/aggregate.js";
import { getAggregateArgs } from "../read/aggregate.js";
import { FindArgs } from "../read/find.js";
import { CountArgs } from "../read/count.js";

Expand All @@ -20,7 +20,7 @@ export class ExplainTool extends MongoDBToolBase {
z.discriminatedUnion("name", [
z.object({
name: z.literal("aggregate"),
arguments: z.object(AggregateArgs),
arguments: z.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch"))),
}),
z.object({
name: z.literal("find"),
Expand Down
2 changes: 1 addition & 1 deletion src/tools/mongodb/mongodbSchemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export type EmbeddingParameters = {
export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({ model: zVoyageModels });
export type SupportedEmbeddingParameters = z.infer<typeof zSupportedEmbeddingParameters>;

export const AnyVectorSearchStage = zEJSON();
export const AnyAggregateStage = zEJSON();
export const VectorSearchStage = z.object({
$vectorSearch: z
.object({
Expand Down
27 changes: 17 additions & 10 deletions src/tools/mongodb/read/aggregate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,17 @@ import { collectCursorUntilMaxBytesLimit } from "../../../helpers/collectCursorU
import { operationWithFallback } from "../../../helpers/operationWithFallback.js";
import { AGG_COUNT_MAX_TIME_MS_CAP, ONE_MB, CURSOR_LIMITS_TO_LLM_TEXT } from "../../../helpers/constants.js";
import { LogId } from "../../../common/logger.js";
import { AnyVectorSearchStage, VectorSearchStage } from "../mongodbSchemas.js";
import { AnyAggregateStage, VectorSearchStage } from "../mongodbSchemas.js";
import {
assertVectorSearchFilterFieldsAreIndexed,
type VectorSearchIndex,
} from "../../../helpers/assertVectorSearchFilterFieldsAreIndexed.js";

export const AggregateArgs = {
pipeline: z.array(z.union([AnyVectorSearchStage, VectorSearchStage])).describe(
`An array of aggregation stages to execute.
const pipelineDescriptionWithVectorSearch = `\
An array of aggregation stages to execute.
\`$vectorSearch\` **MUST** be the first stage of the pipeline, or the first stage of a \`$unionWith\` subpipeline.
### Usage Rules for \`$vectorSearch\`
- **Unset embeddings:**
- **Unset embeddings:**
Unless the user explicitly requests the embeddings, add an \`$unset\` stage **at the end of the pipeline** to remove the embedding field and avoid context limits. **The $unset stage in this situation is mandatory**.
- **Pre-filtering:**
If the user requests additional filtering, include filters in \`$vectorSearch.filter\` only for pre-filter fields in the vector index.
Expand All @@ -32,20 +31,28 @@ If the user requests additional filtering, include filters in \`$vectorSearch.fi
For all remaining filters, add a $match stage after $vectorSearch.
### Note to LLM
- If unsure which fields are filterable, use the collection-indexes tool to determine valid prefilter fields.
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.`
),
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
- If no requested filters are valid prefilters, omit the filter key from $vectorSearch.\
`;

const genericPipelineDescription = "An array of aggregation stages to execute.";

export const getAggregateArgs = (vectorSearchEnabled: boolean) =>
({
pipeline: z
.array(vectorSearchEnabled ? z.union([AnyAggregateStage, VectorSearchStage]) : AnyAggregateStage)
.describe(vectorSearchEnabled ? pipelineDescriptionWithVectorSearch : genericPipelineDescription),
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
`),
};
}) as const;

export class AggregateTool extends MongoDBToolBase {
public name = "aggregate";
protected description = "Run an aggregation against a MongoDB collection";
protected argsShape = {
...DbOperationArgs,
...AggregateArgs,
...getAggregateArgs(this.isFeatureEnabled("vectorSearch")),
};
public operationType: OperationType = "read";

Expand Down
6 changes: 4 additions & 2 deletions src/tools/mongodb/read/export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type { OperationType, ToolArgs } from "../../tool.js";
import { DbOperationArgs, MongoDBToolBase } from "../mongodbTool.js";
import { FindArgs } from "./find.js";
import { jsonExportFormat } from "../../../common/exportsManager.js";
import { AggregateArgs } from "./aggregate.js";
import { getAggregateArgs } from "./aggregate.js";

export class ExportTool extends MongoDBToolBase {
public name = "export";
Expand All @@ -32,7 +32,9 @@ export class ExportTool extends MongoDBToolBase {
name: z
.literal("aggregate")
.describe("The literal name 'aggregate' to represent an aggregation cursor as target."),
arguments: z.object(AggregateArgs).describe("The arguments for 'aggregate' operation."),
arguments: z
.object(getAggregateArgs(this.isFeatureEnabled("vectorSearch")))
.describe("The arguments for 'aggregate' operation."),
}),
])
)
Expand Down
18 changes: 17 additions & 1 deletion tests/integration/tools/mongodb/create/createIndex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ import { ObjectId, type Collection, type Document, type IndexDirection } from "m
import { afterEach, beforeEach, describe, expect, it } from "vitest";

describeWithMongoDB("createIndex tool when search is not enabled", (integration) => {
validateToolMetadata(integration, "create-index", "Create an index for a collection", [
...databaseCollectionParameters,
{
name: "definition",
type: "array",
description: "The index definition. Use 'classic' for standard indexes.",
required: true,
},
{
name: "name",
type: "string",
description: "The name of the index",
required: false,
},
]);

it("doesn't allow creating vector search indexes", async () => {
expect(integration.mcpServer().userConfig.previewFeatures).to.not.include("vectorSearch");

Expand Down Expand Up @@ -99,7 +115,7 @@ describeWithMongoDB(
name: "definition",
type: "array",
description:
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes",
"The index definition. Use 'classic' for standard indexes and 'vectorSearch' for vector search indexes.",
required: true,
},
{
Expand Down
Loading
Loading