From a32b3596f0191de7d0e769b4a734439efa8f08be Mon Sep 17 00:00:00 2001 From: bailey Date: Wed, 19 Nov 2025 11:02:55 -0700 Subject: [PATCH 1/4] rename isSDAMUnrecoverableError to match spec's name --- src/error.ts | 8 +------- src/sdam/server.ts | 12 ++++++++++-- test/unit/error.test.ts | 29 ++++++++--------------------- 3 files changed, 19 insertions(+), 30 deletions(-) diff --git a/src/error.ts b/src/error.ts index 9822361e72..eda39b7dc3 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1528,13 +1528,7 @@ export function isNodeShuttingDownError(err: MongoError): boolean { * * @see https://github.com/mongodb/specifications/blob/master/source/server-discovery-and-monitoring/server-discovery-and-monitoring.md#not-writable-primary-and-node-is-recovering */ -export function isSDAMUnrecoverableError(error: MongoError): boolean { - // NOTE: null check is here for a strictly pre-CMAP world, a timeout or - // close event are considered unrecoverable - if (error instanceof MongoParseError || error == null) { - return true; - } - +export function isStateChangeError(error: MongoError): boolean { return isRecoveringError(error) || isNotWritablePrimaryError(error); } diff --git a/src/sdam/server.ts b/src/sdam/server.ts index f14188a0d5..24a5373a7b 100644 --- a/src/sdam/server.ts +++ b/src/sdam/server.ts @@ -22,12 +22,13 @@ import { import { type AnyError, isNodeShuttingDownError, - isSDAMUnrecoverableError, + isStateChangeError, MONGODB_ERROR_CODES, MongoError, MongoErrorLabel, MongoNetworkError, MongoNetworkTimeoutError, + MongoParseError, MongoRuntimeError, MongoServerClosedError, type MongoServerError, @@ -412,7 +413,14 @@ export class Server extends TypedEventEmitter { this.pool.clear({ serviceId: connection.serviceId }); } } else { - if (isSDAMUnrecoverableError(error)) { + // TODO: considering parse errors as SDAM unrecoverable errors seem + // questionable. What if the parse error only comes from an application connection, + // indicating some bytes were lost in transmission? It seems overkill to completely + // kill the server. + // Parse errors from monitoring connections are already handled because the + // error would be wrapped in a ServerHeartbeatFailedEvent, which would mark the + // server unknown and clear the pool. Can we remove this? + if (isStateChangeError(error) || error instanceof MongoParseError) { if (shouldHandleStateChangeError(this, error)) { const shouldClearPool = isNodeShuttingDownError(error); if (this.loadBalanced && connection && shouldClearPool) { diff --git a/test/unit/error.test.ts b/test/unit/error.test.ts index 32e90a0785..c5124c60f9 100644 --- a/test/unit/error.test.ts +++ b/test/unit/error.test.ts @@ -10,7 +10,7 @@ import * as importsFromErrorSrc from '../../src/error'; import { isResumableError, isRetryableReadError, - isSDAMUnrecoverableError, + isStateChangeError, LEGACY_NOT_PRIMARY_OR_SECONDARY_ERROR_MESSAGE, LEGACY_NOT_WRITABLE_PRIMARY_ERROR_MESSAGE, MONGODB_ERROR_CODES, @@ -211,26 +211,13 @@ describe('MongoErrors', () => { }); }); - describe('#isSDAMUnrecoverableError', function () { - context('when the error is a MongoParseError', function () { - it('returns true', function () { - const error = new MongoParseError(''); - expect(isSDAMUnrecoverableError(error)).to.be.true; - }); - }); - - context('when the error is null', function () { - it('returns true', function () { - expect(isSDAMUnrecoverableError(null)).to.be.true; - }); - }); - + describe('#isStateChangeError', function () { context('when the error has a "node is recovering" error code', function () { it('returns true', function () { const error = new MongoError(''); // Code for NotPrimaryOrSecondary error.code = 13436; - expect(isSDAMUnrecoverableError(error)).to.be.true; + expect(isStateChangeError(error)).to.be.true; }); }); @@ -239,7 +226,7 @@ describe('MongoErrors', () => { const error = new MongoError(''); // Code for NotWritablePrimary error.code = 10107; - expect(isSDAMUnrecoverableError(error)).to.be.true; + expect(isStateChangeError(error)).to.be.true; }); }); @@ -250,7 +237,7 @@ describe('MongoErrors', () => { // If the response includes an error code, it MUST be solely used to determine if error is a "node is recovering" or "not writable primary" error. const error = new MongoError(NODE_IS_RECOVERING_ERROR_MESSAGE.source); error.code = 555; - expect(isSDAMUnrecoverableError(error)).to.be.false; + expect(isStateChangeError(error)).to.be.false; }); } ); @@ -262,7 +249,7 @@ describe('MongoErrors', () => { const error = new MongoError( `this is ${LEGACY_NOT_WRITABLE_PRIMARY_ERROR_MESSAGE.source}.` ); - expect(isSDAMUnrecoverableError(error)).to.be.true; + expect(isStateChangeError(error)).to.be.true; }); } ); @@ -272,7 +259,7 @@ describe('MongoErrors', () => { function () { it('returns true', function () { const error = new MongoError(`the ${NODE_IS_RECOVERING_ERROR_MESSAGE} from an error`); - expect(isSDAMUnrecoverableError(error)).to.be.true; + expect(isStateChangeError(error)).to.be.true; }); } ); @@ -284,7 +271,7 @@ describe('MongoErrors', () => { const error = new MongoError( `this is ${LEGACY_NOT_PRIMARY_OR_SECONDARY_ERROR_MESSAGE}, so we have a problem ` ); - expect(isSDAMUnrecoverableError(error)).to.be.true; + expect(isStateChangeError(error)).to.be.true; }); } ); From 65537a15b66bb42d4a0d1ff7c17bcbe501410aa4 Mon Sep 17 00:00:00 2001 From: bailey Date: Wed, 19 Nov 2025 11:11:17 -0700 Subject: [PATCH 2/4] remove shouldHandleStateChangeError --- src/sdam/server.ts | 42 +++++++++++++++++++++-------------------- test/unit/error.test.ts | 1 - 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/sdam/server.ts b/src/sdam/server.ts index 24a5373a7b..e608054736 100644 --- a/src/sdam/server.ts +++ b/src/sdam/server.ts @@ -392,9 +392,7 @@ export class Server extends TypedEventEmitter { return; } - const isStaleError = - error.connectionGeneration && error.connectionGeneration < this.pool.generation; - if (isStaleError) { + if (isStaleError(this, error)) { return; } @@ -421,19 +419,17 @@ export class Server extends TypedEventEmitter { // error would be wrapped in a ServerHeartbeatFailedEvent, which would mark the // server unknown and clear the pool. Can we remove this? if (isStateChangeError(error) || error instanceof MongoParseError) { - if (shouldHandleStateChangeError(this, error)) { - const shouldClearPool = isNodeShuttingDownError(error); - if (this.loadBalanced && connection && shouldClearPool) { - this.pool.clear({ serviceId: connection.serviceId }); - } + const shouldClearPool = isNodeShuttingDownError(error); + if (this.loadBalanced && connection && shouldClearPool) { + this.pool.clear({ serviceId: connection.serviceId }); + } - if (!this.loadBalanced) { - if (shouldClearPool) { - error.addErrorLabel(MongoErrorLabel.ResetPool); - } - markServerUnknown(this, error); - process.nextTick(() => this.requestCheck()); + if (!this.loadBalanced) { + if (shouldClearPool) { + error.addErrorLabel(MongoErrorLabel.ResetPool); } + markServerUnknown(this, error); + process.nextTick(() => this.requestCheck()); } } } @@ -568,12 +564,6 @@ function connectionIsStale(pool: ConnectionPool, connection: Connection) { return connection.generation !== pool.generation; } -function shouldHandleStateChangeError(server: Server, err: MongoError) { - const etv = err.topologyVersion; - const stv = server.description.topologyVersion; - return compareTopologyVersion(stv, etv) < 0; -} - function inActiveTransaction(session: ClientSession | undefined, cmd: Document) { return session && session.inTransaction() && !isTransactionCommand(cmd); } @@ -583,3 +573,15 @@ function inActiveTransaction(session: ClientSession | undefined, cmd: Document) function isRetryableWritesEnabled(topology: Topology) { return topology.s.options.retryWrites !== false; } + +function isStaleError(server: Server, error: MongoError): boolean { + const currentGeneration = server.pool.generation; + const generation = error.connectionGeneration; + + if (generation && generation < currentGeneration) { + return true; + } + + const currentTopologyVersion = server.description.topologyVersion; + return compareTopologyVersion(currentTopologyVersion, error.topologyVersion) >= 0; +} diff --git a/test/unit/error.test.ts b/test/unit/error.test.ts index c5124c60f9..34428b0666 100644 --- a/test/unit/error.test.ts +++ b/test/unit/error.test.ts @@ -26,7 +26,6 @@ import { MongoNetworkError, MongoNetworkTimeoutError, MongoOperationTimeoutError, - MongoParseError, MongoRuntimeError, MongoServerError, MongoSystemError, From 31cecb8d3ea9013ed166705ddf77695c30bd6bde Mon Sep 17 00:00:00 2001 From: bailey Date: Wed, 19 Nov 2025 13:15:56 -0700 Subject: [PATCH 3/4] swap ordering of logic in server --- src/sdam/server.ts | 59 ++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/src/sdam/server.ts b/src/sdam/server.ts index e608054736..44a2727cc1 100644 --- a/src/sdam/server.ts +++ b/src/sdam/server.ts @@ -401,37 +401,46 @@ export class Server extends TypedEventEmitter { const isNetworkTimeoutBeforeHandshakeError = error instanceof MongoNetworkError && error.beforeHandshake; const isAuthHandshakeError = error.hasErrorLabel(MongoErrorLabel.HandshakeError); - if (isNetworkNonTimeoutError || isNetworkTimeoutBeforeHandshakeError || isAuthHandshakeError) { - // In load balanced mode we never mark the server as unknown and always - // clear for the specific service id. + + // TODO: considering parse errors as SDAM unrecoverable errors seem + // questionable. What if the parse error only comes from an application connection, + // indicating some bytes were lost in transmission? It seems overkill to completely + // kill the server. + // Parse errors from monitoring connections are already handled because the + // error would be wrapped in a ServerHeartbeatFailedEvent, which would mark the + // server unknown and clear the pool. Can we remove this? + if (isStateChangeError(error) || error instanceof MongoParseError) { + const shouldClearPool = isNodeShuttingDownError(error); + + // from the SDAM spec: The driver MUST synchronize clearing the pool with updating the topology. + // In load balanced mode: there is no monitoring, so there is no topology to update. We simply clear the pool. + // For other topologies: the `ResetPool` label instructs the topology to clear the server's pool in `updateServer()`. + if (!this.loadBalanced) { + if (shouldClearPool) { + error.addErrorLabel(MongoErrorLabel.ResetPool); + } + markServerUnknown(this, error); + process.nextTick(() => this.requestCheck()); + return; + } + + if (connection && shouldClearPool) { + this.pool.clear({ serviceId: connection.serviceId }); + } + } else if ( + isNetworkNonTimeoutError || + isNetworkTimeoutBeforeHandshakeError || + isAuthHandshakeError + ) { + // from the SDAM spec: The driver MUST synchronize clearing the pool with updating the topology. + // In load balanced mode: there is no monitoring, so there is no topology to update. We simply clear the pool. + // For other topologies: the `ResetPool` label instructs the topology to clear the server's pool in `updateServer()`. if (!this.loadBalanced) { error.addErrorLabel(MongoErrorLabel.ResetPool); markServerUnknown(this, error); } else if (connection) { this.pool.clear({ serviceId: connection.serviceId }); } - } else { - // TODO: considering parse errors as SDAM unrecoverable errors seem - // questionable. What if the parse error only comes from an application connection, - // indicating some bytes were lost in transmission? It seems overkill to completely - // kill the server. - // Parse errors from monitoring connections are already handled because the - // error would be wrapped in a ServerHeartbeatFailedEvent, which would mark the - // server unknown and clear the pool. Can we remove this? - if (isStateChangeError(error) || error instanceof MongoParseError) { - const shouldClearPool = isNodeShuttingDownError(error); - if (this.loadBalanced && connection && shouldClearPool) { - this.pool.clear({ serviceId: connection.serviceId }); - } - - if (!this.loadBalanced) { - if (shouldClearPool) { - error.addErrorLabel(MongoErrorLabel.ResetPool); - } - markServerUnknown(this, error); - process.nextTick(() => this.requestCheck()); - } - } } } From 0b6dce7d5e41225f97ae85f9c19699a83a605a31 Mon Sep 17 00:00:00 2001 From: bailey Date: Fri, 21 Nov 2025 14:41:45 -0700 Subject: [PATCH 4/4] clarifying comment --- src/sdam/server.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/sdam/server.ts b/src/sdam/server.ts index 44a2727cc1..028dad80ff 100644 --- a/src/sdam/server.ts +++ b/src/sdam/server.ts @@ -402,13 +402,7 @@ export class Server extends TypedEventEmitter { error instanceof MongoNetworkError && error.beforeHandshake; const isAuthHandshakeError = error.hasErrorLabel(MongoErrorLabel.HandshakeError); - // TODO: considering parse errors as SDAM unrecoverable errors seem - // questionable. What if the parse error only comes from an application connection, - // indicating some bytes were lost in transmission? It seems overkill to completely - // kill the server. - // Parse errors from monitoring connections are already handled because the - // error would be wrapped in a ServerHeartbeatFailedEvent, which would mark the - // server unknown and clear the pool. Can we remove this? + // Perhaps questionable and divergent from the spec, but considering MongoParseErrors like state change errors was legacy behavior. if (isStateChangeError(error) || error instanceof MongoParseError) { const shouldClearPool = isNodeShuttingDownError(error);