diff --git a/libs/lib-services/src/logger/logger-index.ts b/libs/lib-services/src/logger/logger-index.ts index af7e3686f..74ed83b5f 100644 --- a/libs/lib-services/src/logger/logger-index.ts +++ b/libs/lib-services/src/logger/logger-index.ts @@ -1,2 +1,3 @@ export * from './Logger.js'; export { Logger } from 'winston'; +export { createLogger, format, transports } from 'winston'; diff --git a/libs/lib-services/src/migrations/AbstractMigrationAgent.ts b/libs/lib-services/src/migrations/AbstractMigrationAgent.ts index fa063d360..551296614 100644 --- a/libs/lib-services/src/migrations/AbstractMigrationAgent.ts +++ b/libs/lib-services/src/migrations/AbstractMigrationAgent.ts @@ -1,11 +1,12 @@ import { LockManager } from '../locks/LockManager.js'; -import { logger } from '../logger/Logger.js'; +import { logger as defaultLogger, Logger } from '../logger/logger-index.js'; import * as defs from './migration-definitions.js'; export type MigrationParams = { count?: number; direction: defs.Direction; migrationContext?: Generics['MIGRATION_CONTEXT']; + logger?: Logger; }; type WriteLogsParams = { @@ -20,10 +21,12 @@ export type MigrationAgentGenerics = { export type RunMigrationParams = MigrationParams & { migrations: defs.Migration[]; maxLockWaitMs?: number; + logger?: Logger; }; type ExecuteParams = RunMigrationParams & { state?: defs.MigrationState; + logger: Logger; }; export const DEFAULT_MAX_LOCK_WAIT_MS = 3 * 60 * 1000; // 3 minutes @@ -46,9 +49,11 @@ export abstract class AbstractMigrationAgent { + const logger = params.logger; const internalMigrations = await this.loadInternalMigrations(); let migrations = [...internalMigrations, ...params.migrations]; diff --git a/modules/module-mongodb-storage/src/migrations/db/migrations/1764667093139-current-data-cleanup.ts b/modules/module-mongodb-storage/src/migrations/db/migrations/1764667093139-current-data-cleanup.ts new file mode 100644 index 000000000..68a399172 --- /dev/null +++ b/modules/module-mongodb-storage/src/migrations/db/migrations/1764667093139-current-data-cleanup.ts @@ -0,0 +1,43 @@ +import { migrations } from '@powersync/service-core'; +import * as storage from '../../../storage/storage-index.js'; +import { MongoStorageConfig } from '../../../types/types.js'; + +const INDEX_NAME = 'pending_delete'; + +export const up: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = context; + const db = storage.createPowerSyncMongo(configuration.storage as MongoStorageConfig); + + try { + await db.current_data.createIndex( + { + '_id.g': 1, + pending_delete: 1 + }, + { + partialFilterExpression: { pending_delete: { $exists: true } }, + name: INDEX_NAME + } + ); + } finally { + await db.client.close(); + } +}; + +export const down: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = context; + + const db = storage.createPowerSyncMongo(configuration.storage as MongoStorageConfig); + + try { + if (await db.current_data.indexExists(INDEX_NAME)) { + await db.current_data.dropIndex(INDEX_NAME); + } + } finally { + await db.client.close(); + } +}; diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts index 5d5766a1e..c8f6f96d9 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts @@ -19,6 +19,7 @@ 
import { isCompleteRow, SaveOperationTag, storage, + SyncRuleState, utils } from '@powersync/service-core'; import * as timers from 'node:timers/promises'; @@ -28,7 +29,7 @@ import { MongoIdSequence } from './MongoIdSequence.js'; import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js'; import { cacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; import { PersistedBatch } from './PersistedBatch.js'; -import { idPrefixFilter } from '../../utils/util.js'; +import { idPrefixFilter, mongoTableId } from '../../utils/util.js'; /** * 15MB @@ -42,6 +43,8 @@ export const MAX_ROW_SIZE = 15 * 1024 * 1024; // In the future, we can investigate allowing multiple replication streams operating independently. const replicationMutex = new utils.Mutex(); +export const EMPTY_DATA = new bson.Binary(bson.serialize({})); + export interface MongoBucketBatchOptions { db: PowerSyncMongo; syncRules: SqlSyncRules; @@ -49,7 +52,6 @@ export interface MongoBucketBatchOptions { slotName: string; lastCheckpointLsn: string | null; keepaliveOp: InternalOpId | null; - noCheckpointBeforeLsn: string; resumeFromLsn: string | null; storeCurrentData: boolean; /** @@ -93,8 +95,6 @@ export class MongoBucketBatch */ private last_checkpoint_lsn: string | null = null; - private no_checkpoint_before_lsn: string; - private persisted_op: InternalOpId | null = null; /** @@ -123,7 +123,6 @@ export class MongoBucketBatch this.db = options.db; this.group_id = options.groupId; this.last_checkpoint_lsn = options.lastCheckpointLsn; - this.no_checkpoint_before_lsn = options.noCheckpointBeforeLsn; this.resumeFromLsn = options.resumeFromLsn; this.session = this.client.startSession(); this.slot_name = options.slotName; @@ -213,7 +212,7 @@ export class MongoBucketBatch // the order of processing, which then becomes really tricky to manage. // This now takes 2+ queries, but doesn't have any issues with order of operations. const sizeLookups: SourceKey[] = batch.batch.map((r) => { - return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId }; + return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); sizes = new Map(); @@ -256,7 +255,7 @@ export class MongoBucketBatch continue; } const lookups: SourceKey[] = b.map((r) => { - return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId }; + return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId }; }); let current_data_lookup = new Map(); // With skipExistingRows, we only need to know whether or not the row exists. @@ -336,7 +335,7 @@ export class MongoBucketBatch let existing_lookups: bson.Binary[] = []; let new_lookups: bson.Binary[] = []; - const before_key: SourceKey = { g: this.group_id, t: record.sourceTable.id, k: beforeId }; + const before_key: SourceKey = { g: this.group_id, t: mongoTableId(record.sourceTable.id), k: beforeId }; if (this.skipExistingRows) { if (record.tag == SaveOperationTag.INSERT) { @@ -399,7 +398,7 @@ export class MongoBucketBatch let afterData: bson.Binary | undefined; if (afterId != null && !this.storeCurrentData) { - afterData = new bson.Binary(bson.serialize({})); + afterData = EMPTY_DATA; } else if (afterId != null) { try { // This will fail immediately if the record is > 16MB. @@ -548,7 +547,7 @@ export class MongoBucketBatch // 5. TOAST: Update current data and bucket list. 
if (afterId) { // Insert or update - const after_key: SourceKey = { g: this.group_id, t: sourceTable.id, k: afterId }; + const after_key: SourceKey = { g: this.group_id, t: sourceTable.id as bson.ObjectId, k: afterId }; batch.upsertCurrentData(after_key, { data: afterData, buckets: new_buckets, @@ -564,7 +563,10 @@ export class MongoBucketBatch if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) { // Either a delete (afterId == null), or replaced the old replication id - batch.deleteCurrentData(before_key); + // Note that this is a soft delete. + // We don't specifically need a new or unique op_id here, but it must be greater than the + // last checkpoint, so we use next(). + batch.softDeleteCurrentData(before_key, opSeq.next()); } return result; } @@ -672,64 +674,7 @@ export class MongoBucketBatch await this.flush(options); - if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { - // When re-applying transactions, don't create a new checkpoint until - // we are past the last transaction. - this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); - // Cannot create a checkpoint yet - return false - return false; - } - if (lsn < this.no_checkpoint_before_lsn) { - if (Date.now() - this.lastWaitingLogThottled > 5_000) { - this.logger.info( - `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}` - ); - this.lastWaitingLogThottled = Date.now(); - } - - // Edge case: During initial replication, we have a no_checkpoint_before_lsn set, - // and don't actually commit the snapshot. - // The first commit can happen from an implicit keepalive message. - // That needs the persisted_op to get an accurate checkpoint, so - // we persist that in keepalive_op. - - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - keepalive_op: this.persisted_op == null ? null : String(this.persisted_op) - } - }, - { session: this.session } - ); - await this.db.notifyCheckpoint(); - - // Cannot create a checkpoint yet - return false - return false; - } - - if (!createEmptyCheckpoints && this.persisted_op == null) { - // Nothing to commit - also return true - await this.autoActivate(lsn); - return true; - } - const now = new Date(); - const update: Partial = { - last_checkpoint_lsn: lsn, - last_checkpoint_ts: now, - last_keepalive_ts: now, - snapshot_done: true, - last_fatal_error: null, - last_fatal_error_ts: null, - keepalive_op: null - }; - - if (this.persisted_op != null) { - update.last_checkpoint = this.persisted_op; - } // Mark relevant write checkpoints as "processed". // This makes it easier to identify write checkpoints that are "valid" in order. 
@@ -748,23 +693,157 @@ export class MongoBucketBatch } ); - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, + const can_checkpoint = { + $and: [ + { $eq: ['$snapshot_done', true] }, + { + $or: [{ $eq: ['$last_checkpoint_lsn', null] }, { $lte: ['$last_checkpoint_lsn', { $literal: lsn }] }] + }, + { + $or: [{ $eq: ['$no_checkpoint_before', null] }, { $lte: ['$no_checkpoint_before', { $literal: lsn }] }] + } + ] + }; + + const new_keepalive_op = { + $cond: [ + can_checkpoint, + { $literal: null }, + { + $toString: { + $max: [{ $toLong: '$keepalive_op' }, { $literal: this.persisted_op }] + } + } + ] + }; + + const new_last_checkpoint = { + $cond: [ + can_checkpoint, + { + $max: ['$last_checkpoint', { $literal: this.persisted_op }, { $toLong: '$keepalive_op' }] + }, + '$last_checkpoint' + ] + }; + + let filter: mongo.Filter = { _id: this.group_id }; + if (!createEmptyCheckpoints) { + // Only create checkpoint if we have new data + filter = { + _id: this.group_id, + $expr: { + $or: [{ $ne: ['$keepalive_op', new_keepalive_op] }, { $ne: ['$last_checkpoint', new_last_checkpoint] }] + } + }; + } + + let updateResult = await this.db.sync_rules.findOneAndUpdate( + filter, + [ + { + $set: { + _can_checkpoint: can_checkpoint + } + }, + { + $set: { + last_checkpoint_lsn: { + $cond: ['$_can_checkpoint', { $literal: lsn }, '$last_checkpoint_lsn'] + }, + last_checkpoint_ts: { + $cond: ['$_can_checkpoint', { $literal: now }, '$last_checkpoint_ts'] + }, + last_keepalive_ts: { $literal: now }, + last_fatal_error: { $literal: null }, + last_fatal_error_ts: { $literal: null }, + keepalive_op: new_keepalive_op, + last_checkpoint: new_last_checkpoint, + // Unset snapshot_lsn on checkpoint + snapshot_lsn: { + $cond: ['$_can_checkpoint', { $literal: null }, '$snapshot_lsn'] + } + } + }, + { + $unset: '_can_checkpoint' + } + ], { - $set: update, - $unset: { snapshot_lsn: 1 } - }, - { session: this.session } + session: this.session, + returnDocument: 'after', + projection: { + snapshot_done: 1, + last_checkpoint_lsn: 1, + no_checkpoint_before: 1, + keepalive_op: 1, + last_checkpoint: 1 + } + } ); - await this.autoActivate(lsn); - await this.db.notifyCheckpoint(); - this.persisted_op = null; - this.last_checkpoint_lsn = lsn; + if (updateResult == null) { + const existing = await this.db.sync_rules.findOne( + { _id: this.group_id }, + { + session: this.session, + projection: { + snapshot_done: 1, + last_checkpoint_lsn: 1, + no_checkpoint_before: 1, + keepalive_op: 1, + last_checkpoint: 1 + } + } + ); + if (existing == null) { + throw new ReplicationAssertionError('Failed to load sync_rules document during checkpoint update'); + } + // No-op update - reuse existing document for downstream logic. + // This can happen when last_checkpoint and keepalive_op would remain unchanged. + updateResult = existing; + } + const checkpointCreated = + updateResult.snapshot_done === true && + updateResult.last_checkpoint_lsn === lsn && + updateResult.last_checkpoint != null; + + if (!checkpointCreated) { + // Failed on snapshot_done or no_checkpoint_before. + if (Date.now() - this.lastWaitingLogThottled > 5_000) { + this.logger.info( + `Waiting before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}. 
Current state: ${JSON.stringify( + { + snapshot_done: updateResult.snapshot_done, + last_checkpoint_lsn: updateResult.last_checkpoint_lsn, + no_checkpoint_before: updateResult.no_checkpoint_before + } + )}` + ); + this.lastWaitingLogThottled = Date.now(); + } + } else { + this.logger.info(`Created checkpoint at ${lsn}/${updateResult.last_checkpoint}`); + await this.autoActivate(lsn); + await this.db.notifyCheckpoint(); + this.persisted_op = null; + this.last_checkpoint_lsn = lsn; + await this.cleanupCurrentData(updateResult.last_checkpoint!); + } return true; } + private async cleanupCurrentData(lastCheckpoint: bigint) { + const result = await this.db.current_data.deleteMany({ + '_id.g': this.group_id, + pending_delete: { $exists: true, $lte: lastCheckpoint } + }); + if (result.deletedCount > 0) { + this.logger.info( + `Cleaned up ${result.deletedCount} pending delete current_data records for checkpoint ${lastCheckpoint}` + ); + } + } + /** * Switch from processing -> active if relevant. * @@ -782,7 +861,7 @@ export class MongoBucketBatch let activated = false; await session.withTransaction(async () => { const doc = await this.db.sync_rules.findOne({ _id: this.group_id }, { session }); - if (doc && doc.state == 'PROCESSING') { + if (doc && doc.state == SyncRuleState.PROCESSING && doc.snapshot_done && doc.last_checkpoint != null) { await this.db.sync_rules.updateOne( { _id: this.group_id @@ -808,68 +887,19 @@ export class MongoBucketBatch { session } ); activated = true; + } else if (doc?.state != SyncRuleState.PROCESSING) { + this.needsActivation = false; } }); if (activated) { this.logger.info(`Activated new sync rules at ${lsn}`); await this.db.notifyCheckpoint(); + this.needsActivation = false; } - this.needsActivation = false; } async keepalive(lsn: string): Promise { - if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { - // No-op - return false; - } - - if (lsn < this.no_checkpoint_before_lsn) { - return false; - } - - if (this.persisted_op != null) { - // The commit may have been skipped due to "no_checkpoint_before_lsn". 
- // Apply it now if relevant - this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); - return await this.commit(lsn); - } - - await this.db.write_checkpoints.updateMany( - { - processed_at_lsn: null, - 'lsns.1': { $lte: lsn } - }, - { - $set: { - processed_at_lsn: lsn - } - }, - { - session: this.session - } - ); - - await this.db.sync_rules.updateOne( - { - _id: this.group_id - }, - { - $set: { - last_checkpoint_lsn: lsn, - snapshot_done: true, - last_fatal_error: null, - last_fatal_error_ts: null, - last_keepalive_ts: new Date() - }, - $unset: { snapshot_lsn: 1 } - }, - { session: this.session } - ); - await this.autoActivate(lsn); - await this.db.notifyCheckpoint(); - this.last_checkpoint_lsn = lsn; - - return true; + return await this.commit(lsn); } async setResumeLsn(lsn: string): Promise { @@ -935,7 +965,7 @@ export class MongoBucketBatch await this.withTransaction(async () => { for (let table of sourceTables) { - await this.db.source_tables.deleteOne({ _id: table.id }); + await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) }); } }); return result; @@ -970,7 +1000,9 @@ export class MongoBucketBatch while (lastBatchCount == BATCH_LIMIT) { await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => { const current_data_filter: mongo.Filter = { - _id: idPrefixFilter({ g: this.group_id, t: sourceTable.id }, ['k']) + _id: idPrefixFilter({ g: this.group_id, t: mongoTableId(sourceTable.id) }, ['k']), + // Skip soft-deleted data + pending_delete: { $exists: false } }; const cursor = this.db.current_data.find(current_data_filter, { @@ -1001,7 +1033,8 @@ export class MongoBucketBatch sourceKey: value._id.k }); - persistedBatch.deleteCurrentData(value._id); + // Since this is not from streaming replication, we can do a hard delete + persistedBatch.hardDeleteCurrentData(value._id); } await persistedBatch.flush(this.db, session); lastBatchCount = batch.length; @@ -1027,7 +1060,7 @@ export class MongoBucketBatch await this.withTransaction(async () => { await this.db.source_tables.updateOne( - { _id: table.id }, + { _id: mongoTableId(table.id) }, { $set: { snapshot_status: { @@ -1044,9 +1077,41 @@ export class MongoBucketBatch return copy; } - async markSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn: string) { + async markAllSnapshotDone(no_checkpoint_before_lsn: string) { + await this.db.sync_rules.updateOne( + { + _id: this.group_id + }, + { + $set: { + snapshot_done: true, + last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn + } + }, + { session: this.session } + ); + } + + async markTableSnapshotRequired(table: storage.SourceTable): Promise { + await this.db.sync_rules.updateOne( + { + _id: this.group_id + }, + { + $set: { + snapshot_done: false + } + }, + { session: this.session } + ); + } + + async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) { const session = this.session; - const ids = tables.map((table) => table.id); + const ids = tables.map((table) => mongoTableId(table.id)); await this.withTransaction(async () => { await this.db.source_tables.updateMany( @@ -1062,17 +1127,17 @@ export class MongoBucketBatch { session } ); - if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) { - this.no_checkpoint_before_lsn = no_checkpoint_before_lsn; - + if (no_checkpoint_before_lsn != null) { await this.db.sync_rules.updateOne( { _id: this.group_id }, { $set: { - no_checkpoint_before: 
no_checkpoint_before_lsn, last_keepalive_ts: new Date() + }, + $max: { + no_checkpoint_before: no_checkpoint_before_lsn } }, { session: this.session } diff --git a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts index c36a27322..3180c39f6 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts @@ -175,7 +175,6 @@ export class MongoSyncBucketStorage slotName: this.slot_name, lastCheckpointLsn: checkpoint_lsn, resumeFromLsn: maxLsn(checkpoint_lsn, doc?.snapshot_lsn), - noCheckpointBeforeLsn: doc?.no_checkpoint_before ?? options.zeroLSN, keepaliveOp: doc?.keepalive_op ? BigInt(doc.keepalive_op) : null, storeCurrentData: options.storeCurrentData, skipExistingRows: options.skipExistingRows ?? false, @@ -562,7 +561,7 @@ export class MongoSyncBucketStorage async clear(options?: storage.ClearStorageOptions): Promise { while (true) { if (options?.signal?.aborted) { - throw new ReplicationAbortedError('Aborted clearing data'); + throw new ReplicationAbortedError('Aborted clearing data', options.signal.reason); } try { await this.clearIteration(); diff --git a/modules/module-mongodb-storage/src/storage/implementation/OperationBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/OperationBatch.ts index 43772a46c..95193042f 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/OperationBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/OperationBatch.ts @@ -2,6 +2,7 @@ import { ToastableSqliteRow } from '@powersync/service-sync-rules'; import * as bson from 'bson'; import { storage } from '@powersync/service-core'; +import { mongoTableId } from '../storage-index.js'; /** * Maximum number of operations in a batch. @@ -86,8 +87,8 @@ export class RecordOperation { const beforeId = record.beforeReplicaId ?? record.afterReplicaId; this.afterId = afterId; this.beforeId = beforeId; - this.internalBeforeKey = cacheKey(record.sourceTable.id, beforeId); - this.internalAfterKey = afterId ? cacheKey(record.sourceTable.id, afterId) : null; + this.internalBeforeKey = cacheKey(mongoTableId(record.sourceTable.id), beforeId); + this.internalAfterKey = afterId ? 
cacheKey(mongoTableId(record.sourceTable.id), afterId) : null; this.estimatedSize = estimateRowSize(record.before) + estimateRowSize(record.after); } diff --git a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts index be3823aad..1b41fa1f6 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/PersistedBatch.ts @@ -5,7 +5,7 @@ import * as bson from 'bson'; import { Logger, logger as defaultLogger } from '@powersync/lib-services-framework'; import { InternalOpId, storage, utils } from '@powersync/service-core'; -import { currentBucketKey, MAX_ROW_SIZE } from './MongoBucketBatch.js'; +import { currentBucketKey, EMPTY_DATA, MAX_ROW_SIZE } from './MongoBucketBatch.js'; import { MongoIdSequence } from './MongoIdSequence.js'; import { PowerSyncMongo } from './db.js'; import { @@ -16,7 +16,7 @@ import { CurrentDataDocument, SourceKey } from './models.js'; -import { replicaIdToSubkey } from '../../utils/util.js'; +import { mongoTableId, replicaIdToSubkey } from '../../utils/util.js'; /** * Maximum size of operations we write in a single transaction. @@ -132,7 +132,7 @@ export class PersistedBatch { o: op_id }, op: 'PUT', - source_table: options.table.id, + source_table: mongoTableId(options.table.id), source_key: options.sourceKey, table: k.table, row_id: k.id, @@ -159,7 +159,7 @@ export class PersistedBatch { o: op_id }, op: 'REMOVE', - source_table: options.table.id, + source_table: mongoTableId(options.table.id), source_key: options.sourceKey, table: bd.table, row_id: bd.id, @@ -208,7 +208,7 @@ export class PersistedBatch { _id: op_id, key: { g: this.group_id, - t: sourceTable.id, + t: mongoTableId(sourceTable.id), k: sourceKey }, lookup: binLookup, @@ -230,7 +230,7 @@ export class PersistedBatch { _id: op_id, key: { g: this.group_id, - t: sourceTable.id, + t: mongoTableId(sourceTable.id), k: sourceKey }, lookup: lookup, @@ -243,7 +243,7 @@ export class PersistedBatch { } } - deleteCurrentData(id: SourceKey) { + hardDeleteCurrentData(id: SourceKey) { const op: mongo.AnyBulkWriteOperation = { deleteOne: { filter: { _id: id } @@ -253,6 +253,25 @@ export class PersistedBatch { this.currentSize += 50; } + softDeleteCurrentData(id: SourceKey, checkpointGreaterThan: bigint) { + const op: mongo.AnyBulkWriteOperation = { + updateOne: { + filter: { _id: id }, + update: { + $set: { + data: EMPTY_DATA, + buckets: [], + lookups: [], + pending_delete: checkpointGreaterThan + } + }, + upsert: true + } + }; + this.currentData.push(op); + this.currentSize += 50; + } + upsertCurrentData(id: SourceKey, values: Partial) { const op: mongo.AnyBulkWriteOperation = { updateOne: { diff --git a/modules/module-mongodb-storage/src/storage/implementation/models.ts b/modules/module-mongodb-storage/src/storage/implementation/models.ts index be906bbef..ccd45a556 100644 --- a/modules/module-mongodb-storage/src/storage/implementation/models.ts +++ b/modules/module-mongodb-storage/src/storage/implementation/models.ts @@ -35,6 +35,12 @@ export interface CurrentDataDocument { data: bson.Binary; buckets: CurrentBucket[]; lookups: bson.Binary[]; + /** + * If set, this can be deleted, once there is a consistent checkpoint >= pending_delete. + * + * This must only be set if buckets = [], lookups = []. 
+ */ + pending_delete?: bigint; } export interface CurrentBucket { diff --git a/modules/module-mongodb-storage/src/utils/test-utils.ts b/modules/module-mongodb-storage/src/utils/test-utils.ts index eece317f4..2724bff26 100644 --- a/modules/module-mongodb-storage/src/utils/test-utils.ts +++ b/modules/module-mongodb-storage/src/utils/test-utils.ts @@ -1,6 +1,6 @@ import { mongo } from '@powersync/lib-service-mongodb'; import { PowerSyncMongo } from '../storage/implementation/db.js'; -import { TestStorageOptions } from '@powersync/service-core'; +import { TestStorageConfig, TestStorageOptions } from '@powersync/service-core'; import { MongoReportStorage } from '../storage/MongoReportStorage.js'; import { MongoBucketStorage } from '../storage/MongoBucketStorage.js'; import { MongoSyncBucketStorageOptions } from '../storage/implementation/MongoSyncBucketStorage.js'; @@ -12,22 +12,25 @@ export type MongoTestStorageOptions = { }; export function mongoTestStorageFactoryGenerator(factoryOptions: MongoTestStorageOptions) { - return async (options?: TestStorageOptions) => { - const db = connectMongoForTests(factoryOptions.url, factoryOptions.isCI); + return { + factory: async (options?: TestStorageOptions) => { + const db = connectMongoForTests(factoryOptions.url, factoryOptions.isCI); - // None of the tests insert data into this collection, so it was never created - if (!(await db.db.listCollections({ name: db.bucket_parameters.collectionName }).hasNext())) { - await db.db.createCollection('bucket_parameters'); - } + // None of the tests insert data into this collection, so it was never created + if (!(await db.db.listCollections({ name: db.bucket_parameters.collectionName }).hasNext())) { + await db.db.createCollection('bucket_parameters'); + } - // Full migrations are not currently run for tests, so we manually create this - await db.createCheckpointEventsCollection(); + // Full migrations are not currently run for tests, so we manually create this + await db.createCheckpointEventsCollection(); - if (!options?.doNotClear) { - await db.clear(); - } + if (!options?.doNotClear) { + await db.clear(); + } - return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions); + return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions); + }, + tableIdStrings: false }; } diff --git a/modules/module-mongodb-storage/src/utils/util.ts b/modules/module-mongodb-storage/src/utils/util.ts index 8b33b5c21..78d28c3c7 100644 --- a/modules/module-mongodb-storage/src/utils/util.ts +++ b/modules/module-mongodb-storage/src/utils/util.ts @@ -92,10 +92,10 @@ export function mapOpEntry(row: BucketDataDocument): utils.OplogEntry { } } -export function replicaIdToSubkey(table: bson.ObjectId, id: storage.ReplicaId): string { +export function replicaIdToSubkey(table: storage.SourceTableId, id: storage.ReplicaId): string { if (storage.isUUID(id)) { // Special case for UUID for backwards-compatiblity - return `${table.toHexString()}/${id.toHexString()}`; + return `${tableIdString(table)}/${id.toHexString()}`; } else { // Hashed UUID from the table and id const repr = bson.serialize({ table, id }); @@ -103,6 +103,21 @@ export function replicaIdToSubkey(table: bson.ObjectId, id: storage.ReplicaId): } } +export function mongoTableId(table: storage.SourceTableId): bson.ObjectId { + if (typeof table == 'string') { + throw new ServiceAssertionError(`Got string table id, expected ObjectId`); + } + return table; +} + +function tableIdString(table: 
storage.SourceTableId) { + if (typeof table == 'string') { + return table; + } else { + return table.toHexString(); + } +} + export function setSessionSnapshotTime(session: mongo.ClientSession, time: bson.Timestamp) { // This is a workaround for the lack of direct support for snapshot reads in the MongoDB driver. if (!session.snapshotEnabled) { diff --git a/modules/module-mongodb-storage/test/src/__snapshots__/storage.test.ts.snap b/modules/module-mongodb-storage/test/src/__snapshots__/storage.test.ts.snap deleted file mode 100644 index c852d392d..000000000 --- a/modules/module-mongodb-storage/test/src/__snapshots__/storage.test.ts.snap +++ /dev/null @@ -1,25 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`Mongo Sync Bucket Storage - Data > empty storage metrics 1`] = ` -{ - "operations_size_bytes": 0, - "parameters_size_bytes": 0, - "replication_size_bytes": 0, -} -`; - -exports[`Mongo Sync Bucket Storage - split buckets > empty storage metrics 1`] = ` -{ - "operations_size_bytes": 0, - "parameters_size_bytes": 0, - "replication_size_bytes": 0, -} -`; - -exports[`Mongo Sync Bucket Storage - split operations > empty storage metrics 1`] = ` -{ - "operations_size_bytes": 0, - "parameters_size_bytes": 0, - "replication_size_bytes": 0, -} -`; diff --git a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts index a1ddc0944..792431bb7 100644 --- a/modules/module-mongodb-storage/test/src/storage_compacting.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_compacting.test.ts @@ -1,4 +1,4 @@ -import { register, TEST_TABLE, test_utils } from '@powersync/service-core-tests'; +import { register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; @@ -6,10 +6,14 @@ import { storage, SyncRulesBucketStorage } from '@powersync/service-core'; describe('Mongo Sync Bucket Storage Compact', () => { register.registerCompactTests(INITIALIZED_MONGO_STORAGE_FACTORY); + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); + describe('with blank bucket_state', () => { // This can happen when migrating from older service versions, that did not populate bucket_state yet. 
const populate = async (bucketStorage: SyncRulesBucketStorage) => { await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -37,7 +41,7 @@ describe('Mongo Sync Bucket Storage Compact', () => { }; const setup = async () => { - await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY(); + await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); const syncRules = await factory.updateSyncRules({ content: ` bucket_definitions: diff --git a/modules/module-mongodb-storage/test/src/storage_sync.test.ts b/modules/module-mongodb-storage/test/src/storage_sync.test.ts index f49d595bf..eaa636600 100644 --- a/modules/module-mongodb-storage/test/src/storage_sync.test.ts +++ b/modules/module-mongodb-storage/test/src/storage_sync.test.ts @@ -1,10 +1,11 @@ import { storage } from '@powersync/service-core'; -import { register, TEST_TABLE, test_utils } from '@powersync/service-core-tests'; +import { register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; describe('sync - mongodb', () => { register.registerSyncTests(INITIALIZED_MONGO_STORAGE_FACTORY); + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], INITIALIZED_MONGO_STORAGE_FACTORY); // The split of returned results can vary depending on storage drivers test('large batch (2)', async () => { @@ -19,7 +20,7 @@ describe('sync - mongodb', () => { - SELECT id, description FROM "%" ` ); - await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY(); + await using factory = await INITIALIZED_MONGO_STORAGE_FACTORY.factory(); const bucketStorage = factory.getInstance(sync_rules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { diff --git a/modules/module-mongodb/src/api/MongoRouteAPIAdapter.ts b/modules/module-mongodb/src/api/MongoRouteAPIAdapter.ts index 9a093b2e8..8fa6a544b 100644 --- a/modules/module-mongodb/src/api/MongoRouteAPIAdapter.ts +++ b/modules/module-mongodb/src/api/MongoRouteAPIAdapter.ts @@ -138,7 +138,7 @@ export class MongoRouteAPIAdapter implements api.RouteAPI { patternResult.tables = []; for (let collection of collections) { const sourceTable = new SourceTable({ - id: 0, + id: '', // not used connectionTag: this.connectionTag, objectId: collection.name, schema: schema, @@ -165,7 +165,7 @@ export class MongoRouteAPIAdapter implements api.RouteAPI { } } else { const sourceTable = new SourceTable({ - id: 0, + id: '', // not used connectionTag: this.connectionTag, objectId: tablePattern.name, schema: schema, diff --git a/modules/module-mongodb/src/replication/ChangeStream.ts b/modules/module-mongodb/src/replication/ChangeStream.ts index 0fa47f8a2..791bf077b 100644 --- a/modules/module-mongodb/src/replication/ChangeStream.ts +++ b/modules/module-mongodb/src/replication/ChangeStream.ts @@ -10,7 +10,6 @@ import { ServiceError } from '@powersync/lib-services-framework'; import { - InternalOpId, MetricsEngine, RelationCache, SaveOperationTag, @@ -371,7 +370,7 @@ export class ChangeStream { for (let table of tablesWithStatus) { await this.snapshotTable(batch, table); - await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable); + await batch.markTableSnapshotDone([table]); this.touch(); } @@ -380,7 +379,7 @@ export class ChangeStream { // point before the data can be considered consistent. 
// We could do this for each individual table, but may as well just do it once for the entire snapshot. const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - await batch.markSnapshotDone([], checkpoint); + await batch.markAllSnapshotDone(checkpoint); // This will not create a consistent checkpoint yet, but will persist the op. // Actual checkpoint will be created when streaming replication caught up. @@ -498,7 +497,7 @@ export class ChangeStream { } if (this.abort_signal.aborted) { - throw new ReplicationAbortedError(`Aborted initial replication`); + throw new ReplicationAbortedError(`Aborted initial replication`, this.abort_signal.reason); } // Pre-fetch next batch, so that we can read and write concurrently @@ -635,7 +634,7 @@ export class ChangeStream { await this.snapshotTable(batch, result.table); const no_checkpoint_before_lsn = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID); - const [table] = await batch.markSnapshotDone([result.table], no_checkpoint_before_lsn); + const [table] = await batch.markTableSnapshotDone([result.table], no_checkpoint_before_lsn); return table; } diff --git a/modules/module-mongodb/test/src/change_stream.test.ts b/modules/module-mongodb/test/src/change_stream.test.ts index c6aacee0a..b9375c935 100644 --- a/modules/module-mongodb/test/src/change_stream.test.ts +++ b/modules/module-mongodb/test/src/change_stream.test.ts @@ -21,7 +21,9 @@ describe('change stream', () => { describeWithStorage({ timeout: 20_000 }, defineChangeStreamTests); }); -function defineChangeStreamTests(factory: storage.TestStorageFactory) { +function defineChangeStreamTests(config: storage.TestStorageConfig) { + const factory = config.factory; + test('replicating basic values', async () => { await using context = await ChangeStreamTestContext.open(factory, { mongoOptions: { postImages: PostImagesOption.READ_ONLY } diff --git a/modules/module-mongodb/test/src/change_stream_utils.ts b/modules/module-mongodb/test/src/change_stream_utils.ts index 122435640..1f54a7810 100644 --- a/modules/module-mongodb/test/src/change_stream_utils.ts +++ b/modules/module-mongodb/test/src/change_stream_utils.ts @@ -22,7 +22,7 @@ import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js'; export class ChangeStreamTestContext { private _walStream?: ChangeStream; private abortController = new AbortController(); - private streamPromise?: Promise; + private streamPromise?: Promise>; public storage?: SyncRulesBucketStorage; /** @@ -103,7 +103,7 @@ export class ChangeStreamTestContext { return this.storage!; } - get walStream() { + get streamer() { if (this.storage == null) { throw new Error('updateSyncRules() first'); } @@ -125,7 +125,7 @@ export class ChangeStreamTestContext { } async replicateSnapshot() { - await this.walStream.initReplication(); + await this.streamer.initReplication(); } /** @@ -143,13 +143,21 @@ export class ChangeStreamTestContext { } startStreaming() { - return (this.streamPromise = this.walStream.streamChanges()); + this.streamPromise = this.streamer + .streamChanges() + .then(() => ({ status: 'fulfilled', value: undefined }) satisfies PromiseFulfilledResult) + .catch((reason) => ({ status: 'rejected', reason }) satisfies PromiseRejectedResult); + return this.streamPromise; } async getCheckpoint(options?: { timeout?: number }) { let checkpoint = await Promise.race([ getClientCheckpoint(this.client, this.db, this.factory, { timeout: options?.timeout ?? 
15_000 }), - this.streamPromise + this.streamPromise?.then((e) => { + if (e.status == 'rejected') { + throw e.reason; + } + }) ]); if (checkpoint == null) { // This indicates an issue with the test setup - streamingPromise completed instead diff --git a/modules/module-mongodb/test/src/chunked_snapshot.test.ts b/modules/module-mongodb/test/src/chunked_snapshot.test.ts index c7c3d653d..930c82e9c 100644 --- a/modules/module-mongodb/test/src/chunked_snapshot.test.ts +++ b/modules/module-mongodb/test/src/chunked_snapshot.test.ts @@ -1,5 +1,5 @@ import { mongo } from '@powersync/lib-service-mongodb'; -import { reduceBucket, TestStorageFactory } from '@powersync/service-core'; +import { reduceBucket, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; import { METRICS_HELPER } from '@powersync/service-core-tests'; import { JSONBig } from '@powersync/service-jsonbig'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; @@ -12,7 +12,9 @@ describe('chunked snapshots', () => { describeWithStorage({ timeout: 120_000 }, defineBatchTests); }); -function defineBatchTests(factory: TestStorageFactory) { +function defineBatchTests(config: TestStorageConfig) { + const { factory } = config; + // This is not as sensitive to the id type as postgres, but we still test a couple of cases test('chunked snapshot (int32)', async () => { await testChunkedSnapshot({ diff --git a/modules/module-mongodb/test/src/resume.test.ts b/modules/module-mongodb/test/src/resume.test.ts index 68e9b2540..a58bd8f4b 100644 --- a/modules/module-mongodb/test/src/resume.test.ts +++ b/modules/module-mongodb/test/src/resume.test.ts @@ -1,7 +1,7 @@ import { ChangeStreamInvalidatedError } from '@module/replication/ChangeStream.js'; import { MongoManager } from '@module/replication/MongoManager.js'; import { normalizeConnectionConfig } from '@module/types/types.js'; -import { BucketStorageFactory, TestStorageOptions } from '@powersync/service-core'; +import { TestStorageConfig } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import { ChangeStreamTestContext } from './change_stream_utils.js'; import { env } from './env.js'; @@ -11,7 +11,9 @@ describe('mongodb resuming replication', () => { describeWithStorage({}, defineResumeTest); }); -function defineResumeTest(factoryGenerator: (options?: TestStorageOptions) => Promise) { +function defineResumeTest(config: TestStorageConfig) { + const factoryGenerator = config.factory; + test('resuming with a different source database', async () => { await using context = await ChangeStreamTestContext.open(factoryGenerator); const { db } = context; @@ -58,8 +60,9 @@ function defineResumeTest(factoryGenerator: (options?: TestStorageOptions) => Pr context2.storage = factory.getInstance(activeContent!); // If this test times out, it likely didn't throw the expected error here. 
- const error = await context2.startStreaming().catch((ex) => ex); + const result = await context2.startStreaming(); // The ChangeStreamReplicationJob will detect this and throw a ChangeStreamInvalidatedError - expect(error).toBeInstanceOf(ChangeStreamInvalidatedError); + expect(result.status).toEqual('rejected'); + expect((result as PromiseRejectedResult).reason).toBeInstanceOf(ChangeStreamInvalidatedError); }); } diff --git a/modules/module-mongodb/test/src/resuming_snapshots.test.ts b/modules/module-mongodb/test/src/resuming_snapshots.test.ts index ff06f6d3f..302f5cc7b 100644 --- a/modules/module-mongodb/test/src/resuming_snapshots.test.ts +++ b/modules/module-mongodb/test/src/resuming_snapshots.test.ts @@ -8,14 +8,14 @@ import { env } from './env.js'; import { describeWithStorage } from './util.js'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { - describeWithStorage({ timeout: 240_000 }, function (factory) { + describeWithStorage({ timeout: 240_000 }, function (config) { test('resuming initial replication (1)', async () => { // Stop early - likely to not include deleted row in first replication attempt. - await testResumingReplication(factory, 2000); + await testResumingReplication(config.factory, 2000); }); test('resuming initial replication (2)', async () => { // Stop late - likely to include deleted row in first replication attempt. - await testResumingReplication(factory, 8000); + await testResumingReplication(config.factory, 8000); }); }); }); diff --git a/modules/module-mongodb/test/src/slow_tests.test.ts b/modules/module-mongodb/test/src/slow_tests.test.ts index 9e21aaf47..df575ef39 100644 --- a/modules/module-mongodb/test/src/slow_tests.test.ts +++ b/modules/module-mongodb/test/src/slow_tests.test.ts @@ -12,7 +12,9 @@ describe.runIf(env.CI || env.SLOW_TESTS)('change stream slow tests', { timeout: describeWithStorage({}, defineSlowTests); }); -function defineSlowTests(factory: storage.TestStorageFactory) { +function defineSlowTests(config: storage.TestStorageConfig) { + const { factory } = config; + test('replicating snapshot with lots of data', async () => { await using context = await ChangeStreamTestContext.open(factory); // Test with low minSnapshotHistoryWindowInSeconds, to trigger: diff --git a/modules/module-mongodb/test/src/util.ts b/modules/module-mongodb/test/src/util.ts index cda52142e..897b0ce73 100644 --- a/modules/module-mongodb/test/src/util.ts +++ b/modules/module-mongodb/test/src/util.ts @@ -4,7 +4,7 @@ import * as postgres_storage from '@powersync/service-module-postgres-storage'; import * as types from '@module/types/types.js'; import { env } from './env.js'; -import { BSON_DESERIALIZE_DATA_OPTIONS, TestStorageFactory } from '@powersync/service-core'; +import { BSON_DESERIALIZE_DATA_OPTIONS, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; import { describe, TestOptions } from 'vitest'; export const TEST_URI = env.MONGO_TEST_DATA_URL; @@ -19,11 +19,11 @@ export const INITIALIZED_MONGO_STORAGE_FACTORY = mongo_storage.test_utils.mongoT isCI: env.CI }); -export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestStorageFactoryGenerator({ +export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestSetup({ url: env.PG_STORAGE_TEST_URL }); -export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageConfig) => void) { 
describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { fn(INITIALIZED_MONGO_STORAGE_FACTORY); }); diff --git a/modules/module-mysql/src/api/MySQLRouteAPIAdapter.ts b/modules/module-mysql/src/api/MySQLRouteAPIAdapter.ts index ef0b7642c..410f1bfe3 100644 --- a/modules/module-mysql/src/api/MySQLRouteAPIAdapter.ts +++ b/modules/module-mysql/src/api/MySQLRouteAPIAdapter.ts @@ -221,7 +221,7 @@ export class MySQLRouteAPIAdapter implements api.RouteAPI { const idColumns = idColumnsResult?.columns ?? []; const sourceTable = new storage.SourceTable({ - id: 0, + id: '', // not used connectionTag: this.config.tag, objectId: tableName, schema: schema, diff --git a/modules/module-mysql/src/replication/BinLogStream.ts b/modules/module-mysql/src/replication/BinLogStream.ts index ef1dc057b..e1aa15817 100644 --- a/modules/module-mysql/src/replication/BinLogStream.ts +++ b/modules/module-mysql/src/replication/BinLogStream.ts @@ -170,7 +170,7 @@ export class BinLogStream { } finally { connection.release(); } - const [table] = await batch.markSnapshotDone([result.table], gtid.comparable); + const [table] = await batch.markTableSnapshotDone([result.table], gtid.comparable); return table; } @@ -275,10 +275,12 @@ export class BinLogStream { const tables = await this.getQualifiedTableNames(batch, tablePattern); for (let table of tables) { await this.snapshotTable(connection as mysql.Connection, batch, table); - await batch.markSnapshotDone([table], headGTID.comparable); + await batch.markTableSnapshotDone([table], headGTID.comparable); await framework.container.probes.touch(); } } + const snapshotDoneGtid = await common.readExecutedGtid(promiseConnection); + await batch.markAllSnapshotDone(snapshotDoneGtid.comparable); await batch.commit(headGTID.comparable); } ); @@ -322,7 +324,10 @@ export class BinLogStream { for await (let row of stream) { if (this.stopped) { - throw new ReplicationAbortedError('Abort signal received - initial replication interrupted.'); + throw new ReplicationAbortedError( + 'Abort signal received - initial replication interrupted.', + this.abortSignal.reason + ); } if (columns == null) { diff --git a/modules/module-mysql/test/src/BinLogStream.test.ts b/modules/module-mysql/test/src/BinLogStream.test.ts index 5d35428b7..e9de3cb30 100644 --- a/modules/module-mysql/test/src/BinLogStream.test.ts +++ b/modules/module-mysql/test/src/BinLogStream.test.ts @@ -18,7 +18,9 @@ describe('BinLogStream tests', () => { describeWithStorage({ timeout: 20_000 }, defineBinlogStreamTests); }); -function defineBinlogStreamTests(factory: storage.TestStorageFactory) { +function defineBinlogStreamTests(config: storage.TestStorageConfig) { + const factory = config.factory; + test('Replicate basic values', async () => { await using context = await BinlogStreamTestContext.open(factory); const { connectionManager } = context; diff --git a/modules/module-mysql/test/src/schema-changes.test.ts b/modules/module-mysql/test/src/schema-changes.test.ts index 99cb25e93..32d840e30 100644 --- a/modules/module-mysql/test/src/schema-changes.test.ts +++ b/modules/module-mysql/test/src/schema-changes.test.ts @@ -26,7 +26,8 @@ const PUT_T3 = test_utils.putOp('test_data', { id: 't3', description: 'test3' }) const REMOVE_T1 = test_utils.removeOp('test_data', 't1'); const REMOVE_T2 = test_utils.removeOp('test_data', 't2'); -function defineTests(factory: storage.TestStorageFactory) { +function defineTests(config: storage.TestStorageConfig) { + const factory = config.factory; let isMySQL57: boolean = false; 
beforeAll(async () => { diff --git a/modules/module-mysql/test/src/util.ts b/modules/module-mysql/test/src/util.ts index 4f18cdc53..23eb076bc 100644 --- a/modules/module-mysql/test/src/util.ts +++ b/modules/module-mysql/test/src/util.ts @@ -1,16 +1,16 @@ +import * as common from '@module/common/common-index.js'; +import { MySQLConnectionManager } from '@module/replication/MySQLConnectionManager.js'; +import { BinLogEventHandler, BinLogListener, Row, SchemaChange } from '@module/replication/zongji/BinLogListener.js'; import * as types from '@module/types/types.js'; import { createRandomServerId, getMySQLVersion, isVersionAtLeast } from '@module/utils/mysql-utils.js'; +import { TableMapEntry } from '@powersync/mysql-zongji'; +import { TestStorageConfig } from '@powersync/service-core'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; +import { TablePattern } from '@powersync/service-sync-rules'; import mysqlPromise from 'mysql2/promise'; -import { env } from './env.js'; import { describe, TestOptions } from 'vitest'; -import { TestStorageFactory } from '@powersync/service-core'; -import { MySQLConnectionManager } from '@module/replication/MySQLConnectionManager.js'; -import { BinLogEventHandler, BinLogListener, Row, SchemaChange } from '@module/replication/zongji/BinLogListener.js'; -import { TableMapEntry } from '@powersync/mysql-zongji'; -import * as common from '@module/common/common-index.js'; -import { TablePattern } from '@powersync/service-sync-rules'; +import { env } from './env.js'; export const TEST_URI = env.MYSQL_TEST_URI; @@ -24,11 +24,11 @@ export const INITIALIZED_MONGO_STORAGE_FACTORY = mongo_storage.test_utils.mongoT isCI: env.CI }); -export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestStorageFactoryGenerator({ +export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestSetup({ url: env.PG_STORAGE_TEST_URL }); -export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageConfig) => void) { describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { fn(INITIALIZED_MONGO_STORAGE_FACTORY); }); diff --git a/modules/module-postgres-storage/src/migrations/scripts/1764667093139-current-data-pending-deletes.ts b/modules/module-postgres-storage/src/migrations/scripts/1764667093139-current-data-pending-deletes.ts new file mode 100644 index 000000000..90ff28b94 --- /dev/null +++ b/modules/module-postgres-storage/src/migrations/scripts/1764667093139-current-data-pending-deletes.ts @@ -0,0 +1,34 @@ +import { migrations } from '@powersync/service-core'; +import { openMigrationDB } from '../migration-utils.js'; + +export const up: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = context; + await using client = openMigrationDB(configuration.storage); + await client.transaction(async (db) => { + await db.sql` + ALTER TABLE current_data + ADD COLUMN pending_delete BIGINT NULL + `.execute(); + await db.sql` + CREATE INDEX IF NOT EXISTS current_data_pending_deletes ON current_data (group_id, pending_delete) + WHERE + pending_delete IS NOT NULL + `.execute(); + }); +}; + +export const down: migrations.PowerSyncMigrationFunction = async (context) => { + const { + service_context: { configuration } + } = 
context; + await using client = openMigrationDB(configuration.storage); + await client.transaction(async (db) => { + await db.sql`DROP INDEX IF EXISTS current_data_pending_deletes`.execute(); + await db.sql` + ALTER TABLE current_data + DROP COLUMN pending_delete + `.execute(); + }); +}; diff --git a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts index 73600b0b8..41f65d181 100644 --- a/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts +++ b/modules/module-postgres-storage/src/storage/PostgresSyncRulesStorage.ts @@ -354,7 +354,6 @@ export class PostgresSyncRulesStorage slot_name: this.slot_name, last_checkpoint_lsn: checkpoint_lsn, keep_alive_op: syncRules?.keepalive_op, - no_checkpoint_before_lsn: syncRules?.no_checkpoint_before ?? options.zeroLSN, resumeFromLsn: maxLsn(syncRules?.snapshot_lsn, checkpoint_lsn), store_current_data: options.storeCurrentData, skip_existing_rows: options.skipExistingRows ?? false, diff --git a/modules/module-postgres-storage/src/storage/batch/OperationBatch.ts b/modules/module-postgres-storage/src/storage/batch/OperationBatch.ts index 2b91fab68..b34d7fb6b 100644 --- a/modules/module-postgres-storage/src/storage/batch/OperationBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/OperationBatch.ts @@ -5,6 +5,7 @@ import { storage, utils } from '@powersync/service-core'; import { RequiredOperationBatchLimits } from '../../types/types.js'; +import { postgresTableId } from './PostgresPersistedBatch.js'; /** * Batch of input operations. @@ -89,13 +90,13 @@ export class RecordOperation { /** * In-memory cache key - must not be persisted. */ -export function cacheKey(sourceTableId: string, id: storage.ReplicaId) { +export function cacheKey(sourceTableId: storage.SourceTableId, id: storage.ReplicaId) { return encodedCacheKey(sourceTableId, storage.serializeReplicaId(id)); } /** * Calculates a cache key for a stored ReplicaId. This is usually stored as a bytea/Buffer. 
*/ -export function encodedCacheKey(sourceTableId: string, storedKey: Buffer) { - return `${sourceTableId}.${storedKey.toString('base64')}`; +export function encodedCacheKey(sourceTableId: storage.SourceTableId, storedKey: Buffer) { + return `${postgresTableId(sourceTableId)}.${storedKey.toString('base64')}`; } diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts index 79daf47a0..9ce7765a1 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresBucketBatch.ts @@ -19,7 +19,8 @@ import { NOTIFICATION_CHANNEL, sql } from '../../utils/db.js'; import { pick } from '../../utils/ts-codec.js'; import { batchCreateCustomWriteCheckpoints } from '../checkpoints/PostgresWriteCheckpointAPI.js'; import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './OperationBatch.js'; -import { PostgresPersistedBatch } from './PostgresPersistedBatch.js'; +import { PostgresPersistedBatch, postgresTableId } from './PostgresPersistedBatch.js'; +import { bigint } from '../../types/codecs.js'; export interface PostgresBucketBatchOptions { logger: Logger; @@ -28,7 +29,6 @@ export interface PostgresBucketBatchOptions { group_id: number; slot_name: string; last_checkpoint_lsn: string | null; - no_checkpoint_before_lsn: string; store_current_data: boolean; keep_alive_op?: InternalOpId | null; resumeFromLsn: string | null; @@ -48,6 +48,18 @@ export interface PostgresBucketBatchOptions { const StatefulCheckpoint = models.ActiveCheckpoint.and(t.object({ state: t.Enum(storage.SyncRuleState) })); type StatefulCheckpointDecoded = t.Decoded; +const CheckpointWithStatus = StatefulCheckpoint.and( + t.object({ + snapshot_done: t.boolean, + no_checkpoint_before: t.string.or(t.Null), + can_checkpoint: t.boolean, + keepalive_op: bigint.or(t.Null), + new_last_checkpoint: bigint.or(t.Null), + created_checkpoint: t.boolean + }) +); +type CheckpointWithStatusDecoded = t.Decoded; + /** * 15MB. Currently matches MongoDB. * This could be increased in future. 
@@ -67,7 +79,6 @@ export class PostgresBucketBatch protected db: lib_postgres.DatabaseClient; protected group_id: number; protected last_checkpoint_lsn: string | null; - protected no_checkpoint_before_lsn: string; protected persisted_op: InternalOpId | null; @@ -85,7 +96,6 @@ export class PostgresBucketBatch this.db = options.db; this.group_id = options.group_id; this.last_checkpoint_lsn = options.last_checkpoint_lsn; - this.no_checkpoint_before_lsn = options.no_checkpoint_before_lsn; this.resumeFromLsn = options.resumeFromLsn; this.write_checkpoint_batch = []; this.sync_rules = options.sync_rules; @@ -187,8 +197,10 @@ export class PostgresBucketBatch WHERE group_id = ${{ type: 'int4', value: this.group_id }} AND source_table = ${{ type: 'varchar', value: sourceTable.id }} + AND pending_delete IS NULL LIMIT ${{ type: 'int4', value: BATCH_LIMIT }} + FOR NO KEY UPDATE `)) { lastBatchCount += rows.length; processedCount += rows.length; @@ -210,7 +222,9 @@ export class PostgresBucketBatch persistedBatch.deleteCurrentData({ // This is serialized since we got it from a DB query serialized_source_key: value.source_key, - source_table_id: sourceTable.id + source_table_id: postgresTableId(sourceTable.id), + // No need for soft delete, since this is not streaming replication + soft: false }); } } @@ -289,92 +303,187 @@ export class PostgresBucketBatch } async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise { - const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options }; + const createEmptyCheckpoints = options?.createEmptyCheckpoints ?? true; await this.flush(); - if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { - // When re-applying transactions, don't create a new checkpoint until - // we are past the last transaction. - this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`); - // Cannot create a checkpoint yet - return false - return false; + const now = new Date().toISOString(); + + const persisted_op = this.persisted_op ?? 
null; + + const result = await this.db.sql` + WITH + selected AS ( + SELECT + id, + state, + last_checkpoint, + last_checkpoint_lsn, + snapshot_done, + no_checkpoint_before, + keepalive_op, + ( + snapshot_done = TRUE + AND ( + last_checkpoint_lsn IS NULL + OR last_checkpoint_lsn <= ${{ type: 'varchar', value: lsn }} + ) + AND ( + no_checkpoint_before IS NULL + OR no_checkpoint_before <= ${{ type: 'varchar', value: lsn }} + ) + ) AS can_checkpoint + FROM + sync_rules + WHERE + id = ${{ type: 'int4', value: this.group_id }} + FOR UPDATE + ), + computed AS ( + SELECT + selected.*, + CASE + WHEN selected.can_checkpoint THEN GREATEST( + COALESCE(selected.last_checkpoint, 0), + COALESCE(${{ type: 'int8', value: persisted_op }}, 0), + COALESCE(selected.keepalive_op, 0) + ) + ELSE selected.last_checkpoint + END AS new_last_checkpoint, + CASE + WHEN selected.can_checkpoint THEN NULL + ELSE GREATEST( + COALESCE(selected.keepalive_op, 0), + COALESCE(${{ type: 'int8', value: persisted_op }}, 0) + ) + END AS new_keepalive_op + FROM + selected + ), + updated AS ( + UPDATE sync_rules AS sr + SET + last_checkpoint_lsn = CASE + WHEN computed.can_checkpoint THEN ${{ type: 'varchar', value: lsn }} + ELSE sr.last_checkpoint_lsn + END, + last_checkpoint_ts = CASE + WHEN computed.can_checkpoint THEN ${{ type: 1184, value: now }} + ELSE sr.last_checkpoint_ts + END, + last_keepalive_ts = ${{ type: 1184, value: now }}, + last_fatal_error = CASE + WHEN computed.can_checkpoint THEN NULL + ELSE sr.last_fatal_error + END, + keepalive_op = computed.new_keepalive_op, + last_checkpoint = computed.new_last_checkpoint, + snapshot_lsn = CASE + WHEN computed.can_checkpoint THEN NULL + ELSE sr.snapshot_lsn + END + FROM + computed + WHERE + sr.id = computed.id + AND ( + sr.keepalive_op IS DISTINCT FROM computed.new_keepalive_op + OR sr.last_checkpoint IS DISTINCT FROM computed.new_last_checkpoint + OR ${{ type: 'bool', value: createEmptyCheckpoints }} + ) + RETURNING + sr.id, + sr.state, + sr.last_checkpoint, + sr.last_checkpoint_lsn, + sr.snapshot_done, + sr.no_checkpoint_before, + computed.can_checkpoint, + computed.keepalive_op, + computed.new_last_checkpoint + ) + SELECT + id, + state, + last_checkpoint, + last_checkpoint_lsn, + snapshot_done, + no_checkpoint_before, + can_checkpoint, + keepalive_op, + new_last_checkpoint, + TRUE AS created_checkpoint + FROM + updated + UNION ALL + SELECT + id, + state, + new_last_checkpoint AS last_checkpoint, + last_checkpoint_lsn, + snapshot_done, + no_checkpoint_before, + can_checkpoint, + keepalive_op, + new_last_checkpoint, + FALSE AS created_checkpoint + FROM + computed + WHERE + NOT EXISTS ( + SELECT + 1 + FROM + updated + ) + ` + .decoded(CheckpointWithStatus) + .first(); + + if (result == null) { + throw new ReplicationAssertionError('Failed to update sync_rules during checkpoint'); } - if (lsn < this.no_checkpoint_before_lsn) { - if (Date.now() - this.lastWaitingLogThrottled > 5_000) { + if (!result.can_checkpoint) { + if (Date.now() - this.lastWaitingLogThrottled > 5_000 || true) { this.logger.info( - `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}` + `Waiting before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}. 
Current state: ${JSON.stringify( + { + snapshot_done: result.snapshot_done, + last_checkpoint_lsn: result.last_checkpoint_lsn, + no_checkpoint_before: result.no_checkpoint_before + } + )}` ); this.lastWaitingLogThrottled = Date.now(); } + return true; + } - // Edge case: During initial replication, we have a no_checkpoint_before_lsn set, - // and don't actually commit the snapshot. - // The first commit can happen from an implicit keepalive message. - // That needs the persisted_op to get an accurate checkpoint, so - // we persist that in keepalive_op. + if (result.created_checkpoint) { + this.logger.info( + `Created checkpoint at ${lsn}. Persisted op: ${result.last_checkpoint} (${this.persisted_op}). keepalive: ${result.keepalive_op}` + ); await this.db.sql` - UPDATE sync_rules - SET - keepalive_op = ${{ type: 'int8', value: this.persisted_op }} + DELETE FROM current_data WHERE - id = ${{ type: 'int4', value: this.group_id }} + group_id = ${{ type: 'int4', value: this.group_id }} + AND pending_delete IS NOT NULL + AND pending_delete <= ${{ type: 'int8', value: result.last_checkpoint }} `.execute(); - - // Cannot create a checkpoint yet - return false - return false; - } - - // Don't create a checkpoint if there were no changes - if (!createEmptyCheckpoints && this.persisted_op == null) { - // Nothing to commit - return true - await this.autoActivate(lsn); - return true; - } - - const now = new Date().toISOString(); - const update: Partial = { - last_checkpoint_lsn: lsn, - last_checkpoint_ts: now, - last_keepalive_ts: now, - snapshot_done: true, - last_fatal_error: null, - keepalive_op: null - }; - - if (this.persisted_op != null) { - update.last_checkpoint = this.persisted_op.toString(); + } else { + this.logger.info( + `Skipped empty checkpoint at ${lsn}. Persisted op: ${result.last_checkpoint}. keepalive: ${result.keepalive_op}` + ); } - - const doc = await this.db.sql` - UPDATE sync_rules - SET - keepalive_op = ${{ type: 'int8', value: update.keepalive_op }}, - last_fatal_error = ${{ type: 'varchar', value: update.last_fatal_error }}, - snapshot_done = ${{ type: 'bool', value: update.snapshot_done }}, - snapshot_lsn = NULL, - last_keepalive_ts = ${{ type: 1184, value: update.last_keepalive_ts }}, - last_checkpoint = COALESCE( - ${{ type: 'int8', value: update.last_checkpoint }}, - last_checkpoint - ), - last_checkpoint_ts = ${{ type: 1184, value: update.last_checkpoint_ts }}, - last_checkpoint_lsn = ${{ type: 'varchar', value: update.last_checkpoint_lsn }} - WHERE - id = ${{ type: 'int4', value: this.group_id }} - RETURNING - id, - state, - last_checkpoint, - last_checkpoint_lsn - ` - .decoded(StatefulCheckpoint) - .first(); - await this.autoActivate(lsn); - await notifySyncRulesUpdate(this.db, doc!); + await notifySyncRulesUpdate(this.db, { + id: result.id, + state: result.state, + last_checkpoint: result.last_checkpoint, + last_checkpoint_lsn: result.last_checkpoint_lsn + }); this.persisted_op = null; this.last_checkpoint_lsn = lsn; @@ -382,61 +491,53 @@ export class PostgresBucketBatch } async keepalive(lsn: string): Promise { - if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) { - // No-op - return false; - } - - if (lsn < this.no_checkpoint_before_lsn) { - return false; - } - - if (this.persisted_op != null) { - // The commit may have been skipped due to "no_checkpoint_before_lsn". 
- // Apply it now if relevant - this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`); - return await this.commit(lsn); - } + return await this.commit(lsn, { createEmptyCheckpoints: true }); + } - const updated = await this.db.sql` + async setResumeLsn(lsn: string): Promise { + await this.db.sql` UPDATE sync_rules SET - snapshot_done = ${{ type: 'bool', value: true }}, - snapshot_lsn = NULL, - last_checkpoint_lsn = ${{ type: 'varchar', value: lsn }}, - last_fatal_error = ${{ type: 'varchar', value: null }}, - last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }} + snapshot_lsn = ${{ type: 'varchar', value: lsn }} WHERE id = ${{ type: 'int4', value: this.group_id }} - RETURNING - id, - state, - last_checkpoint, - last_checkpoint_lsn - ` - .decoded(StatefulCheckpoint) - .first(); - - await this.autoActivate(lsn); - await notifySyncRulesUpdate(this.db, updated!); + `.execute(); + } - this.last_checkpoint_lsn = lsn; - return true; + async markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise { + await this.db.transaction(async (db) => { + await db.sql` + UPDATE sync_rules + SET + snapshot_done = TRUE, + last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}, + no_checkpoint_before = CASE + WHEN no_checkpoint_before IS NULL + OR no_checkpoint_before < ${{ type: 'varchar', value: no_checkpoint_before_lsn }} THEN ${{ + type: 'varchar', + value: no_checkpoint_before_lsn + }} + ELSE no_checkpoint_before + END + WHERE + id = ${{ type: 'int4', value: this.group_id }} + `.execute(); + }); } - async setResumeLsn(lsn: string): Promise { + async markTableSnapshotRequired(table: storage.SourceTable): Promise { await this.db.sql` UPDATE sync_rules SET - snapshot_lsn = ${{ type: 'varchar', value: lsn }} + snapshot_done = FALSE WHERE id = ${{ type: 'int4', value: this.group_id }} `.execute(); } - async markSnapshotDone( + async markTableSnapshotDone( tables: storage.SourceTable[], - no_checkpoint_before_lsn: string + no_checkpoint_before_lsn?: string ): Promise { const ids = tables.map((table) => table.id.toString()); @@ -444,7 +545,7 @@ export class PostgresBucketBatch await db.sql` UPDATE source_tables SET - snapshot_done = ${{ type: 'bool', value: true }}, + snapshot_done = TRUE, snapshot_total_estimated_count = NULL, snapshot_replicated_count = NULL, snapshot_last_key = NULL @@ -457,14 +558,19 @@ export class PostgresBucketBatch ); `.execute(); - if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) { - this.no_checkpoint_before_lsn = no_checkpoint_before_lsn; - + if (no_checkpoint_before_lsn != null) { await db.sql` UPDATE sync_rules SET - no_checkpoint_before = ${{ type: 'varchar', value: no_checkpoint_before_lsn }}, - last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }} + last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}, + no_checkpoint_before = CASE + WHEN no_checkpoint_before IS NULL + OR no_checkpoint_before < ${{ type: 'varchar', value: no_checkpoint_before_lsn }} THEN ${{ + type: 'varchar', + value: no_checkpoint_before_lsn + }} + ELSE no_checkpoint_before + END WHERE id = ${{ type: 'int4', value: this.group_id }} `.execute(); @@ -531,7 +637,7 @@ export class PostgresBucketBatch // exceeding memory limits. 
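Both `markAllSnapshotDone` and `markTableSnapshotDone` above only ever move `no_checkpoint_before` forward, via the `CASE WHEN` guard. The same rule as a small TypeScript sketch, assuming (as the existing varchar comparisons in this file already rely on) that the stored LSN strings order correctly under plain text comparison; `advanceNoCheckpointBefore` is an illustrative name:

// Keep the larger LSN; never move no_checkpoint_before backwards.
function advanceNoCheckpointBefore(current: string | null, candidate: string): string {
  if (current == null || current < candidate) {
    return candidate;
  }
  return current;
}

// Example: a later per-table snapshot cannot lower the barrier set by an earlier one.
advanceNoCheckpointBefore('00000000/000000C8', '00000000/00000064'); // => '00000000/000000C8'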
const sizeLookups = batch.batch.map((r) => { return { - source_table: r.record.sourceTable.id.toString(), + source_table: postgresTableId(r.record.sourceTable.id), /** * Encode to hex in order to pass a jsonb */ @@ -564,6 +670,7 @@ export class PostgresBucketBatch AND c.source_key = f.source_key WHERE c.group_id = ${{ type: 'int4', value: this.group_id }} + FOR NO KEY UPDATE `)) { for (const row of rows) { const key = cacheKey(row.source_table, row.source_key); @@ -610,7 +717,8 @@ export class PostgresBucketBatch ) f ON c.source_table = f.source_table_id AND c.source_key = f.source_key WHERE - c.group_id = $2; + c.group_id = $2 + FOR NO KEY UPDATE; `, params: [ { @@ -918,9 +1026,10 @@ export class PostgresBucketBatch source_key: afterId, group_id: this.group_id, data: afterData!, - source_table: sourceTable.id, + source_table: postgresTableId(sourceTable.id), buckets: newBuckets, - lookups: newLookups + lookups: newLookups, + pending_delete: null }; persistedBatch.upsertCurrentData(result); } @@ -928,8 +1037,9 @@ export class PostgresBucketBatch if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) { // Either a delete (afterId == null), or replaced the old replication id persistedBatch.deleteCurrentData({ - source_table_id: record.sourceTable.id, - source_key: beforeId! + source_table_id: postgresTableId(sourceTable.id), + source_key: beforeId!, + soft: true }); } @@ -951,16 +1061,18 @@ export class PostgresBucketBatch await this.db.transaction(async (db) => { const syncRulesRow = await db.sql` SELECT - state + state, + snapshot_done FROM sync_rules WHERE id = ${{ type: 'int4', value: this.group_id }} + FOR NO KEY UPDATE; ` - .decoded(pick(models.SyncRules, ['state'])) + .decoded(pick(models.SyncRules, ['state', 'snapshot_done'])) .first(); - if (syncRulesRow && syncRulesRow.state == storage.SyncRuleState.PROCESSING) { + if (syncRulesRow && syncRulesRow.state == storage.SyncRuleState.PROCESSING && syncRulesRow.snapshot_done) { await db.sql` UPDATE sync_rules SET @@ -968,25 +1080,27 @@ export class PostgresBucketBatch WHERE id = ${{ type: 'int4', value: this.group_id }} `.execute(); + + await db.sql` + UPDATE sync_rules + SET + state = ${{ type: 'varchar', value: storage.SyncRuleState.STOP }} + WHERE + ( + state = ${{ value: storage.SyncRuleState.ACTIVE, type: 'varchar' }} + OR state = ${{ value: storage.SyncRuleState.ERRORED, type: 'varchar' }} + ) + AND id != ${{ type: 'int4', value: this.group_id }} + `.execute(); didActivate = true; + this.needsActivation = false; + } else if (syncRulesRow?.state != storage.SyncRuleState.PROCESSING) { + this.needsActivation = false; } - - await db.sql` - UPDATE sync_rules - SET - state = ${{ type: 'varchar', value: storage.SyncRuleState.STOP }} - WHERE - ( - state = ${{ value: storage.SyncRuleState.ACTIVE, type: 'varchar' }} - OR state = ${{ value: storage.SyncRuleState.ERRORED, type: 'varchar' }} - ) - AND id != ${{ type: 'int4', value: this.group_id }} - `.execute(); }); if (didActivate) { this.logger.info(`Activated new sync rules at ${lsn}`); } - this.needsActivation = false; } /** @@ -1003,9 +1117,28 @@ export class PostgresBucketBatch callback: (tx: lib_postgres.WrappedConnection) => Promise ): Promise { try { - return await this.db.transaction(async (db) => { - return await callback(db); - }); + // Try for up to a minute + const lastTry = Date.now() + 60_000; + while (true) { + try { + return await this.db.transaction(async (db) => { + // The isolation level is required to protect against concurrent updates to the same data. 
+ // In theory the "select ... for update" locks may be able to protect against this, but we + // still have failing tests if we use that as the only isolation mechanism. + await db.query('SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;'); + return await callback(db); + }); + } catch (err) { + const code = err[Symbol.for('pg.ErrorCode')]; + if ((code == '40001' || code == '40P01') && Date.now() < lastTry) { + // Serialization (lock) failure, retry + this.logger.warn(`Serialization failure during replication transaction, retrying: ${err.message}`); + await timers.setTimeout(100 + Math.random() * 200); + continue; + } + throw err; + } + } } finally { await this.db.sql` UPDATE sync_rules diff --git a/modules/module-postgres-storage/src/storage/batch/PostgresPersistedBatch.ts b/modules/module-postgres-storage/src/storage/batch/PostgresPersistedBatch.ts index df8a2bae6..b6e4c96fc 100644 --- a/modules/module-postgres-storage/src/storage/batch/PostgresPersistedBatch.ts +++ b/modules/module-postgres-storage/src/storage/batch/PostgresPersistedBatch.ts @@ -1,6 +1,6 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; -import { logger } from '@powersync/lib-services-framework'; -import { storage, utils } from '@powersync/service-core'; +import { logger, ServiceAssertionError } from '@powersync/lib-services-framework'; +import { bson, InternalOpId, storage, utils } from '@powersync/service-core'; import { JSONBig } from '@powersync/service-jsonbig'; import * as sync_rules from '@powersync/service-sync-rules'; import { models, RequiredOperationBatchLimits } from '../../types/types.js'; @@ -24,7 +24,7 @@ export type SaveParameterDataOptions = { }; export type DeleteCurrentDataOptions = { - source_table_id: bigint; + source_table_id: string; /** * ReplicaID which needs to be serialized in order to be queried * or inserted into the DB @@ -34,12 +34,19 @@ export type DeleteCurrentDataOptions = { * Optionally provide the serialized source key directly */ serialized_source_key?: Buffer; + + /** + * Streaming replication needs soft deletes, while truncating tables can use a hard delete directly. + */ + soft: boolean; }; export type PostgresPersistedBatchOptions = RequiredOperationBatchLimits & { group_id: number; }; +const EMPTY_DATA = Buffer.from(bson.serialize({})); + export class PostgresPersistedBatch { group_id: number; @@ -56,11 +63,13 @@ export class PostgresPersistedBatch { */ protected bucketDataInserts: models.BucketData[]; protected parameterDataInserts: models.BucketParameters[]; - protected currentDataDeletes: Pick[]; /** - * This is stored as a map to avoid multiple inserts (or conflicts) for the same key + * This is stored as a map to avoid multiple inserts (or conflicts) for the same key. + * + * Each key may only occur in one of these two maps. 
*/ protected currentDataInserts: Map; + protected currentDataDeletes: Map; constructor(options: PostgresPersistedBatchOptions) { this.group_id = options.group_id; @@ -70,8 +79,8 @@ export class PostgresPersistedBatch { this.bucketDataInserts = []; this.parameterDataInserts = []; - this.currentDataDeletes = []; this.currentDataInserts = new Map(); + this.currentDataDeletes = new Map(); this.currentSize = 0; } @@ -98,7 +107,7 @@ export class PostgresPersistedBatch { group_id: this.group_id, bucket_name: k.bucket, op: models.OpType.PUT, - source_table: options.table.id, + source_table: postgresTableId(options.table.id), source_key: hexSourceKey, table_name: k.table, row_id: k.id, @@ -117,7 +126,7 @@ export class PostgresPersistedBatch { group_id: this.group_id, bucket_name: bd.bucket, op: models.OpType.REMOVE, - source_table: options.table.id, + source_table: postgresTableId(options.table.id), source_key: hexSourceKey, table_name: bd.table, row_id: bd.id, @@ -155,7 +164,7 @@ export class PostgresPersistedBatch { const serializedBucketParameters = JSONBig.stringify(result.bucketParameters); this.parameterDataInserts.push({ group_id: this.group_id, - source_table: table.id, + source_table: postgresTableId(table.id), source_key: hexSourceKey, bucket_parameters: serializedBucketParameters, id: 0, // auto incrementing id @@ -169,7 +178,7 @@ export class PostgresPersistedBatch { const hexLookup = lookup.toString('hex'); this.parameterDataInserts.push({ group_id: this.group_id, - source_table: table.id, + source_table: postgresTableId(table.id), source_key: hexSourceKey, bucket_parameters: JSON.stringify([]), id: 0, // auto incrementing id @@ -180,19 +189,36 @@ export class PostgresPersistedBatch { } deleteCurrentData(options: DeleteCurrentDataOptions) { - const serializedReplicaId = options.serialized_source_key ?? storage.serializeReplicaId(options.source_key); - this.currentDataDeletes.push({ - group_id: this.group_id, - source_table: options.source_table_id.toString(), - source_key: serializedReplicaId.toString('hex') - }); - this.currentSize += serializedReplicaId.byteLength + 100; + if (options.soft) { + return this.upsertCurrentData( + { + group_id: this.group_id, + source_table: options.source_table_id, + source_key: options.source_key, + buckets: [], + data: EMPTY_DATA, + lookups: [], + pending_delete: 1n // converted to nextval('op_id_sequence') in the query + }, + options.serialized_source_key + ); + } else { + const serializedReplicaId = options.serialized_source_key ?? storage.serializeReplicaId(options.source_key); + const hexReplicaId = serializedReplicaId.toString('hex'); + const source_table = options.source_table_id; + const key = `${this.group_id}-${source_table}-${hexReplicaId}`; + this.currentDataInserts.delete(key); + this.currentDataDeletes.set(key, { + source_key_hex: hexReplicaId, + source_table: source_table + }); + } } - upsertCurrentData(options: models.CurrentDataDecoded) { + upsertCurrentData(options: models.CurrentDataDecoded, serialized_source_key?: Buffer) { const { source_table, source_key, buckets } = options; - const serializedReplicaId = storage.serializeReplicaId(source_key); + const serializedReplicaId = serialized_source_key ?? 
storage.serializeReplicaId(source_key); const hexReplicaId = serializedReplicaId.toString('hex'); const serializedBuckets = JSONBig.stringify(options.buckets); @@ -206,13 +232,15 @@ export class PostgresPersistedBatch { */ const key = `${this.group_id}-${source_table}-${hexReplicaId}`; + this.currentDataDeletes.delete(key); this.currentDataInserts.set(key, { group_id: this.group_id, source_table: source_table, source_key: hexReplicaId, buckets: serializedBuckets, data: options.data.toString('hex'), - lookups: options.lookups.map((l) => l.toString('hex')) + lookups: options.lookups.map((l) => l.toString('hex')), + pending_delete: options.pending_delete?.toString() ?? null }); this.currentSize += @@ -230,7 +258,6 @@ export class PostgresPersistedBatch { this.currentSize >= this.maxTransactionBatchSize || this.bucketDataInserts.length >= this.maxTransactionDocCount || this.currentDataInserts.size >= this.maxTransactionDocCount || - this.currentDataDeletes.length >= this.maxTransactionDocCount || this.parameterDataInserts.length >= this.maxTransactionDocCount ); } @@ -239,24 +266,26 @@ export class PostgresPersistedBatch { const stats = { bucketDataCount: this.bucketDataInserts.length, parameterDataCount: this.parameterDataInserts.length, - currentDataCount: this.currentDataInserts.size + this.currentDataDeletes.length + currentDataCount: this.currentDataInserts.size + this.currentDataDeletes.size }; const flushedAny = stats.bucketDataCount > 0 || stats.parameterDataCount > 0 || stats.currentDataCount > 0; logger.info( `powersync_${this.group_id} Flushed ${this.bucketDataInserts.length} + ${this.parameterDataInserts.length} + ${ - this.currentDataInserts.size + this.currentDataDeletes.length + this.currentDataInserts.size } updates, ${Math.round(this.currentSize / 1024)}kb.` ); - await this.flushBucketData(db); - await this.flushParameterData(db); + // Flush current_data first, since this is where lock errors are most likely to occur, and we + // want to detect those as soon as possible. await this.flushCurrentData(db); + await this.flushBucketData(db); + await this.flushParameterData(db); this.bucketDataInserts = []; this.parameterDataInserts = []; - this.currentDataDeletes = []; this.currentDataInserts = new Map(); + this.currentDataDeletes = new Map(); this.currentSize = 0; return { @@ -342,6 +371,18 @@ export class PostgresPersistedBatch { protected async flushCurrentData(db: lib_postgres.WrappedConnection) { if (this.currentDataInserts.size > 0) { + const updates = Array.from(this.currentDataInserts.values()); + // Sort by source_table, source_key to ensure consistent order. + // While order of updates don't directly matter, using a consistent order helps to reduce 40P01 deadlock errors. + // We may still have deadlocks between deletes and inserts, but those should be less frequent. 
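The `pending_delete` marker written by the soft-delete path above is what the checkpoint cleanup in `commit()` earlier keys on: a soft-deleted row stays in `current_data`, tagged with the op id at which it was deleted, and becomes eligible for removal once a checkpoint covers that op id. A hedged sketch of that lifecycle; the row shape and `gcCurrentData` helper are illustrative only:

interface CurrentDataRow {
  sourceTable: string;
  sourceKeyHex: string;
  // null for live rows; the soft-delete op id otherwise.
  pendingDelete: bigint | null;
}

// Keep live rows and rows whose soft delete is not yet covered by the checkpoint;
// everything else can be hard-deleted, mirroring the DELETE ... WHERE pending_delete <= last_checkpoint above.
function gcCurrentData(rows: CurrentDataRow[], lastCheckpoint: bigint): CurrentDataRow[] {
  return rows.filter((row) => row.pendingDelete == null || row.pendingDelete > lastCheckpoint);
}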
+ updates.sort((a, b) => { + if (a.source_table < b.source_table) return -1; + if (a.source_table > b.source_table) return 1; + if (a.source_key < b.source_key) return -1; + if (a.source_key > b.source_key) return 1; + return 0; + }); + await db.sql` INSERT INTO current_data ( @@ -350,7 +391,8 @@ export class PostgresPersistedBatch { source_key, buckets, data, - lookups + lookups, + pending_delete ) SELECT group_id, @@ -363,42 +405,56 @@ export class PostgresPersistedBatch { decode(element, 'hex') FROM unnest(lookups) AS element - ) AS lookups + ) AS lookups, + CASE + WHEN pending_delete IS NOT NULL THEN nextval('op_id_sequence') + ELSE NULL + END AS pending_delete FROM - json_to_recordset(${{ type: 'json', value: Array.from(this.currentDataInserts.values()) }}::json) AS t ( + json_to_recordset(${{ type: 'json', value: updates }}::json) AS t ( group_id integer, source_table text, source_key text, -- Input as hex string buckets text, data text, -- Input as hex string - lookups TEXT[] -- Input as stringified JSONB array of hex strings + lookups TEXT[], -- Input as stringified JSONB array of hex strings + pending_delete bigint ) ON CONFLICT (group_id, source_table, source_key) DO UPDATE SET buckets = EXCLUDED.buckets, data = EXCLUDED.data, - lookups = EXCLUDED.lookups; + lookups = EXCLUDED.lookups, + pending_delete = EXCLUDED.pending_delete; `.execute(); } - if (this.currentDataDeletes.length > 0) { + if (this.currentDataDeletes.size > 0) { + const deletes = Array.from(this.currentDataDeletes.values()); + // Same sorting as for inserts + deletes.sort((a, b) => { + if (a.source_table < b.source_table) return -1; + if (a.source_table > b.source_table) return 1; + if (a.source_key_hex < b.source_key_hex) return -1; + if (a.source_key_hex > b.source_key_hex) return 1; + return 0; + }); + await db.sql` WITH conditions AS ( SELECT - group_id, source_table, - decode(source_key, 'hex') AS source_key -- Decode hex to bytea + decode(source_key_hex, 'hex') AS source_key -- Decode hex to bytea FROM - jsonb_to_recordset(${{ type: 'jsonb', value: this.currentDataDeletes }}::jsonb) AS t ( - group_id integer, - source_table text, - source_key text -- Input as hex string - ) + jsonb_to_recordset(${{ + type: 'jsonb', + value: deletes + }}::jsonb) AS t (source_table text, source_key_hex text) ) DELETE FROM current_data USING conditions WHERE - current_data.group_id = conditions.group_id + current_data.group_id = ${{ type: 'int4', value: this.group_id }} AND current_data.source_table = conditions.source_table AND current_data.source_key = conditions.source_key; `.execute(); @@ -409,3 +465,10 @@ export class PostgresPersistedBatch { export function currentBucketKey(b: models.CurrentBucket) { return `${b.bucket}/${b.table}/${b.id}`; } + +export function postgresTableId(id: storage.SourceTableId) { + if (typeof id == 'string') { + return id; + } + throw new ServiceAssertionError(`Expected string table id, got ObjectId`); +} diff --git a/modules/module-postgres-storage/src/types/models/CurrentData.ts b/modules/module-postgres-storage/src/types/models/CurrentData.ts index 828d9a8c0..da4f2d8f3 100644 --- a/modules/module-postgres-storage/src/types/models/CurrentData.ts +++ b/modules/module-postgres-storage/src/types/models/CurrentData.ts @@ -1,5 +1,5 @@ import * as t from 'ts-codec'; -import { hexBuffer, jsonb, pgwire_number } from '../codecs.js'; +import { bigint, hexBuffer, jsonb, pgwire_number } from '../codecs.js'; export const CurrentBucket = t.object({ bucket: t.string, @@ -16,7 +16,8 @@ export const CurrentData 
= t.object({ group_id: pgwire_number, lookups: t.array(hexBuffer), source_key: hexBuffer, - source_table: t.string + source_table: t.string, + pending_delete: t.Null.or(bigint) }); export type CurrentData = t.Encoded; diff --git a/modules/module-postgres-storage/src/utils/bson.ts b/modules/module-postgres-storage/src/utils/bson.ts index c60be1775..79cea0cdc 100644 --- a/modules/module-postgres-storage/src/utils/bson.ts +++ b/modules/module-postgres-storage/src/utils/bson.ts @@ -6,7 +6,7 @@ import * as uuid from 'uuid'; * JSONB columns do not directly support storing binary data which could be required in future. */ -export function replicaIdToSubkey(tableId: string, id: storage.ReplicaId): string { +export function replicaIdToSubkey(tableId: storage.SourceTableId, id: storage.ReplicaId): string { // Hashed UUID from the table and id if (storage.isUUID(id)) { // Special case for UUID for backwards-compatiblity diff --git a/modules/module-postgres-storage/src/utils/test-utils.ts b/modules/module-postgres-storage/src/utils/test-utils.ts index c0ba7c2b0..f955cf542 100644 --- a/modules/module-postgres-storage/src/utils/test-utils.ts +++ b/modules/module-postgres-storage/src/utils/test-utils.ts @@ -3,6 +3,7 @@ import { PostgresMigrationAgent } from '../migrations/PostgresMigrationAgent.js' import { normalizePostgresStorageConfig, PostgresStorageConfigDecoded } from '../types/types.js'; import { PostgresReportStorage } from '../storage/PostgresReportStorage.js'; import { PostgresBucketStorageFactory } from '../storage/PostgresBucketStorageFactory.js'; +import { logger as defaultLogger, createLogger, transports } from '@powersync/lib-services-framework'; export type PostgresTestStorageOptions = { url: string; @@ -31,11 +32,19 @@ export function postgresTestSetup(factoryOptions: PostgresTestStorageOptions) { const mockServiceContext = { configuration: { storage: BASE_CONFIG } } as unknown as ServiceContext; + // Migration logs can get really verbose in tests, so only log warnings and up. 
+ const logger = createLogger({ + level: 'warn', + format: defaultLogger.format, + transports: [new transports.Console()] + }); + await migrationManager.migrate({ direction: framework.migrations.Direction.Down, migrationContext: { service_context: mockServiceContext - } + }, + logger }); if (direction == framework.migrations.Direction.Up) { @@ -43,7 +52,8 @@ export function postgresTestSetup(factoryOptions: PostgresTestStorageOptions) { direction: framework.migrations.Direction.Up, migrationContext: { service_context: mockServiceContext - } + }, + logger }); } }; @@ -80,7 +90,8 @@ export function postgresTestSetup(factoryOptions: PostgresTestStorageOptions) { throw ex; } }, - migrate + migrate, + tableIdStrings: true }; } diff --git a/modules/module-postgres-storage/test/src/__snapshots__/storage.test.ts.snap b/modules/module-postgres-storage/test/src/__snapshots__/storage.test.ts.snap deleted file mode 100644 index d1b24f45b..000000000 --- a/modules/module-postgres-storage/test/src/__snapshots__/storage.test.ts.snap +++ /dev/null @@ -1,9 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`Postgres Sync Bucket Storage - Data > empty storage metrics 1`] = ` -{ - "operations_size_bytes": 16384, - "parameters_size_bytes": 32768, - "replication_size_bytes": 16384, -} -`; diff --git a/modules/module-postgres-storage/test/src/migrations.test.ts b/modules/module-postgres-storage/test/src/migrations.test.ts index 1f2e12a64..1979ae0ee 100644 --- a/modules/module-postgres-storage/test/src/migrations.test.ts +++ b/modules/module-postgres-storage/test/src/migrations.test.ts @@ -21,7 +21,7 @@ describe('Migrations', () => { register.registerMigrationTests(MIGRATION_AGENT_FACTORY); it('Should have tables declared', async () => { - const { db } = await POSTGRES_STORAGE_FACTORY(); + const { db } = await POSTGRES_STORAGE_FACTORY.factory(); const tables = await db.sql` SELECT diff --git a/modules/module-postgres-storage/test/src/storage.test.ts b/modules/module-postgres-storage/test/src/storage.test.ts index 9fb1ce197..2e701aa56 100644 --- a/modules/module-postgres-storage/test/src/storage.test.ts +++ b/modules/module-postgres-storage/test/src/storage.test.ts @@ -1,5 +1,5 @@ import { storage } from '@powersync/service-core'; -import { register, TEST_TABLE, test_utils } from '@powersync/service-core-tests'; +import { register, test_utils } from '@powersync/service-core-tests'; import { describe, expect, test } from 'vitest'; import { POSTGRES_STORAGE_FACTORY } from './util.js'; @@ -32,11 +32,11 @@ describe('Postgres Sync Bucket Storage - pg-specific', () => { - SELECT id, description FROM "%" ` ); - await using factory = await POSTGRES_STORAGE_FACTORY(); + await using factory = await POSTGRES_STORAGE_FACTORY.factory(); const bucketStorage = factory.getInstance(sync_rules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { - const sourceTable = TEST_TABLE; + const sourceTable = test_utils.makeTestTable('test', ['id'], POSTGRES_STORAGE_FACTORY); const largeDescription = '0123456789'.repeat(2_000_00); diff --git a/modules/module-postgres-storage/test/src/util.ts b/modules/module-postgres-storage/test/src/util.ts index d055dc343..7a3bdbbbe 100644 --- a/modules/module-postgres-storage/test/src/util.ts +++ b/modules/module-postgres-storage/test/src/util.ts @@ -32,5 +32,5 @@ export const POSTGRES_STORAGE_SETUP = postgresTestSetup({ migrationAgent: (config) => new TestPostgresMigrationAgent(config) }); -export const POSTGRES_STORAGE_FACTORY = 
POSTGRES_STORAGE_SETUP.factory; +export const POSTGRES_STORAGE_FACTORY = POSTGRES_STORAGE_SETUP; export const POSTGRES_REPORT_STORAGE_FACTORY = POSTGRES_STORAGE_SETUP.reportFactory; diff --git a/modules/module-postgres/package.json b/modules/module-postgres/package.json index 212783e86..b8f90125d 100644 --- a/modules/module-postgres/package.json +++ b/modules/module-postgres/package.json @@ -36,6 +36,7 @@ "@powersync/service-sync-rules": "workspace:*", "@powersync/service-types": "workspace:*", "jose": "^4.15.1", + "p-defer": "^4.0.1", "pgwire": "github:kagis/pgwire#f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87", "semver": "^7.5.4", "ts-codec": "^1.3.0", @@ -43,9 +44,9 @@ "uuid": "^11.1.0" }, "devDependencies": { + "@powersync/lib-service-postgres": "workspace:*", "@powersync/service-core-tests": "workspace:*", "@powersync/service-module-mongodb-storage": "workspace:*", - "@powersync/lib-service-postgres": "workspace:*", "@powersync/service-module-postgres-storage": "workspace:*", "@types/semver": "^7.5.4" } diff --git a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts index 6275676ba..9746df10c 100644 --- a/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts +++ b/modules/module-postgres/src/api/PostgresRouteAPIAdapter.ts @@ -314,7 +314,8 @@ LEFT JOIN ( AND NOT a.attisdropped AND has_column_privilege(tbl.quoted_name, a.attname, 'SELECT, INSERT, UPDATE, REFERENCES') ) -GROUP BY schemaname, tablename, quoted_name` +GROUP BY schemaname, tablename, quoted_name +ORDER BY schemaname, tablename;` ); await this.typeCache.fetchTypesForSchema(); const rows = pgwire.pgwireRows(results); diff --git a/modules/module-postgres/src/replication/PostgresSnapshotter.ts b/modules/module-postgres/src/replication/PostgresSnapshotter.ts new file mode 100644 index 000000000..888306065 --- /dev/null +++ b/modules/module-postgres/src/replication/PostgresSnapshotter.ts @@ -0,0 +1,673 @@ +import { + container, + logger as defaultLogger, + Logger, + ReplicationAbortedError, + ReplicationAssertionError +} from '@powersync/lib-services-framework'; +import { + getUuidReplicaIdentityBson, + MetricsEngine, + RelationCache, + SourceEntityDescriptor, + SourceTable, + storage +} from '@powersync/service-core'; +import * as pgwire from '@powersync/service-jpgwire'; +import { + DatabaseInputRow, + SqliteInputRow, + SqliteRow, + SqlSyncRules, + TablePattern, + toSyncRulesRow +} from '@powersync/service-sync-rules'; + +import { ReplicationMetric } from '@powersync/service-types'; +import { PgManager } from './PgManager.js'; +import { + checkSourceConfiguration, + checkTableRls, + ensureStorageCompatibility, + getReplicationIdentityColumns +} from './replication-utils.js'; +import { + ChunkedSnapshotQuery, + IdSnapshotQuery, + PrimaryKeyValue, + SimpleSnapshotQuery, + SnapshotQuery +} from './SnapshotQuery.js'; +import { + MissingReplicationSlotError, + POSTGRES_DEFAULT_SCHEMA, + PUBLICATION_NAME, + sendKeepAlive, + WalStreamOptions, + ZERO_LSN +} from './WalStream.js'; +import * as timers from 'node:timers/promises'; +import pDefer, { DeferredPromise } from 'p-defer'; + +interface InitResult { + /** True if initial snapshot is not yet done. */ + needsInitialSync: boolean; + /** True if snapshot must be started from scratch with a new slot. 
*/ + needsNewSlot: boolean; +} + +export class PostgresSnapshotter { + sync_rules: SqlSyncRules; + group_id: number; + + connection_id = 1; + + private logger: Logger; + + private readonly storage: storage.SyncRulesBucketStorage; + private readonly metrics: MetricsEngine; + private readonly slot_name: string; + + private connections: PgManager; + + private abortSignal: AbortSignal; + + private snapshotChunkLength: number; + + private relationCache = new RelationCache((relation: number | SourceTable) => { + if (typeof relation == 'number') { + return relation; + } + return relation.objectId!; + }); + + private queue = new Set(); + private initialSnapshotDone = pDefer(); + + constructor(options: WalStreamOptions) { + this.logger = options.logger ?? defaultLogger; + this.storage = options.storage; + this.metrics = options.metrics; + this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA }); + this.group_id = options.storage.group_id; + this.slot_name = options.storage.slot_name; + this.connections = options.connections; + this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; + + this.abortSignal = options.abort_signal; + } + + async getQualifiedTableNames( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + tablePattern: TablePattern + ): Promise { + const schema = tablePattern.schema; + if (tablePattern.connectionTag != this.connections.connectionTag) { + return []; + } + + let tableRows: any[]; + const prefix = tablePattern.isWildcard ? tablePattern.tablePrefix : undefined; + + { + let query = ` + SELECT + c.oid AS relid, + c.relname AS table_name, + (SELECT + json_agg(DISTINCT a.atttypid) + FROM pg_attribute a + WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) + AS column_types + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = $1 + AND c.relkind = 'r'`; + + if (tablePattern.isWildcard) { + query += ' AND c.relname LIKE $2'; + } else { + query += ' AND c.relname = $2'; + } + + const result = await db.query({ + statement: query, + params: [ + { type: 'varchar', value: schema }, + { type: 'varchar', value: tablePattern.tablePattern } + ] + }); + + tableRows = pgwire.pgwireRows(result); + } + + let result: storage.SourceTable[] = []; + + for (let row of tableRows) { + const name = row.table_name as string; + if (typeof row.relid != 'bigint') { + throw new ReplicationAssertionError(`Missing relid for ${name}`); + } + const relid = Number(row.relid as bigint); + + if (prefix && !name.startsWith(prefix)) { + continue; + } + + const rs = await db.query({ + statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, + params: [ + { type: 'varchar', value: PUBLICATION_NAME }, + { type: 'varchar', value: tablePattern.schema }, + { type: 'varchar', value: name } + ] + }); + if (rs.rows.length == 0) { + this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); + continue; + } + + try { + const result = await checkTableRls(db, relid); + if (!result.canRead) { + // We log the message, then continue anyway, since the check does not cover all cases. + this.logger.warn(result.message!); + } + } catch (e) { + // It's possible that we just don't have permission to access pg_roles - log the error and continue. 
+ this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); + } + + const cresult = await getReplicationIdentityColumns(db, relid); + + const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); + const table = await this.handleRelation({ + batch, + descriptor: { + name, + schema, + objectId: relid, + replicaIdColumns: cresult.replicationColumns + } as SourceEntityDescriptor, + referencedTypeIds: columnTypes + }); + + result.push(table); + } + return result; + } + + async checkSlot(): Promise { + await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); + await ensureStorageCompatibility(this.connections.pool, this.storage.factory); + + const slotName = this.slot_name; + + const status = await this.storage.getStatus(); + const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; + if (snapshotDone) { + // Snapshot is done, but we still need to check the replication slot status + this.logger.info(`Initial replication already done`); + } + + // Check if replication slot exists + const slot = pgwire.pgwireRows( + await this.connections.pool.query({ + // We specifically want wal_status and invalidation_reason, but it's not available on older versions, + // so we just query *. + statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }) + )[0]; + + // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. + // However, there were some edge cases where the query times out, repeating the query, ultimately + // causing high load on the source database and never recovering automatically. + // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those + // errors during streaming replication, which is a little more robust. + + // We can have: + // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). + // Theoretically we could handle this the same as (2). + // 2. needsInitialSync: true, no slot -> create new slot + // 3. needsInitialSync: true, valid slot -> resume initial sync + // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) + // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) + // 6. needsInitialSync: false, valid slot -> resume streaming replication + // The main advantage of MissingReplicationSlotError are: + // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while + // we do the reprocessing under a new slot name. + // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for + // the partial data to be cleared. + if (slot != null) { + // This checks that the slot is still valid + + // wal_status is present in postgres 13+ + // invalidation_reason is present in postgres 17+ + const lost = slot.wal_status == 'lost'; + if (lost) { + // Case 1 / 4 + throw new MissingReplicationSlotError( + `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 
'unknown'}` + ); + } + // Case 3 / 6 + return { + needsInitialSync: !snapshotDone, + needsNewSlot: false + }; + } else { + if (snapshotDone) { + // Case 5 + // This will create a new slot, while keeping the current sync rules active + throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); + } + // Case 2 + // This will clear data (if any) and re-create the same slot + return { needsInitialSync: true, needsNewSlot: true }; + } + } + + async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { + const results = await db.query({ + statement: `SELECT reltuples::bigint AS estimate + FROM pg_class + WHERE oid = $1::regclass`, + params: [{ value: table.qualifiedName, type: 'varchar' }] + }); + const row = results.rows[0]; + if ((row?.[0] ?? -1n) == -1n) { + return -1; + } else { + return Number(row[0]); + } + } + + public async setupSlot(db: pgwire.PgConnection, status: InitResult) { + // If anything here errors, the entire replication process is aborted, + // and all connections are closed, including this one. + const slotName = this.slot_name; + + if (status.needsNewSlot) { + // This happens when there is no existing replication slot, or if the + // existing one is unhealthy. + // In those cases, we have to start replication from scratch. + // If there is an existing healthy slot, we can skip this and continue + // initial replication where we left off. + await this.storage.clear({ signal: this.abortSignal }); + + await db.query({ + statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', + params: [{ type: 'varchar', value: slotName }] + }); + + // We use the replication connection here, not a pool. + // The replication slot must be created before we start snapshotting tables. + const initReplicationConnection = await this.connections.replicationConnection(); + try { + await initReplicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); + } finally { + await initReplicationConnection.end(); + } + + this.logger.info(`Created replication slot ${slotName}`); + } + } + + async replicateTable(table: SourceTable) { + const db = await this.connections.snapshotConnection(); + try { + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + await this.snapshotTableInTx(batch, db, table); + // This commit ensures we set keepalive_op. + // It may be better if that is automatically set when flushing. 
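The case analysis in `checkSlot()` above (cases 1–6) reduces to a small decision table. A sketch for illustration only; the enum-like inputs and the `decide` helper are assumptions, not part of the module:

type SlotState = 'lost' | 'missing' | 'valid';

type SlotDecision =
  | { action: 'missing-slot-error' } // reprocess under a new sync rules version / slot name
  | { action: 'create-slot' } // fresh start: clear storage, create the slot, snapshot
  | { action: 'resume'; needsInitialSync: boolean };

function decide(snapshotDone: boolean, slot: SlotState): SlotDecision {
  if (slot == 'lost') {
    return { action: 'missing-slot-error' }; // cases 1 and 4
  }
  if (slot == 'valid') {
    return { action: 'resume', needsInitialSync: !snapshotDone }; // cases 3 and 6
  }
  // Slot missing:
  return snapshotDone
    ? { action: 'missing-slot-error' } // case 5
    : { action: 'create-slot' }; // case 2
}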
+ await batch.commit(ZERO_LSN); + } + ); + this.logger.info(`Flushed snapshot at ${flushResults?.flushed_op}`); + } finally { + await db.end(); + } + } + + async waitForInitialSnapshot() { + await this.initialSnapshotDone.promise; + } + + async replicationLoop() { + try { + if (this.queue.size == 0) { + // Special case where we start with no tables to snapshot + await this.markSnapshotDone(); + } + while (!this.abortSignal.aborted) { + const table = this.queue.values().next().value; + if (table == null) { + this.initialSnapshotDone.resolve(); + await timers.setTimeout(500, { signal: this.abortSignal }); + continue; + } + + await this.replicateTable(table); + this.queue.delete(table); + if (this.queue.size == 0) { + await this.markSnapshotDone(); + } + } + throw new ReplicationAbortedError(`Replication loop aborted`, this.abortSignal.reason); + } catch (e) { + // If initial snapshot already completed, this has no effect + this.initialSnapshotDone.reject(e); + throw e; + } + } + + private async markSnapshotDone() { + const db = await this.connections.snapshotConnection(); + await using _ = { [Symbol.asyncDispose]: () => db.end() }; + + const flushResults = await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + const globalLsnNotBefore = rs.rows[0][0]; + await batch.markAllSnapshotDone(globalLsnNotBefore); + } + ); + /** + * Send a keepalive message after initial replication. + * In some edge cases we wait for a keepalive after the initial snapshot. + * If we don't explicitly check the contents of keepalive messages then a keepalive is detected + * rather quickly after initial replication - perhaps due to other WAL events. + * If we do explicitly check the contents of messages, we need an actual keepalive payload in order + * to advance the active sync rules LSN. + */ + await sendKeepAlive(db); + + const lastOp = flushResults?.flushed_op; + if (lastOp != null) { + // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. + // TODO: only run this after initial replication, not after each table. + await this.storage.populatePersistentChecksumCache({ + // No checkpoint yet, but we do have the opId. + maxOpId: lastOp, + signal: this.abortSignal + }); + } + } + + /** + * Start initial replication. + * + * If (partial) replication was done before on this slot, this clears the state + * and starts again from scratch. 
+ */ + async queueSnapshotTables(db: pgwire.PgConnection) { + const sourceTables = this.sync_rules.getSourceTables(); + + await this.storage.startBatch( + { + logger: this.logger, + zeroLSN: ZERO_LSN, + defaultSchema: POSTGRES_DEFAULT_SCHEMA, + storeCurrentData: true, + skipExistingRows: true + }, + async (batch) => { + for (let tablePattern of sourceTables) { + const tables = await this.getQualifiedTableNames(batch, db, tablePattern); + // Pre-get counts + for (let table of tables) { + if (table.snapshotComplete) { + this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); + continue; + } + const count = await this.estimatedCountNumber(db, table); + table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); + this.relationCache.update(table); + + this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + + this.queue.add(table); + } + } + } + ); + } + + static *getQueryData(results: Iterable): Generator { + for (let row of results) { + yield toSyncRulesRow(row); + } + } + + public async queueSnapshot(batch: storage.BucketStorageBatch, table: storage.SourceTable) { + await batch.markTableSnapshotRequired(table); + this.queue.add(table); + } + + public async snapshotTableInTx( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ): Promise { + // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. + // The data may change during the transaction, but that is compensated for in the streaming + // replication afterwards. + await db.query('BEGIN'); + try { + let tableLsnNotBefore: string; + await this.snapshotTable(batch, db, table, limited); + + // Get the current LSN. + // The data will only be consistent once incremental replication has passed that point. + // We have to get this LSN _after_ we have finished the table snapshot. + // + // There are basically two relevant LSNs here: + // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, + // but it is implicitly recorded in the replication slot. + // B: The LSN after the table snapshot is complete, which is what we get here. + // When we do the snapshot queries, the data that we get back for each chunk could match the state + // anywhere between A and B. To actually have a consistent state on our side, we need to: + // 1. Complete the snapshot. + // 2. Wait until logical replication has caught up with all the change between A and B. + // Calling `markSnapshotDone(LSN B)` covers that. + const rs = await db.query(`select pg_current_wal_lsn() as lsn`); + tableLsnNotBefore = rs.rows[0][0]; + // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. + await db.query('COMMIT'); + this.logger.info(`Snapshot complete for table ${table.qualifiedName}, resume at ${tableLsnNotBefore}`); + const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore); + this.relationCache.update(resultTable); + return resultTable; + } catch (e) { + await db.query('ROLLBACK'); + throw e; + } + } + + private async snapshotTable( + batch: storage.BucketStorageBatch, + db: pgwire.PgConnection, + table: storage.SourceTable, + limited?: PrimaryKeyValue[] + ) { + let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; + let at = table.snapshotStatus?.replicatedCount ?? 0; + let lastCountTime = 0; + let q: SnapshotQuery; + // We do streaming on two levels: + // 1. 
Coarse level: DELCARE CURSOR, FETCH 10000 at a time. + // 2. Fine level: Stream chunks from each fetch call. + if (limited) { + q = new IdSnapshotQuery(db, table, limited); + } else if (ChunkedSnapshotQuery.supports(table)) { + // Single primary key - we can use the primary key for chunking + const orderByKey = table.replicaIdColumns[0]; + q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); + if (table.snapshotStatus?.lastKey != null) { + this.logger.info( + `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` + ); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); + } + } else { + // Fallback case - query the entire table + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); + q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); + at = 0; + } + await q.initialize(); + + let columns: { i: number; name: string }[] = []; + let columnMap: Record = {}; + let hasRemainingData = true; + while (hasRemainingData) { + // Fetch 10k at a time. + // The balance here is between latency overhead per FETCH call, + // and not spending too much time on each FETCH call. + // We aim for a couple of seconds on each FETCH call. + const cursor = q.nextChunk(); + hasRemainingData = false; + // pgwire streams rows in chunks. + // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. + // There are typically 100-200 rows per chunk. + for await (let chunk of cursor) { + if (chunk.tag == 'RowDescription') { + // We get a RowDescription for each FETCH call, but they should + // all be the same. + let i = 0; + columns = chunk.payload.map((c) => { + return { i: i++, name: c.name }; + }); + for (let column of chunk.payload) { + columnMap[column.name] = column.typeOid; + } + continue; + } + + const rows = chunk.rows.map((row) => { + let q: DatabaseInputRow = {}; + for (let c of columns) { + q[c.name] = row[c.i]; + } + return q; + }); + if (rows.length > 0) { + hasRemainingData = true; + } + + for (const inputRecord of PostgresSnapshotter.getQueryData(rows)) { + const record = this.syncRulesRecord(this.connections.types.constructRowRecord(columnMap, inputRecord)); + // This auto-flushes when the batch reaches its size limit + await batch.save({ + tag: storage.SaveOperationTag.INSERT, + sourceTable: table, + before: undefined, + beforeReplicaId: undefined, + after: record, + afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) + }); + } + + at += rows.length; + this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); + + this.touch(); + } + + // Important: flush before marking progress + await batch.flush(); + if (limited == null) { + let lastKey: Uint8Array | undefined; + if (q instanceof ChunkedSnapshotQuery) { + lastKey = q.getLastKeySerialized(); + } + if (lastCountTime < performance.now() - 10 * 60 * 1000) { + // Even though we're doing the snapshot inside a transaction, the transaction uses + // the default "Read Committed" isolation level. This means we can get new data + // within the transaction, so we re-estimate the count every 10 minutes when replicating + // large tables. 
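The resumable path above relies on a single replica-identity column so that each chunk can pick up strictly after the last key seen (the "resuming from key > lastKey" log line). A minimal keyset-pagination sketch of that idea; the real `ChunkedSnapshotQuery` also handles identifier quoting, key serialization and cursor streaming, so this is illustrative only:

// Illustrative only: build the statement for the next chunk of rows strictly after `lastKey`,
// ordered by the single primary-key / replica-identity column.
function nextChunkStatement(table: string, keyColumn: string, lastKey: unknown, limit: number) {
  if (lastKey == null) {
    return { statement: `SELECT * FROM ${table} ORDER BY ${keyColumn} LIMIT ${limit}`, params: [] as unknown[] };
  }
  return {
    statement: `SELECT * FROM ${table} WHERE ${keyColumn} > $1 ORDER BY ${keyColumn} LIMIT ${limit}`,
    params: [lastKey]
  };
}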
+ totalEstimatedCount = await this.estimatedCountNumber(db, table); + lastCountTime = performance.now(); + } + table = await batch.updateTableProgress(table, { + lastKey: lastKey, + replicatedCount: at, + totalEstimatedCount: totalEstimatedCount + }); + this.relationCache.update(table); + + this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); + } else { + this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); + } + + if (this.abortSignal.aborted) { + // We only abort after flushing + throw new ReplicationAbortedError(`Table snapshot interrupted`, this.abortSignal.reason); + } + } + } + + async handleRelation(options: { + batch: storage.BucketStorageBatch; + descriptor: SourceEntityDescriptor; + referencedTypeIds: number[]; + }) { + const { batch, descriptor, referencedTypeIds } = options; + + if (!descriptor.objectId && typeof descriptor.objectId != 'number') { + throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`); + } + const result = await this.storage.resolveTable({ + group_id: this.group_id, + connection_id: this.connection_id, + connection_tag: this.connections.connectionTag, + entity_descriptor: descriptor, + sync_rules: this.sync_rules + }); + this.relationCache.update(result.table); + + // Drop conflicting tables. This includes for example renamed tables. + await batch.drop(result.dropTables); + + // Ensure we have a description for custom types referenced in the table. + await this.connections.types.fetchTypes(referencedTypeIds); + + return result.table; + } + + private touch() { + container.probes.touch().catch((e) => { + this.logger.error(`Error touching probe`, e); + }); + } + + private syncRulesRecord(row: SqliteInputRow): SqliteRow; + private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined; + + private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined { + if (row == null) { + return undefined; + } + return this.sync_rules.applyRowContext(row); + } +} diff --git a/modules/module-postgres/src/replication/WalStream.ts b/modules/module-postgres/src/replication/WalStream.ts index a68cf820d..9e3426d6a 100644 --- a/modules/module-postgres/src/replication/WalStream.ts +++ b/modules/module-postgres/src/replication/WalStream.ts @@ -1,9 +1,7 @@ import * as lib_postgres from '@powersync/lib-service-postgres'; import { container, - DatabaseConnectionError, logger as defaultLogger, - ErrorCode, Logger, ReplicationAbortedError, ReplicationAssertionError @@ -22,28 +20,19 @@ import * as pgwire from '@powersync/service-jpgwire'; import { applyValueContext, CompatibilityContext, - DatabaseInputRow, SqliteInputRow, SqliteInputValue, SqliteRow, SqlSyncRules, - TablePattern, - ToastableSqliteRow, - toSyncRulesRow + ToastableSqliteRow } from '@powersync/service-sync-rules'; import { ReplicationMetric } from '@powersync/service-types'; import { PgManager } from './PgManager.js'; import { getPgOutputRelation, getRelId, referencedColumnTypeIds } from './PgRelation.js'; -import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js'; -import { - ChunkedSnapshotQuery, - IdSnapshotQuery, - MissingRow, - PrimaryKeyValue, - SimpleSnapshotQuery, - SnapshotQuery -} from './SnapshotQuery.js'; +import { PostgresSnapshotter } from './PostgresSnapshotter.js'; +import { ensureStorageCompatibility } from './replication-utils.js'; +import { IdSnapshotQuery, MissingRow, PrimaryKeyValue } from 
'./SnapshotQuery.js'; export interface WalStreamOptions { logger?: Logger; @@ -62,13 +51,6 @@ export interface WalStreamOptions { snapshotChunkLength?: number; } -interface InitResult { - /** True if initial snapshot is not yet done. */ - needsInitialSync: boolean; - /** True if snapshot must be started from scratch with a new slot. */ - needsNewSlot: boolean; -} - export const ZERO_LSN = '00000000/00000000'; export const PUBLICATION_NAME = 'powersync'; export const POSTGRES_DEFAULT_SCHEMA = 'public'; @@ -120,7 +102,11 @@ export class WalStream { private connections: PgManager; - private abort_signal: AbortSignal; + private abortController = new AbortController(); + private abortSignal: AbortSignal = this.abortController.signal; + + private initPromise: Promise | null = null; + private snapshotter: PostgresSnapshotter; private relationCache = new RelationCache((relation: number | SourceTable) => { if (typeof relation == 'number') { @@ -131,8 +117,6 @@ export class WalStream { private startedStreaming = false; - private snapshotChunkLength: number; - /** * Time of the oldest uncommitted change, according to the source db. * This is used to determine the replication lag. @@ -144,7 +128,7 @@ export class WalStream { */ private isStartingReplication = true; - constructor(options: WalStreamOptions) { + constructor(private options: WalStreamOptions) { this.logger = options.logger ?? defaultLogger; this.storage = options.storage; this.metrics = options.metrics; @@ -152,10 +136,17 @@ export class WalStream { this.group_id = options.storage.group_id; this.slot_name = options.storage.slot_name; this.connections = options.connections; - this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000; - this.abort_signal = options.abort_signal; - this.abort_signal.addEventListener( + // We wrap in our own abort controller so we can trigger abort internally. + options.abort_signal.addEventListener('abort', () => { + this.abortController.abort(options.abort_signal.reason); + }); + if (options.abort_signal.aborted) { + this.abortController.abort(options.abort_signal.reason); + } + + this.snapshotter = new PostgresSnapshotter({ ...options, abort_signal: this.abortSignal }); + this.abortSignal.addEventListener( 'abort', () => { if (this.startedStreaming) { @@ -177,463 +168,7 @@ export class WalStream { } get stopped() { - return this.abort_signal.aborted; - } - - async getQualifiedTableNames( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - tablePattern: TablePattern - ): Promise { - const schema = tablePattern.schema; - if (tablePattern.connectionTag != this.connections.connectionTag) { - return []; - } - - let tableRows: any[]; - const prefix = tablePattern.isWildcard ? 
tablePattern.tablePrefix : undefined; - - { - let query = ` - SELECT - c.oid AS relid, - c.relname AS table_name, - (SELECT - json_agg(DISTINCT a.atttypid) - FROM pg_attribute a - WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid) - AS column_types - FROM pg_class c - JOIN pg_namespace n ON n.oid = c.relnamespace - WHERE n.nspname = $1 - AND c.relkind = 'r'`; - - if (tablePattern.isWildcard) { - query += ' AND c.relname LIKE $2'; - } else { - query += ' AND c.relname = $2'; - } - - const result = await db.query({ - statement: query, - params: [ - { type: 'varchar', value: schema }, - { type: 'varchar', value: tablePattern.tablePattern } - ] - }); - - tableRows = pgwire.pgwireRows(result); - } - - let result: storage.SourceTable[] = []; - - for (let row of tableRows) { - const name = row.table_name as string; - if (typeof row.relid != 'bigint') { - throw new ReplicationAssertionError(`Missing relid for ${name}`); - } - const relid = Number(row.relid as bigint); - - if (prefix && !name.startsWith(prefix)) { - continue; - } - - const rs = await db.query({ - statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`, - params: [ - { type: 'varchar', value: PUBLICATION_NAME }, - { type: 'varchar', value: tablePattern.schema }, - { type: 'varchar', value: name } - ] - }); - if (rs.rows.length == 0) { - this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`); - continue; - } - - try { - const result = await checkTableRls(db, relid); - if (!result.canRead) { - // We log the message, then continue anyway, since the check does not cover all cases. - this.logger.warn(result.message!); - } - } catch (e) { - // It's possible that we just don't have permission to access pg_roles - log the error and continue. - this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e); - } - - const cresult = await getReplicationIdentityColumns(db, relid); - - const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e)); - const table = await this.handleRelation({ - batch, - descriptor: { - name, - schema, - objectId: relid, - replicaIdColumns: cresult.replicationColumns - } as SourceEntityDescriptor, - snapshot: false, - referencedTypeIds: columnTypes - }); - - result.push(table); - } - return result; - } - - async initSlot(): Promise { - await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME); - await this.ensureStorageCompatibility(); - - const slotName = this.slot_name; - - const status = await this.storage.getStatus(); - const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null; - if (snapshotDone) { - // Snapshot is done, but we still need to check the replication slot status - this.logger.info(`Initial replication already done`); - } - - // Check if replication slot exists - const slot = pgwire.pgwireRows( - await this.connections.pool.query({ - // We specifically want wal_status and invalidation_reason, but it's not available on older versions, - // so we just query *. - statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] - }) - )[0]; - - // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot. - // However, there were some edge cases where the query times out, repeating the query, ultimately - // causing high load on the source database and never recovering automatically. 
- // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those - // errors during streaming replication, which is a little more robust. - - // We can have: - // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version). - // Theoretically we could handle this the same as (2). - // 2. needsInitialSync: true, no slot -> create new slot - // 3. needsInitialSync: true, valid slot -> resume initial sync - // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version) - // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version) - // 6. needsInitialSync: false, valid slot -> resume streaming replication - // The main advantage of MissingReplicationSlotError are: - // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while - // we do the reprocessing under a new slot name. - // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for - // the partial data to be cleared. - if (slot != null) { - // This checks that the slot is still valid - - // wal_status is present in postgres 13+ - // invalidation_reason is present in postgres 17+ - const lost = slot.wal_status == 'lost'; - if (lost) { - // Case 1 / 4 - throw new MissingReplicationSlotError( - `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}` - ); - } - // Case 3 / 6 - return { - needsInitialSync: !snapshotDone, - needsNewSlot: false - }; - } else { - if (snapshotDone) { - // Case 5 - // This will create a new slot, while keeping the current sync rules active - throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`); - } - // Case 2 - // This will clear data (if any) and re-create the same slot - return { needsInitialSync: true, needsNewSlot: true }; - } - } - - async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise { - const results = await db.query({ - statement: `SELECT reltuples::bigint AS estimate -FROM pg_class -WHERE oid = $1::regclass`, - params: [{ value: table.qualifiedName, type: 'varchar' }] - }); - const row = results.rows[0]; - if ((row?.[0] ?? -1n) == -1n) { - return -1; - } else { - return Number(row[0]); - } - } - - /** - * Start initial replication. - * - * If (partial) replication was done before on this slot, this clears the state - * and starts again from scratch. - */ - async startInitialReplication(replicationConnection: pgwire.PgConnection, status: InitResult) { - // If anything here errors, the entire replication process is aborted, - // and all connections are closed, including this one. - const db = await this.connections.snapshotConnection(); - - const slotName = this.slot_name; - - if (status.needsNewSlot) { - // This happens when there is no existing replication slot, or if the - // existing one is unhealthy. - // In those cases, we have to start replication from scratch. - // If there is an existing healthy slot, we can skip this and continue - // initial replication where we left off. - await this.storage.clear({ signal: this.abort_signal }); - - await db.query({ - statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1', - params: [{ type: 'varchar', value: slotName }] - }); - - // We use the replication connection here, not a pool. 
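The six numbered cases in the initSlot comment above reduce to a small decision table. The sketch below is only a restatement of that comment under hypothetical names (SlotAction, SlotRow, resolveSlotAction); it is not the PR's code:

// Restating the slot/snapshot case analysis as a pure function (illustrative only).
type SlotAction =
  | { kind: 'missing-slot-error'; reason: string } // cases 1, 4, 5: reprocess under a new slot
  | { kind: 'create-new-slot' } // case 2: clear data (if any) and re-create the slot
  | { kind: 'resume-initial-sync' } // case 3
  | { kind: 'resume-streaming' }; // case 6

interface SlotRow {
  wal_status?: string; // available on Postgres 13+
  invalidation_reason?: string; // available on Postgres 17+
}

function resolveSlotAction(snapshotDone: boolean, slot: SlotRow | null): SlotAction {
  if (slot != null) {
    if (slot.wal_status == 'lost') {
      // Cases 1 and 4: the slot exists but is no longer usable.
      return { kind: 'missing-slot-error', reason: slot.invalidation_reason ?? 'unknown' };
    }
    // Cases 3 and 6: a healthy slot lets us resume where we left off.
    return snapshotDone ? { kind: 'resume-streaming' } : { kind: 'resume-initial-sync' };
  }
  if (snapshotDone) {
    // Case 5: keep serving the completed snapshot while a new slot is prepared.
    return { kind: 'missing-slot-error', reason: 'replication slot missing' };
  }
  // Case 2: no slot and no complete snapshot yet.
  return { kind: 'create-new-slot' };
}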
- // The replication slot must be created before we start snapshotting tables. - await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`); - - this.logger.info(`Created replication slot ${slotName}`); - } - - await this.initialReplication(db); - } - - async initialReplication(db: pgwire.PgConnection) { - const sourceTables = this.sync_rules.getSourceTables(); - const flushResults = await this.storage.startBatch( - { - logger: this.logger, - zeroLSN: ZERO_LSN, - defaultSchema: POSTGRES_DEFAULT_SCHEMA, - storeCurrentData: true, - skipExistingRows: true - }, - async (batch) => { - let tablesWithStatus: SourceTable[] = []; - for (let tablePattern of sourceTables) { - const tables = await this.getQualifiedTableNames(batch, db, tablePattern); - // Pre-get counts - for (let table of tables) { - if (table.snapshotComplete) { - this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`); - continue; - } - const count = await this.estimatedCountNumber(db, table); - table = await batch.updateTableProgress(table, { totalEstimatedCount: count }); - this.relationCache.update(table); - tablesWithStatus.push(table); - - this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } - } - - for (let table of tablesWithStatus) { - await this.snapshotTableInTx(batch, db, table); - this.touch(); - } - - // Always commit the initial snapshot at zero. - // This makes sure we don't skip any changes applied before starting this snapshot, - // in the case of snapshot retries. - // We could alternatively commit at the replication slot LSN. - await batch.commit(ZERO_LSN); - } - ); - /** - * Send a keepalive message after initial replication. - * In some edge cases we wait for a keepalive after the initial snapshot. - * If we don't explicitly check the contents of keepalive messages then a keepalive is detected - * rather quickly after initial replication - perhaps due to other WAL events. - * If we do explicitly check the contents of messages, we need an actual keepalive payload in order - * to advance the active sync rules LSN. - */ - await sendKeepAlive(db); - - const lastOp = flushResults?.flushed_op; - if (lastOp != null) { - // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules. - await this.storage.populatePersistentChecksumCache({ - // No checkpoint yet, but we do have the opId. - maxOpId: lastOp, - signal: this.abort_signal - }); - } - } - - static *getQueryData(results: Iterable): Generator { - for (let row of results) { - yield toSyncRulesRow(row); - } - } - private async snapshotTableInTx( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ): Promise { - // Note: We use the default "Read Committed" isolation level here, not snapshot isolation. - // The data may change during the transaction, but that is compensated for in the streaming - // replication afterwards. - await db.query('BEGIN'); - try { - let tableLsnNotBefore: string; - await this.snapshotTable(batch, db, table, limited); - - // Get the current LSN. - // The data will only be consistent once incremental replication has passed that point. - // We have to get this LSN _after_ we have finished the table snapshot. - // - // There are basically two relevant LSNs here: - // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side, - // but it is implicitly recorded in the replication slot. 
- // B: The LSN after the table snapshot is complete, which is what we get here. - // When we do the snapshot queries, the data that we get back for each chunk could match the state - // anywhere between A and B. To actually have a consistent state on our side, we need to: - // 1. Complete the snapshot. - // 2. Wait until logical replication has caught up with all the change between A and B. - // Calling `markSnapshotDone(LSN B)` covers that. - const rs = await db.query(`select pg_current_wal_lsn() as lsn`); - tableLsnNotBefore = rs.rows[0][0]; - // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction. - await db.query('COMMIT'); - const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore); - this.relationCache.update(resultTable); - return resultTable; - } catch (e) { - await db.query('ROLLBACK'); - throw e; - } - } - - private async snapshotTable( - batch: storage.BucketStorageBatch, - db: pgwire.PgConnection, - table: storage.SourceTable, - limited?: PrimaryKeyValue[] - ) { - let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount; - let at = table.snapshotStatus?.replicatedCount ?? 0; - let lastCountTime = 0; - let q: SnapshotQuery; - // We do streaming on two levels: - // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time. - // 2. Fine level: Stream chunks from each fetch call. - if (limited) { - q = new IdSnapshotQuery(db, table, limited); - } else if (ChunkedSnapshotQuery.supports(table)) { - // Single primary key - we can use the primary key for chunking - const orderByKey = table.replicaIdColumns[0]; - q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null); - if (table.snapshotStatus?.lastKey != null) { - this.logger.info( - `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}` - ); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`); - } - } else { - // Fallback case - query the entire table - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`); - q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength); - at = 0; - } - await q.initialize(); - - let columns: { i: number; name: string }[] = []; - let columnMap: Record = {}; - let hasRemainingData = true; - while (hasRemainingData) { - // Fetch 10k at a time. - // The balance here is between latency overhead per FETCH call, - // and not spending too much time on each FETCH call. - // We aim for a couple of seconds on each FETCH call. - const cursor = q.nextChunk(); - hasRemainingData = false; - // pgwire streams rows in chunks. - // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically. - // There are typically 100-200 rows per chunk. - for await (let chunk of cursor) { - if (chunk.tag == 'RowDescription') { - // We get a RowDescription for each FETCH call, but they should - // all be the same. 
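The transaction and LSN handling removed above (and re-homed in PostgresSnapshotter) follows a pattern that is easy to restate in isolation: snapshot the table under Read Committed, read pg_current_wal_lsn() only after the snapshot finishes, and treat the table as consistent only once streaming replication has passed that LSN. A minimal sketch, assuming a generic query(sql) interface and a markSnapshotDone callback (both hypothetical):

// Sketch of the per-table snapshot/LSN handshake (illustrative, not the PR's code).
interface SnapshotDb {
  query(sql: string): Promise<{ rows: any[][] }>;
}

async function snapshotTableWithLsn(
  db: SnapshotDb,
  snapshotTable: () => Promise<void>,
  markSnapshotDone: (lsnNotBefore: string) => Promise<void>
): Promise<void> {
  // Default "Read Committed" isolation: data may change while we read it;
  // streaming replication compensates for that afterwards.
  await db.query('BEGIN');
  try {
    await snapshotTable();
    // LSN "B": read only after the snapshot completes. Each chunk we read may
    // reflect any state between the start of the snapshot ("A") and this point,
    // so the table is only consistent once streaming has caught up to B.
    const rs = await db.query('SELECT pg_current_wal_lsn() AS lsn');
    const lsnNotBefore = rs.rows[0][0] as string;
    await db.query('COMMIT'); // ROLLBACK would also work; this transaction only reads.
    await markSnapshotDone(lsnNotBefore);
  } catch (e) {
    await db.query('ROLLBACK');
    throw e;
  }
}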
- let i = 0; - columns = chunk.payload.map((c) => { - return { i: i++, name: c.name }; - }); - for (let column of chunk.payload) { - columnMap[column.name] = column.typeOid; - } - continue; - } - - const rows = chunk.rows.map((row) => { - let q: DatabaseInputRow = {}; - for (let c of columns) { - q[c.name] = row[c.i]; - } - return q; - }); - if (rows.length > 0) { - hasRemainingData = true; - } - - for (const inputRecord of WalStream.getQueryData(rows)) { - const record = this.syncRulesRecord(this.connections.types.constructRowRecord(columnMap, inputRecord)); - // This auto-flushes when the batch reaches its size limit - await batch.save({ - tag: storage.SaveOperationTag.INSERT, - sourceTable: table, - before: undefined, - beforeReplicaId: undefined, - after: record, - afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns) - }); - } - - at += rows.length; - this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length); - - this.touch(); - } - - // Important: flush before marking progress - await batch.flush(); - if (limited == null) { - let lastKey: Uint8Array | undefined; - if (q instanceof ChunkedSnapshotQuery) { - lastKey = q.getLastKeySerialized(); - } - if (lastCountTime < performance.now() - 10 * 60 * 1000) { - // Even though we're doing the snapshot inside a transaction, the transaction uses - // the default "Read Committed" isolation level. This means we can get new data - // within the transaction, so we re-estimate the count every 10 minutes when replicating - // large tables. - totalEstimatedCount = await this.estimatedCountNumber(db, table); - lastCountTime = performance.now(); - } - table = await batch.updateTableProgress(table, { - lastKey: lastKey, - replicatedCount: at, - totalEstimatedCount: totalEstimatedCount - }); - this.relationCache.update(table); - - this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`); - } else { - this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`); - } - - if (this.abort_signal.aborted) { - // We only abort after flushing - throw new ReplicationAbortedError(`Initial replication interrupted`); - } - } + return this.abortSignal.aborted; } async handleRelation(options: { @@ -657,7 +192,10 @@ WHERE oid = $1::regclass`, this.relationCache.update(result.table); // Drop conflicting tables. This includes for example renamed tables. - await batch.drop(result.dropTables); + if (result.dropTables.length > 0) { + this.logger.info(`Dropping conflicting tables: ${result.dropTables.map((t) => t.qualifiedName).join(', ')}`); + await batch.drop(result.dropTables); + } // Ensure we have a description for custom types referenced in the table. await this.connections.types.fetchTypes(referencedTypeIds); @@ -669,22 +207,8 @@ WHERE oid = $1::regclass`, const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny; if (shouldSnapshot) { - // Truncate this table, in case a previous snapshot was interrupted. - await batch.truncate([result.table]); - - // Start the snapshot inside a transaction. - // We use a dedicated connection for this. - const db = await this.connections.snapshotConnection(); - try { - const table = await this.snapshotTableInTx(batch, db, result.table); - // After the table snapshot, we wait for replication to catch up. - // To make sure there is actually something to replicate, we send a keepalive - // message. 
- await sendKeepAlive(db); - return table; - } finally { - await db.end(); - } + this.logger.info(`Queuing snapshot for new table ${result.table.qualifiedName}`); + await this.snapshotter.queueSnapshot(batch, result.table); } return result.table; @@ -711,7 +235,7 @@ WHERE oid = $1::regclass`, try { for (let rows of byTable.values()) { const table = rows[0].table; - await this.snapshotTableInTx( + await this.snapshotter.snapshotTableInTx( batch, db, table, @@ -813,57 +337,116 @@ WHERE oid = $1::regclass`, return null; } + /** + * Start replication loop, and continue until aborted or error. + */ async replicate() { + let streamPromise: Promise | null = null; + let loopPromise: Promise | null = null; try { - // If anything errors here, the entire replication process is halted, and - // all connections automatically closed, including this one. - const initReplicationConnection = await this.connections.replicationConnection(); - await this.initReplication(initReplicationConnection); - await initReplicationConnection.end(); - - // At this point, the above connection has often timed out, so we start a new one - const streamReplicationConnection = await this.connections.replicationConnection(); - await this.streamChanges(streamReplicationConnection); - await streamReplicationConnection.end(); + this.initPromise = this.initReplication(); + await this.initPromise; + // These Promises are both expected to run until aborted or error. + streamPromise = this.streamChanges() + .then(() => { + throw new ReplicationAssertionError(`Replication stream exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + loopPromise = this.snapshotter + .replicationLoop() + .then(() => { + throw new ReplicationAssertionError(`Replication snapshotter exited unexpectedly`); + }) + .catch((e) => { + this.abortController.abort(e); + throw e; + }); + const results = await Promise.allSettled([loopPromise, streamPromise]); + // First, prioritize non-aborted errors + for (let result of results) { + if (result.status == 'rejected' && !(result.reason instanceof ReplicationAbortedError)) { + throw result.reason; + } + } + // Then include aborted errors + for (let result of results) { + if (result.status == 'rejected') { + throw result.reason; + } + } + + // If we get here, both Promises completed successfully, which is unexpected. + throw new ReplicationAssertionError(`Replication loop exited unexpectedly`); } catch (e) { await this.storage.reportError(e); throw e; + } finally { + // Just to make sure + this.abortController.abort(); } } - async initReplication(replicationConnection: pgwire.PgConnection) { - const result = await this.initSlot(); - if (result.needsInitialSync) { - await this.startInitialReplication(replicationConnection, result); + /** + * For tests: Wait until the initial snapshot is complete. + */ + public async waitForInitialSnapshot() { + if (this.initPromise == null) { + throw new ReplicationAssertionError('replicate() must be called before waitForInitialSnapshot()'); } + await this.initPromise; + + await this.snapshotter.waitForInitialSnapshot(); } - async streamChanges(replicationConnection: pgwire.PgConnection) { + /** + * Initialize replication. + * Start replication loop, and continue until aborted, error or initial snapshot completed. 
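The restructured replicate() above runs the streaming task and the snapshotter loop concurrently, aborts the sibling when either one fails, and reports non-abort errors ahead of the abort errors they cause. The same pattern in isolation, with hypothetical task and error names:

// Sketch: race two long-lived tasks, cancel the sibling on failure, and surface
// real errors before the AbortedError they trigger (illustrative only).
class AbortedError extends Error {}

async function runBoth(
  streamTask: (signal: AbortSignal) => Promise<never>,
  snapshotTask: (signal: AbortSignal) => Promise<never>
): Promise<never> {
  const controller = new AbortController();
  const wrap = (task: Promise<never>) =>
    task.catch((e) => {
      controller.abort(e); // one failure aborts the other task
      throw e;
    });

  const results = await Promise.allSettled([
    wrap(streamTask(controller.signal)),
    wrap(snapshotTask(controller.signal))
  ]);

  // First, prefer a failure that is not just the propagated abort.
  for (const result of results) {
    if (result.status == 'rejected' && !(result.reason instanceof AbortedError)) {
      throw result.reason;
    }
  }
  // Otherwise surface whatever rejection we have.
  for (const result of results) {
    if (result.status == 'rejected') {
      throw result.reason;
    }
  }
  // Both tasks resolving is unexpected for tasks that should run until aborted.
  throw new Error('both tasks exited unexpectedly');
}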
+ */ + private async initReplication() { + const result = await this.snapshotter.checkSlot(); + const db = await this.connections.snapshotConnection(); + try { + await this.snapshotter.setupSlot(db, result); + if (result.needsInitialSync) { + await this.snapshotter.queueSnapshotTables(db); + } + } finally { + await db.end(); + } + } + + private async streamChanges() { + const streamReplicationConnection = await this.connections.replicationConnection(); try { - await this.streamChangesInternal(replicationConnection); + await this.streamChangesInternal(streamReplicationConnection); } catch (e) { if (isReplicationSlotInvalidError(e)) { throw new MissingReplicationSlotError(e.message, e); } throw e; + } finally { + await streamReplicationConnection.end(); } } private async streamChangesInternal(replicationConnection: pgwire.PgConnection) { // When changing any logic here, check /docs/wal-lsns.md. - const { createEmptyCheckpoints } = await this.ensureStorageCompatibility(); + + // Viewing the contents of logical messages emitted with `pg_logical_emit_message` + // is only supported on Postgres >= 14.0. + // https://www.postgresql.org/docs/14/protocol-logical-replication.html + const { createEmptyCheckpoints, exposesLogicalMessages } = await ensureStorageCompatibility( + this.connections.pool, + this.storage.factory + ); const replicationOptions: Record = { proto_version: '1', publication_names: PUBLICATION_NAME }; - - /** - * Viewing the contents of logical messages emitted with `pg_logical_emit_message` - * is only supported on Postgres >= 14.0. - * https://www.postgresql.org/docs/14/protocol-logical-replication.html - */ - const exposesLogicalMessages = await this.checkLogicalMessageSupport(); if (exposesLogicalMessages) { /** * Only add this option if the Postgres server supports it. @@ -924,7 +507,7 @@ WHERE oid = $1::regclass`, for await (const chunk of replicationStream.pgoutputDecode()) { this.touch(); - if (this.abort_signal.aborted) { + if (this.abortSignal.aborted) { break; } @@ -1019,6 +602,7 @@ WHERE oid = $1::regclass`, // Big caveat: This _must not_ be used to skip individual messages, since this LSN // may be in the middle of the next transaction. // It must only be used to associate checkpoints with LSNs. + const didCommit = await batch.keepalive(chunkLastLsn); if (didCommit) { this.oldestUncommittedChange = null; @@ -1036,6 +620,8 @@ WHERE oid = $1::regclass`, } } ); + + throw new ReplicationAbortedError(`Replication stream aborted`, this.abortSignal.reason); } async ack(lsn: string, replicationStream: pgwire.ReplicationStream) { @@ -1046,55 +632,6 @@ WHERE oid = $1::regclass`, replicationStream.ack(lsn); } - /** - * Ensures that the storage is compatible with the replication connection. - * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. - */ - protected async ensureStorageCompatibility(): Promise { - const supportsLogicalMessages = await this.checkLogicalMessageSupport(); - - const storageIdentifier = await this.storage.factory.getSystemIdentifier(); - if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { - return { - // Keep the same behaviour as before allowing Postgres storage. - createEmptyCheckpoints: true, - oldestUncommittedChange: null - }; - } - - const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); - /** - * Check if the same server is being used for both the sync bucket storage and the logical replication. 
- */ - const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(this.connections.pool); - - if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { - throw new DatabaseConnectionError( - ErrorCode.PSYNC_S1144, - `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, - new Error('Postgres version is below 14') - ); - } - - return { - /** - * Don't create empty checkpoints if the same Postgres database is used for the data source - * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. - */ - createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, - oldestUncommittedChange: null - }; - } - - /** - * Check if the replication connection Postgres server supports - * viewing the contents of logical replication messages. - */ - protected async checkLogicalMessageSupport() { - const version = await this.connections.getServerVersion(); - return version ? version.compareMain('14.0.0') >= 0 : false; - } - async getReplicationLagMillis(): Promise { if (this.oldestUncommittedChange == null) { if (this.isStartingReplication) { diff --git a/modules/module-postgres/src/replication/replication-utils.ts b/modules/module-postgres/src/replication/replication-utils.ts index 57ec4ba74..48f627222 100644 --- a/modules/module-postgres/src/replication/replication-utils.ts +++ b/modules/module-postgres/src/replication/replication-utils.ts @@ -1,11 +1,18 @@ import * as pgwire from '@powersync/service-jpgwire'; import * as lib_postgres from '@powersync/lib-service-postgres'; -import { ErrorCode, logger, ServiceAssertionError, ServiceError } from '@powersync/lib-services-framework'; -import { PatternResult, storage } from '@powersync/service-core'; +import { + DatabaseConnectionError, + ErrorCode, + logger, + ServiceAssertionError, + ServiceError +} from '@powersync/lib-services-framework'; +import { BucketStorageFactory, PatternResult, storage } from '@powersync/service-core'; import * as sync_rules from '@powersync/service-sync-rules'; import * as service_types from '@powersync/service-types'; import { ReplicationIdentity } from './PgRelation.js'; +import { getServerVersion } from '../utils/postgres_version.js'; export interface ReplicaIdentityResult { replicationColumns: storage.ColumnDescriptor[]; @@ -316,7 +323,7 @@ export async function getDebugTableInfo(options: GetDebugTableInfoOptions): Prom const id_columns = id_columns_result?.replicationColumns ?? []; const sourceTable = new storage.SourceTable({ - id: 0, + id: '', // not used connectionTag: connectionTag, objectId: relationId ?? 0, schema: schema, @@ -396,3 +403,57 @@ export async function cleanUpReplicationSlot(slotName: string, db: pgwire.PgClie params: [{ type: 'varchar', value: slotName }] }); } + +/** + * Ensures that the storage is compatible with the replication connection. + * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection. + */ +export async function ensureStorageCompatibility( + db: pgwire.PgClient, + factory: BucketStorageFactory +): Promise { + const supportsLogicalMessages = await checkLogicalMessageSupport(db); + + const storageIdentifier = await factory.getSystemIdentifier(); + if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) { + return { + // Keep the same behaviour as before allowing Postgres storage. 
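ensureStorageCompatibility (moved out of WalStream above) boils down to three outcomes, depending on where the sync bucket storage lives relative to the replication source. A simplified restatement with hypothetical types (PostgresIdentity, compatibilityDecision), just to make the decision explicit:

// Simplified restatement of the compatibility decision (illustrative only).
interface PostgresIdentity {
  server_id: string;
  database_name: string;
}

function compatibilityDecision(
  supportsLogicalMessages: boolean, // Postgres >= 14 on the replication source
  storage: PostgresIdentity | null, // null when storage is not Postgres
  replication: PostgresIdentity
): { createEmptyCheckpoints: boolean } {
  if (storage == null) {
    // Non-Postgres storage: keep the previous behaviour.
    return { createEmptyCheckpoints: true };
  }
  if (!supportsLogicalMessages && storage.server_id == replication.server_id) {
    // Below Postgres 14, the replication source and the sync bucket storage
    // must be separate servers.
    throw new Error('Separate Postgres servers are required below Postgres 14');
  }
  // Same database for source and storage: skip empty checkpoints to avoid
  // a WAL feedback loop.
  return { createEmptyCheckpoints: storage.database_name != replication.database_name };
}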
+ createEmptyCheckpoints: true, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; + } + + const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id); + /** + * Check if the same server is being used for both the sync bucket storage and the logical replication. + */ + const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(db); + + if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) { + throw new DatabaseConnectionError( + ErrorCode.PSYNC_S1144, + `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`, + new Error('Postgres version is below 14') + ); + } + + return { + /** + * Don't create empty checkpoints if the same Postgres database is used for the data source + * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops. + */ + createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name, + oldestUncommittedChange: null, + exposesLogicalMessages: supportsLogicalMessages + }; +} + +/** + * Check if the replication connection Postgres server supports + * viewing the contents of logical replication messages. + */ +export async function checkLogicalMessageSupport(db: pgwire.PgClient) { + const version = await getServerVersion(db); + return version ? version.compareMain('14.0.0') >= 0 : false; +} diff --git a/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap b/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap index 4133150e4..fec809548 100644 --- a/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap +++ b/modules/module-postgres/test/src/__snapshots__/schema_changes.test.ts.snap @@ -2,4 +2,4 @@ exports[`schema changes > mongodb storage > add to publication (not in sync rules) 1`] = `0`; -exports[`schema changes > postgres storage > add to publication (not in sync rules) 1`] = `16384`; +exports[`schema changes > postgres storage > add to publication (not in sync rules) 1`] = `24576`; diff --git a/modules/module-postgres/test/src/checkpoints.test.ts b/modules/module-postgres/test/src/checkpoints.test.ts index 6657a12e8..2b68d75e2 100644 --- a/modules/module-postgres/test/src/checkpoints.test.ts +++ b/modules/module-postgres/test/src/checkpoints.test.ts @@ -1,5 +1,5 @@ import { PostgresRouteAPIAdapter } from '@module/api/PostgresRouteAPIAdapter.js'; -import { checkpointUserId, createWriteCheckpoint, TestStorageFactory } from '@powersync/service-core'; +import { checkpointUserId, createWriteCheckpoint, TestStorageConfig } from '@powersync/service-core'; import { describe, test } from 'vitest'; import { describeWithStorage } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; @@ -15,7 +15,9 @@ describe('checkpoint tests', () => { describeWithStorage({}, checkpointTests); }); -const checkpointTests = (factory: TestStorageFactory) => { +const checkpointTests = (config: TestStorageConfig) => { + const { factory } = config; + test('write checkpoints', { timeout: 50_000 }, async () => { await using context = await WalStreamTestContext.open(factory); @@ -35,11 +37,9 @@ const checkpointTests = (factory: TestStorageFactory) => { await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`); - await context.replicateSnapshot(); - - context.startStreaming(); // Wait for a 
consistent checkpoint before we start. - await context.getCheckpoint(); + await context.initializeReplication(); + const storage = context.storage!; const controller = new AbortController(); diff --git a/modules/module-postgres/test/src/chunked_snapshots.test.ts b/modules/module-postgres/test/src/chunked_snapshots.test.ts index dffb8aa1e..44a095922 100644 --- a/modules/module-postgres/test/src/chunked_snapshots.test.ts +++ b/modules/module-postgres/test/src/chunked_snapshots.test.ts @@ -1,4 +1,4 @@ -import { reduceBucket, TestStorageFactory } from '@powersync/service-core'; +import { reduceBucket, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; import { METRICS_HELPER } from '@powersync/service-core-tests'; import { SqliteJsonValue } from '@powersync/service-sync-rules'; import * as crypto from 'node:crypto'; @@ -11,7 +11,9 @@ describe('chunked snapshots', () => { describeWithStorage({ timeout: 120_000 }, defineBatchTests); }); -function defineBatchTests(factory: TestStorageFactory) { +function defineBatchTests(config: TestStorageConfig) { + const { factory } = config; + // We need to test every supported type, since chunking could be quite sensitive to // how each specific type is handled. test('chunked snapshot edge case (int2)', async () => { @@ -142,7 +144,8 @@ function defineBatchTests(factory: TestStorageFactory) { await p; // 5. Logical replication picks up the UPDATE above, but it is missing the TOAST column. - context.startStreaming(); + // Note: logical replication now runs concurrently with the snapshot. + // TODO: re-check the test logic here. // 6. If all went well, the "resnapshot" process would take care of this. const data = await context.getBucketData('global[]', undefined, {}); diff --git a/modules/module-postgres/test/src/large_batch.test.ts b/modules/module-postgres/test/src/large_batch.test.ts index 777662f12..43a29ea28 100644 --- a/modules/module-postgres/test/src/large_batch.test.ts +++ b/modules/module-postgres/test/src/large_batch.test.ts @@ -6,8 +6,8 @@ import { describeWithStorage, TEST_CONNECTION_OPTIONS } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { - describeWithStorage({ timeout: 240_000 }, function (factory) { - defineBatchTests(factory); + describeWithStorage({ timeout: 240_000 }, function (config) { + defineBatchTests(config); }); }); @@ -16,7 +16,9 @@ const BASIC_SYNC_RULES = `bucket_definitions: data: - SELECT id, description, other FROM "test_data"`; -function defineBatchTests(factory: storage.TestStorageFactory) { +function defineBatchTests(config: storage.TestStorageConfig) { + const { factory } = config; + test('update large record', async () => { await using context = await WalStreamTestContext.open(factory); // This test generates a large transaction in MongoDB, despite the replicated data @@ -39,8 +41,6 @@ function defineBatchTests(factory: storage.TestStorageFactory) { const start = Date.now(); - context.startStreaming(); - const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); @@ -87,7 +87,6 @@ function defineBatchTests(factory: storage.TestStorageFactory) { const start = Date.now(); await context.replicateSnapshot(); - context.startStreaming(); const checkpoint = await context.getCheckpoint({ timeout: 100_000 }); const duration = Date.now() - start; @@ -139,8 +138,6 @@ function 
defineBatchTests(factory: storage.TestStorageFactory) { const start = Date.now(); - context.startStreaming(); - const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const duration = Date.now() - start; const used = Math.round(process.memoryUsage().heapUsed / 1024 / 1024); @@ -226,8 +223,6 @@ function defineBatchTests(factory: storage.TestStorageFactory) { }); await context.replicateSnapshot(); - context.startStreaming(); - const checkpoint = await context.getCheckpoint({ timeout: 50_000 }); const checksum = await context.storage!.getChecksums(checkpoint, ['global[]']); expect(checksum.get('global[]')!.count).toEqual((numDocs + 2) * 4); diff --git a/modules/module-postgres/test/src/pg_test.test.ts b/modules/module-postgres/test/src/pg_test.test.ts index 6a3dbb140..a4fa4f7d0 100644 --- a/modules/module-postgres/test/src/pg_test.test.ts +++ b/modules/module-postgres/test/src/pg_test.test.ts @@ -12,6 +12,7 @@ import { clearTestDb, connectPgPool, connectPgWire, TEST_URI } from './util.js'; import { WalStream } from '@module/replication/WalStream.js'; import { PostgresTypeResolver } from '@module/types/resolver.js'; import { CustomTypeRegistry } from '@module/types/registry.js'; +import { PostgresSnapshotter } from '@module/replication/PostgresSnapshotter.js'; describe('pg data types', () => { async function setupTable(db: pgwire.PgClient) { @@ -303,7 +304,7 @@ VALUES(10, ARRAY['null']::TEXT[]); await insert(db); const transformed = [ - ...WalStream.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data ORDER BY id`))) + ...PostgresSnapshotter.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data ORDER BY id`))) ]; checkResults(transformed); @@ -322,7 +323,7 @@ VALUES(10, ARRAY['null']::TEXT[]); await insert(db); const transformed = [ - ...WalStream.getQueryData( + ...PostgresSnapshotter.getQueryData( pgwire.pgwireRows( await db.query({ statement: `SELECT * FROM test_data WHERE $1 ORDER BY id`, @@ -346,7 +347,9 @@ VALUES(10, ARRAY['null']::TEXT[]); await insertArrays(db); const transformed = [ - ...WalStream.getQueryData(pgwire.pgwireRows(await db.query(`SELECT * FROM test_data_arrays ORDER BY id`))) + ...PostgresSnapshotter.getQueryData( + pgwire.pgwireRows(await db.query(`SELECT * FROM test_data_arrays ORDER BY id`)) + ) ].map((e) => applyRowContext(e, CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY)); checkResultArrays(transformed); @@ -449,7 +452,7 @@ INSERT INTO test_data(id, time, timestamp, timestamptz) VALUES (1, '17:42:01.12' `); const [row] = [ - ...WalStream.getQueryData( + ...PostgresSnapshotter.getQueryData( pgwire.pgwireRows(await db.query(`SELECT time, timestamp, timestamptz FROM test_data`)) ) ]; diff --git a/modules/module-postgres/test/src/resuming_snapshots.test.ts b/modules/module-postgres/test/src/resuming_snapshots.test.ts index 01da33739..f10d85032 100644 --- a/modules/module-postgres/test/src/resuming_snapshots.test.ts +++ b/modules/module-postgres/test/src/resuming_snapshots.test.ts @@ -1,12 +1,11 @@ +import { TestStorageConfig } from '@powersync/service-core'; +import { METRICS_HELPER } from '@powersync/service-core-tests'; +import { ReplicationMetric } from '@powersync/service-types'; +import * as timers from 'node:timers/promises'; import { describe, expect, test } from 'vitest'; import { env } from './env.js'; import { describeWithStorage } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; -import { TestStorageFactory } from '@powersync/service-core'; -import { METRICS_HELPER } from 
'@powersync/service-core-tests'; -import { ReplicationMetric } from '@powersync/service-types'; -import * as timers from 'node:timers/promises'; -import { ReplicationAbortedError } from '@powersync/lib-services-framework'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { describeWithStorage({ timeout: 240_000 }, function (factory) { @@ -21,7 +20,7 @@ describe.skipIf(!(env.CI || env.SLOW_TESTS))('batch replication', function () { }); }); -async function testResumingReplication(factory: TestStorageFactory, stopAfter: number) { +async function testResumingReplication(config: TestStorageConfig, stopAfter: number) { // This tests interrupting and then resuming initial replication. // We interrupt replication after test_data1 has fully replicated, and // test_data2 has partially replicated. @@ -33,7 +32,9 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // have been / have not been replicated at that point is not deterministic. // We do allow for some variation in the test results to account for this. - await using context = await WalStreamTestContext.open(factory, { walStreamOptions: { snapshotChunkLength: 1000 } }); + await using context = await WalStreamTestContext.open(config.factory, { + walStreamOptions: { snapshotChunkLength: 1000 } + }); await context.updateSyncRules(`bucket_definitions: global: @@ -74,15 +75,14 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n await context.dispose(); })(); // This confirms that initial replication was interrupted - const error = await p.catch((e) => e); - expect(error).toBeInstanceOf(ReplicationAbortedError); + await expect(p).rejects.toThrowError(); done = true; } finally { done = true; } // Bypass the usual "clear db on factory open" step. - await using context2 = await WalStreamTestContext.open(factory, { + await using context2 = await WalStreamTestContext.open(config.factory, { doNotClear: true, walStreamOptions: { snapshotChunkLength: 1000 } }); @@ -104,7 +104,6 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n await context2.loadNextSyncRules(); await context2.replicateSnapshot(); - context2.startStreaming(); const data = await context2.getBucketData('global[]', undefined, {}); const deletedRowOps = data.filter((row) => row.object_type == 'test_data2' && row.object_id === String(id1)); @@ -121,14 +120,14 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // so it's not in the resulting ops at all. } - expect(updatedRowOps.length).toEqual(2); + expect(updatedRowOps.length).toBeGreaterThanOrEqual(2); // description for the first op could be 'foo' or 'update1'. // We only test the final version. - expect(JSON.parse(updatedRowOps[1].data as string).description).toEqual('update1'); + expect(JSON.parse(updatedRowOps[updatedRowOps.length - 1].data as string).description).toEqual('update1'); - expect(insertedRowOps.length).toEqual(2); + expect(insertedRowOps.length).toBeGreaterThanOrEqual(1); expect(JSON.parse(insertedRowOps[0].data as string).description).toEqual('insert1'); - expect(JSON.parse(insertedRowOps[1].data as string).description).toEqual('insert1'); + expect(JSON.parse(insertedRowOps[insertedRowOps.length - 1].data as string).description).toEqual('insert1'); // 1000 of test_data1 during first replication attempt. // N >= 1000 of test_data2 during first replication attempt. 
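Because streaming replication now runs concurrently with the snapshot, a single row can legitimately produce more than one op, which is why the assertions above switch from exact counts to lower bounds and to checking the last op for a row. A small helper in that spirit (latestOpFor is hypothetical, not part of the test utilities):

// Sketch: assert on the most recent op for a row instead of an exact op count.
interface OpLike {
  object_type: string | null;
  object_id: string | null;
  data: string | null;
}

function latestOpFor(ops: OpLike[], type: string, id: string): OpLike | undefined {
  const matching = ops.filter((op) => op.object_type == type && op.object_id == id);
  return matching[matching.length - 1];
}

// Usage in a test would look something like:
//   const last = latestOpFor(data, 'test_data2', someRowId);
//   expect(JSON.parse(last!.data!).description).toEqual('update1');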
@@ -139,12 +138,12 @@ async function testResumingReplication(factory: TestStorageFactory, stopAfter: n // This adds 2 ops. // We expect this to be 11002 for stopAfter: 2000, and 11004 for stopAfter: 8000. // However, this is not deterministic. - const expectedCount = 11002 + deletedRowOps.length; + const expectedCount = 11000 - 2 + insertedRowOps.length + updatedRowOps.length + deletedRowOps.length; expect(data.length).toEqual(expectedCount); const replicatedCount = ((await METRICS_HELPER.getMetricValueForTests(ReplicationMetric.ROWS_REPLICATED)) ?? 0) - startRowCount; // With resumable replication, there should be no need to re-replicate anything. - expect(replicatedCount).toEqual(expectedCount); + expect(replicatedCount).toBeGreaterThanOrEqual(expectedCount); } diff --git a/modules/module-postgres/test/src/route_api_adapter.test.ts b/modules/module-postgres/test/src/route_api_adapter.test.ts index 98f16930c..bda84cafd 100644 --- a/modules/module-postgres/test/src/route_api_adapter.test.ts +++ b/modules/module-postgres/test/src/route_api_adapter.test.ts @@ -1,7 +1,7 @@ -import { describe, expect, test } from 'vitest'; -import { clearTestDb, connectPgPool } from './util.js'; import { PostgresRouteAPIAdapter } from '@module/api/PostgresRouteAPIAdapter.js'; import { TYPE_INTEGER, TYPE_REAL, TYPE_TEXT } from '@powersync/service-sync-rules'; +import { describe, expect, test } from 'vitest'; +import { clearTestDb, connectPgPool } from './util.js'; describe('PostgresRouteAPIAdapter tests', () => { test('infers connection schema', async () => { @@ -20,7 +20,9 @@ describe('PostgresRouteAPIAdapter tests', () => { `); const schema = await api.getConnectionSchema(); - expect(schema).toStrictEqual([ + // Filter out powersync schema, for cases where we use the same database for storage and replication testing. 
+ const filtered = schema.filter((s) => s.name != 'powersync'); + expect(filtered).toStrictEqual([ { name: 'public', tables: [ diff --git a/modules/module-postgres/test/src/schema_changes.test.ts b/modules/module-postgres/test/src/schema_changes.test.ts index c1994e7a8..7482a781e 100644 --- a/modules/module-postgres/test/src/schema_changes.test.ts +++ b/modules/module-postgres/test/src/schema_changes.test.ts @@ -24,7 +24,9 @@ const PUT_T3 = test_utils.putOp('test_data', { id: 't3', description: 'test3' }) const REMOVE_T1 = test_utils.removeOp('test_data', 't1'); const REMOVE_T2 = test_utils.removeOp('test_data', 't2'); -function defineTests(factory: storage.TestStorageFactory) { +function defineTests(config: storage.TestStorageConfig) { + const { factory } = config; + test('re-create table', async () => { await using context = await WalStreamTestContext.open(factory); @@ -37,7 +39,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); @@ -56,10 +57,7 @@ function defineTests(factory: storage.TestStorageFactory) { expect(data.slice(2, 4).sort(compareIds)).toMatchObject([REMOVE_T1, REMOVE_T2]); expect(data.slice(4)).toMatchObject([ - // Snapshot insert - PUT_T3, - // Replicated insert - // We may eventually be able to de-duplicate this + // Snapshot and/or replication insert PUT_T3 ]); }); @@ -71,20 +69,27 @@ function defineTests(factory: storage.TestStorageFactory) { const { pool } = context; await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`CREATE TABLE test_data(id text primary key, description text)`); await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); const data = await context.getBucketData('global[]'); - expect(data).toMatchObject([ - // Snapshot insert - PUT_T1, - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T1 - ]); + // Both of these are valid + if (data.length == 2) { + expect(data).toMatchObject([ + // Snapshot insert + PUT_T1, + // Replicated insert + // May be de-duplicated + PUT_T1 + ]); + } else { + expect(data).toMatchObject([ + // Replicated insert + PUT_T1 + ]); + } }); test('rename table (1)', async () => { @@ -98,7 +103,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data_old(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data_old RENAME TO test_data` }, @@ -113,11 +117,13 @@ function defineTests(factory: storage.TestStorageFactory) { PUT_T1, PUT_T2 ]); - expect(data.slice(2)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T2 - ]); + if (data.length > 2) { + expect(data.slice(2)).toMatchObject([ + // Replicated insert + // May be de-duplicated + PUT_T2 + ]); + } }); test('rename table (2)', async () => { @@ -136,7 +142,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data1(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data1 RENAME TO test_data2` }, @@ -158,11 +163,13 @@ function defineTests(factory: storage.TestStorageFactory) { putOp('test_data2', { id: 't1', description: 'test1' }), 
putOp('test_data2', { id: 't2', description: 'test2' }) ]); - expect(data.slice(4)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - putOp('test_data2', { id: 't2', description: 'test2' }) - ]); + if (data.length > 4) { + expect(data.slice(4)).toMatchObject([ + // Replicated insert + // This may be de-duplicated + putOp('test_data2', { id: 't2', description: 'test2' }) + ]); + } }); test('rename table (3)', async () => { @@ -177,7 +184,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data RENAME TO test_data_na` }, @@ -207,7 +213,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data REPLICA IDENTITY FULL` }, @@ -227,11 +232,13 @@ function defineTests(factory: storage.TestStorageFactory) { // Snapshot - order doesn't matter expect(data.slice(2, 4).sort(compareIds)).toMatchObject([PUT_T1, PUT_T2]); - expect(data.slice(4).sort(compareIds)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T2 - ]); + if (data.length > 4) { + expect(data.slice(4).sort(compareIds)).toMatchObject([ + // Replicated insert + // This may be de-duplicated + PUT_T2 + ]); + } }); test('change full replica id by adding column', async () => { @@ -248,7 +255,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data ADD COLUMN other TEXT` }, @@ -270,11 +276,13 @@ function defineTests(factory: storage.TestStorageFactory) { putOp('test_data', { id: 't2', description: 'test2', other: null }) ]); - expect(data.slice(4).sort(compareIds)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - putOp('test_data', { id: 't2', description: 'test2', other: null }) - ]); + if (data.length > 4) { + expect(data.slice(4).sort(compareIds)).toMatchObject([ + // Replicated insert + // This may be de-duplicated + putOp('test_data', { id: 't2', description: 'test2', other: null }) + ]); + } }); test('change default replica id by changing column type', async () => { @@ -288,7 +296,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data ALTER COLUMN id TYPE varchar` }, @@ -307,11 +314,13 @@ function defineTests(factory: storage.TestStorageFactory) { // Snapshot - order doesn't matter expect(data.slice(2, 4).sort(compareIds)).toMatchObject([PUT_T1, PUT_T2]); - expect(data.slice(4).sort(compareIds)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T2 - ]); + if (data.length > 4) { + expect(data.slice(4).sort(compareIds)).toMatchObject([ + // Replicated insert + // May be de-duplicated + PUT_T2 + ]); + } }); test('change index id by changing column type', async () => { @@ -330,7 +339,6 @@ function defineTests(factory: storage.TestStorageFactory) { await 
pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); @@ -357,11 +365,13 @@ function defineTests(factory: storage.TestStorageFactory) { // Snapshot - order doesn't matter expect(data.slice(4, 7).sort(compareIds)).toMatchObject([PUT_T1, PUT_T2, PUT_T3]); - expect(data.slice(7).sort(compareIds)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T3 - ]); + if (data.length > 7) { + expect(data.slice(7).sort(compareIds)).toMatchObject([ + // Replicated insert + // May be de-duplicated + PUT_T3 + ]); + } }); test('add to publication', async () => { @@ -379,7 +389,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); @@ -395,11 +404,13 @@ function defineTests(factory: storage.TestStorageFactory) { PUT_T3 ]); - expect(data.slice(3)).toMatchObject([ - // Replicated insert - // We may eventually be able to de-duplicate this - PUT_T3 - ]); + if (data.length > 3) { + expect(data.slice(3)).toMatchObject([ + // Replicated insert + // May be de-duplicated + PUT_T3 + ]); + } const metrics = await storage.factory.getStorageMetrics(); expect(metrics.replication_size_bytes).toBeGreaterThan(0); @@ -421,7 +432,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_other(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_other(id, description) VALUES('t2', 'test2')`); @@ -449,7 +459,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); @@ -487,7 +496,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data(id, description) VALUES('t1', 'test1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`INSERT INTO test_data(id, description) VALUES('t2', 'test2')`); @@ -536,7 +544,6 @@ function defineTests(factory: storage.TestStorageFactory) { await pool.query(`INSERT INTO test_data_old(id, num) VALUES('t2', 0)`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `ALTER TABLE test_data_old RENAME TO test_data` }, @@ -608,7 +615,6 @@ config: await pool.query(`INSERT INTO test_data(id) VALUES ('t1')`); await context.replicateSnapshot(); - context.startStreaming(); await pool.query( { statement: `CREATE TYPE composite AS (foo bool, bar int4);` }, diff --git a/modules/module-postgres/test/src/slow_tests.test.ts b/modules/module-postgres/test/src/slow_tests.test.ts index e792de8be..7a4f7603f 100644 --- a/modules/module-postgres/test/src/slow_tests.test.ts +++ b/modules/module-postgres/test/src/slow_tests.test.ts @@ -14,20 +14,28 @@ import * as pgwire from '@powersync/service-jpgwire'; import { SqliteRow } from '@powersync/service-sync-rules'; import { PgManager } from '@module/replication/PgManager.js'; -import { createCoreReplicationMetrics, initializeCoreReplicationMetrics, storage } from 
'@powersync/service-core'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; +import { + createCoreReplicationMetrics, + initializeCoreReplicationMetrics, + reduceBucket, + storage +} from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; import * as timers from 'node:timers/promises'; -import { CustomTypeRegistry } from '@module/types/registry.js'; +import { WalStreamTestContext } from './wal_stream_utils.js'; describe.skipIf(!(env.CI || env.SLOW_TESTS))('slow tests', function () { - describeWithStorage({ timeout: 120_000 }, function (factory) { - defineSlowTests(factory); + describeWithStorage({ timeout: 120_000 }, function (config) { + defineSlowTests(config); }); }); -function defineSlowTests(factory: storage.TestStorageFactory) { +function defineSlowTests(config: storage.TestStorageConfig) { + const factory = config.factory; + let walStream: WalStream | undefined; let connections: PgManager | undefined; let abortController: AbortController | undefined; @@ -42,7 +50,7 @@ function defineSlowTests(factory: storage.TestStorageFactory) { // This cleans up, similar to WalStreamTestContext.dispose(). // These tests are a little more complex than what is supported by WalStreamTestContext. abortController?.abort(); - await streamPromise; + await streamPromise?.catch((_) => {}); streamPromise = undefined; connections?.destroy(); @@ -70,7 +78,6 @@ function defineSlowTests(factory: storage.TestStorageFactory) { async function testRepeatedReplication(testOptions: { compact: boolean; maxBatchSize: number; numBatches: number }) { const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); - const replicationConnection = await connections.replicationConnection(); const pool = connections.pool; await clearTestDb(pool); await using f = await factory(); @@ -97,11 +104,11 @@ bucket_definitions: ); await pool.query(`ALTER TABLE test_data REPLICA IDENTITY FULL`); - await walStream.initReplication(replicationConnection); let abort = false; - streamPromise = walStream.streamChanges(replicationConnection).finally(() => { + streamPromise = walStream.replicate().finally(() => { abort = true; }); + await walStream.waitForInitialSnapshot(); const start = Date.now(); while (!abort && Date.now() - start < TEST_DURATION_MS) { @@ -223,11 +230,12 @@ bucket_definitions: await compactPromise; // Wait for replication to finish - let checkpoint = await getClientCheckpoint(pool, storage.factory, { timeout: TIMEOUT_MARGIN_MS }); + await getClientCheckpoint(pool, storage.factory, { timeout: TIMEOUT_MARGIN_MS }); if (f instanceof mongo_storage.storage.MongoBucketStorage) { // Check that all inserts have been deleted again - const docs = await f.db.current_data.find().toArray(); + // Note: at this point, the pending_delete cleanup may not have run yet. 
+ const docs = await f.db.current_data.find({ pending_delete: { $exists: false } }).toArray(); const transformed = docs.map((doc) => { return bson.deserialize(doc.data.buffer) as SqliteRow; }); @@ -254,6 +262,8 @@ bucket_definitions: * FROM current_data + WHERE + pending_delete IS NULL ` .decoded(postgres_storage.models.CurrentData) .rows(); @@ -288,14 +298,20 @@ bucket_definitions: } abortController.abort(); - await streamPromise; + await streamPromise.catch((e) => { + if (e instanceof ReplicationAbortedError) { + // Ignore + } else { + throw e; + } + }); } // Test repeatedly performing initial replication. // // If the first LSN does not correctly match with the first replication transaction, // we may miss some updates. - test('repeated initial replication', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { + test('repeated initial replication (1)', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { const pool = await connectPgPool(); await clearTestDb(pool); await using f = await factory(); @@ -331,7 +347,6 @@ bucket_definitions: i += 1; const connections = new PgManager(TEST_CONNECTION_OPTIONS, {}); - const replicationConnection = await connections.replicationConnection(); abortController = new AbortController(); const options: WalStreamOptions = { @@ -344,19 +359,14 @@ bucket_definitions: await storage.clear(); - // 3. Start initial replication, then streaming, but don't wait for any of this + // 3. Start replication, but don't wait for it let initialReplicationDone = false; - streamPromise = (async () => { - await walStream.initReplication(replicationConnection); - initialReplicationDone = true; - await walStream.streamChanges(replicationConnection); - })() - .catch((e) => { + streamPromise = walStream.replicate(); + walStream + .waitForInitialSnapshot() + .catch((_) => {}) + .finally(() => { initialReplicationDone = true; - throw e; - }) - .then((v) => { - return v; }); // 4. While initial replication is still running, write more changes @@ -399,8 +409,104 @@ bucket_definitions: } abortController.abort(); - await streamPromise; + await streamPromise.catch((e) => { + if (e instanceof ReplicationAbortedError) { + // Ignore + } else { + throw e; + } + }); await connections.end(); } }); + + // Test repeatedly performing initial replication while deleting data. + // + // This specifically checks for data in the initial snapshot being deleted while snapshotting. + test('repeated initial replication with deletes', { timeout: TEST_DURATION_MS + TIMEOUT_MARGIN_MS }, async () => { + const syncRuleContent = ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "test_data" +`; + + const start = Date.now(); + let i = 0; + + while (Date.now() - start < TEST_DURATION_MS) { + i += 1; + + // 1. Each iteration starts with a clean slate + await using context = await WalStreamTestContext.open(factory, { + walStreamOptions: { snapshotChunkLength: 100 } + }); + const pool = context.pool; + + // Introduce an artificial delay in snapshot queries, to make it more likely to reproduce an + // issue. 
+ const originalSnapshotConnectionFn = context.connectionManager.snapshotConnection; + context.connectionManager.snapshotConnection = async () => { + const conn = await originalSnapshotConnectionFn.call(context.connectionManager); + // Wrap streaming query to add delays to snapshots + const originalStream = conn.stream; + conn.stream = async function* (...args: any[]) { + const delay = Math.random() * 20; + yield* originalStream.call(this, ...args); + await new Promise((resolve) => setTimeout(resolve, delay)); + }; + return conn; + }; + + await pool.query(`CREATE TABLE test_data(id uuid primary key default uuid_generate_v4(), description text)`); + await context.updateSyncRules(syncRuleContent); + + let statements: pgwire.Statement[] = []; + + const n = Math.floor(Math.random() * 200); + for (let i = 0; i < n; i++) { + statements.push({ + statement: `INSERT INTO test_data(description) VALUES('test_init') RETURNING id` + }); + } + const results = await pool.query(...statements); + const ids = new Set( + results.results.map((sub) => { + return sub.rows[0][0] as string; + }) + ); + + // 3. Start replication, but don't wait for it + let initialReplicationDone = false; + + streamPromise = context.replicateSnapshot().finally(() => { + initialReplicationDone = true; + }); + + // 4. While initial replication is still running, delete random rows + while (!initialReplicationDone && ids.size > 0) { + let statements: pgwire.Statement[] = []; + + const m = Math.floor(Math.random() * 10) + 1; + const idArray = Array.from(ids); + for (let i = 0; i < m; i++) { + const id = idArray[Math.floor(Math.random() * idArray.length)]; + statements.push({ + statement: `DELETE FROM test_data WHERE id = $1`, + params: [{ type: 'uuid', value: id }] + }); + ids.delete(id); + } + await pool.query(...statements); + await new Promise((resolve) => setTimeout(resolve, Math.random() * 10)); + } + + await streamPromise; + + // 5. Once initial replication is done, wait for the streaming changes to complete syncing. 
+ const data = await context.getBucketData('global[]', 0n); + const normalized = reduceBucket(data).filter((op) => op.op !== 'CLEAR'); + expect(normalized.length).toEqual(ids.size); + } + }); } diff --git a/modules/module-postgres/test/src/util.ts b/modules/module-postgres/test/src/util.ts index f0516f6b8..ffc4c797f 100644 --- a/modules/module-postgres/test/src/util.ts +++ b/modules/module-postgres/test/src/util.ts @@ -2,7 +2,7 @@ import { PostgresRouteAPIAdapter } from '@module/api/PostgresRouteAPIAdapter.js' import * as types from '@module/types/types.js'; import * as lib_postgres from '@powersync/lib-service-postgres'; import { logger } from '@powersync/lib-services-framework'; -import { BucketStorageFactory, InternalOpId, TestStorageFactory } from '@powersync/service-core'; +import { BucketStorageFactory, InternalOpId, TestStorageConfig, TestStorageFactory } from '@powersync/service-core'; import * as pgwire from '@powersync/service-jpgwire'; import * as mongo_storage from '@powersync/service-module-mongodb-storage'; import * as postgres_storage from '@powersync/service-module-postgres-storage'; @@ -16,11 +16,11 @@ export const INITIALIZED_MONGO_STORAGE_FACTORY = mongo_storage.test_utils.mongoT isCI: env.CI }); -export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestStorageFactoryGenerator({ +export const INITIALIZED_POSTGRES_STORAGE_FACTORY = postgres_storage.test_utils.postgresTestSetup({ url: env.PG_STORAGE_TEST_URL }); -export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageFactory) => void) { +export function describeWithStorage(options: TestOptions, fn: (factory: TestStorageConfig) => void) { describe.skipIf(!env.TEST_MONGO_STORAGE)(`mongodb storage`, options, function () { fn(INITIALIZED_MONGO_STORAGE_FACTORY); }); diff --git a/modules/module-postgres/test/src/validation.test.ts b/modules/module-postgres/test/src/validation.test.ts index 135b0fab7..30fe1c6ed 100644 --- a/modules/module-postgres/test/src/validation.test.ts +++ b/modules/module-postgres/test/src/validation.test.ts @@ -5,7 +5,7 @@ import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js'; import { WalStreamTestContext } from './wal_stream_utils.js'; test('validate tables', async () => { - await using context = await WalStreamTestContext.open(INITIALIZED_MONGO_STORAGE_FACTORY); + await using context = await WalStreamTestContext.open(INITIALIZED_MONGO_STORAGE_FACTORY.factory); const { pool } = context; await pool.query(`CREATE TABLE test_data(id uuid primary key default uuid_generate_v4(), description text)`); diff --git a/modules/module-postgres/test/src/wal_stream.test.ts b/modules/module-postgres/test/src/wal_stream.test.ts index bd7440809..3a225ecec 100644 --- a/modules/module-postgres/test/src/wal_stream.test.ts +++ b/modules/module-postgres/test/src/wal_stream.test.ts @@ -2,12 +2,12 @@ import { MissingReplicationSlotError } from '@module/replication/WalStream.js'; import { storage } from '@powersync/service-core'; import { METRICS_HELPER, putOp, removeOp } from '@powersync/service-core-tests'; import { pgwireRows } from '@powersync/service-jpgwire'; +import { JSONBig } from '@powersync/service-jsonbig'; import { ReplicationMetric } from '@powersync/service-types'; import * as crypto from 'crypto'; -import { afterAll, beforeAll, describe, expect, test } from 'vitest'; +import { describe, expect, test } from 'vitest'; import { describeWithStorage } from './util.js'; import { WalStreamTestContext, withMaxWalSize } from './wal_stream_utils.js'; 
-import { JSONBig } from '@powersync/service-jsonbig'; const BASIC_SYNC_RULES = ` bucket_definitions: @@ -20,7 +20,9 @@ describe('wal stream', () => { describeWithStorage({ timeout: 20_000 }, defineWalStreamTests); }); -function defineWalStreamTests(factory: storage.TestStorageFactory) { +function defineWalStreamTests(config: storage.TestStorageConfig) { + const { factory } = config; + test('replicating basic values', async () => { await using context = await WalStreamTestContext.open(factory); const { pool } = context; @@ -103,7 +105,6 @@ bucket_definitions: ); await context.replicateSnapshot(); - context.startStreaming(); // Must be > 8kb after compression const largeDescription = crypto.randomBytes(20_000).toString('hex'); @@ -210,7 +211,6 @@ bucket_definitions: ); await context.replicateSnapshot(); - context.startStreaming(); const data = await context.getBucketData('global[]'); expect(data).toMatchObject([putOp('test_data', { id: test_id, description: 'test1' })]); @@ -242,8 +242,6 @@ bucket_definitions: params: [{ type: 'varchar', value: largeDescription }] }); - context.startStreaming(); - const data = await context.getBucketData('global[]'); expect(data.length).toEqual(1); const row = JSON.parse(data[0].data as string); @@ -295,7 +293,6 @@ bucket_definitions: `INSERT INTO test_data(id, description) VALUES('8133cd37-903b-4937-a022-7c8294015a3a', 'test1') returning id as test_id` ); await context.replicateSnapshot(); - context.startStreaming(); const data = await context.getBucketData('global[]'); @@ -320,15 +317,12 @@ bucket_definitions: await context.loadActiveSyncRules(); - // Previously, the `replicateSnapshot` call picked up on this error. - // Now, we have removed that check, this only comes up when we start actually streaming. - // We don't get the streaming response directly here, but getCheckpoint() checks for that. - await context.replicateSnapshot(); - context.startStreaming(); + // Note: The actual error may be thrown either in replicateSnapshot(), or in getCheckpoint(). if (serverVersion!.compareMain('18.0.0') >= 0) { // No error expected in Postgres 18. Replication keeps on working despite the // publication being re-created. + await context.replicateSnapshot(); + await context.getCheckpoint(); } else { // await context.getCheckpoint(); @@ -336,9 +330,9 @@ bucket_definitions: // In the service, this error is handled in WalStreamReplicationJob, // creating a new replication slot.
await expect(async () => { + await context.replicateSnapshot(); await context.getCheckpoint(); }).rejects.toThrowError(MissingReplicationSlotError); - context.clearStreamError(); } } }); @@ -360,7 +354,6 @@ bucket_definitions: `INSERT INTO test_data(id, description) VALUES('8133cd37-903b-4937-a022-7c8294015a3a', 'test1') returning id as test_id` ); await context.replicateSnapshot(); - context.startStreaming(); const data = await context.getBucketData('global[]'); @@ -423,7 +416,6 @@ bucket_definitions: `INSERT INTO test_data(id, description) VALUES('8133cd37-903b-4937-a022-7c8294015a3a', 'test1') returning id as test_id` ); await context.replicateSnapshot(); - context.startStreaming(); const data = await context.getBucketData('global[]'); @@ -591,7 +583,6 @@ config: ); await context.replicateSnapshot(); - context.startStreaming(); await pool.query(`UPDATE test_data SET description = 'test2' WHERE id = '${test_id}'`); diff --git a/modules/module-postgres/test/src/wal_stream_utils.ts b/modules/module-postgres/test/src/wal_stream_utils.ts index 33ebecee8..aefe859bc 100644 --- a/modules/module-postgres/test/src/wal_stream_utils.ts +++ b/modules/module-postgres/test/src/wal_stream_utils.ts @@ -6,21 +6,22 @@ import { initializeCoreReplicationMetrics, InternalOpId, OplogEntry, + settledPromise, storage, - SyncRulesBucketStorage + SyncRulesBucketStorage, + unsettledPromise } from '@powersync/service-core'; import { METRICS_HELPER, test_utils } from '@powersync/service-core-tests'; import * as pgwire from '@powersync/service-jpgwire'; import { clearTestDb, getClientCheckpoint, TEST_CONNECTION_OPTIONS } from './util.js'; import { CustomTypeRegistry } from '@module/types/registry.js'; +import { ReplicationAbortedError } from '@powersync/lib-services-framework'; export class WalStreamTestContext implements AsyncDisposable { private _walStream?: WalStream; private abortController = new AbortController(); - private streamPromise?: Promise; public storage?: SyncRulesBucketStorage; - private replicationConnection?: pgwire.PgConnection; - private snapshotPromise?: Promise; + private settledReplicationPromise?: Promise>; /** * Tests operating on the wal stream need to configure the stream and manage asynchronous @@ -55,21 +56,10 @@ export class WalStreamTestContext implements AsyncDisposable { await this.dispose(); } - /** - * Clear any errors from startStream, to allow for a graceful dispose when streaming errors - * were expected. - */ - async clearStreamError() { - if (this.streamPromise != null) { - this.streamPromise = this.streamPromise.catch((e) => {}); - } - } - async dispose() { this.abortController.abort(); try { - await this.snapshotPromise; - await this.streamPromise; + await this.settledReplicationPromise; await this.connectionManager.destroy(); await this.factory?.[Symbol.asyncDispose](); } catch (e) { @@ -143,36 +133,38 @@ export class WalStreamTestContext implements AsyncDisposable { */ async initializeReplication() { await this.replicateSnapshot(); - this.startStreaming(); // Make sure we're up to date await this.getCheckpoint(); } + /** + * Replicate the initial snapshot, and start streaming. 
+ */ async replicateSnapshot() { - const promise = (async () => { - this.replicationConnection = await this.connectionManager.replicationConnection(); - await this.walStream.initReplication(this.replicationConnection); - })(); - this.snapshotPromise = promise.catch((e) => e); - await promise; - } - - startStreaming() { - if (this.replicationConnection == null) { - throw new Error('Call replicateSnapshot() before startStreaming()'); + // Use a settledPromise to avoid unhandled rejections + this.settledReplicationPromise = settledPromise(this.walStream.replicate()); + try { + await Promise.race([unsettledPromise(this.settledReplicationPromise), this.walStream.waitForInitialSnapshot()]); + } catch (e) { + if (e instanceof ReplicationAbortedError && e.cause != null) { + // Edge case for tests: replicate() can throw an error, but we'd receive the ReplicationAbortedError from + // waitForInitialSnapshot() first. In that case, prioritize the cause, e.g. MissingReplicationSlotError. + // This is not a concern for production use, since we only use waitForInitialSnapshot() in tests. + throw e.cause; + } + throw e; } - this.streamPromise = this.walStream.streamChanges(this.replicationConnection!); } async getCheckpoint(options?: { timeout?: number }) { let checkpoint = await Promise.race([ getClientCheckpoint(this.pool, this.factory, { timeout: options?.timeout ?? 15_000 }), - this.streamPromise + unsettledPromise(this.settledReplicationPromise!) ]); if (checkpoint == null) { - // This indicates an issue with the test setup - streamingPromise completed instead + // This indicates an issue with the test setup - replicationPromise completed instead // of getClientCheckpoint() - throw new Error('Test failure - streamingPromise completed'); + throw new Error('Test failure - replicationPromise completed'); } return checkpoint; } diff --git a/packages/service-core-tests/src/test-utils/general-utils.ts b/packages/service-core-tests/src/test-utils/general-utils.ts index 6f93fffb9..286811b71 100644 --- a/packages/service-core-tests/src/test-utils/general-utils.ts +++ b/packages/service-core-tests/src/test-utils/general-utils.ts @@ -34,9 +34,14 @@ export function testRules(content: string): storage.PersistedSyncRulesContent { }; } -export function makeTestTable(name: string, replicaIdColumns?: string[] | undefined) { +export function makeTestTable( + name: string, + replicaIdColumns: string[] | undefined, + options: { tableIdStrings: boolean } +) { const relId = utils.hashData('table', name, (replicaIdColumns ?? ['id']).join(',')); - const id = new bson.ObjectId('6544e3899293153fa7b38331'); + const id = + options.tableIdStrings == false ? 
new bson.ObjectId('6544e3899293153fa7b38331') : '6544e3899293153fa7b38331'; return new storage.SourceTable({ id: id, connectionTag: storage.SourceTable.DEFAULT_TAG, diff --git a/packages/service-core-tests/src/tests/register-compacting-tests.ts b/packages/service-core-tests/src/tests/register-compacting-tests.ts index b214a2be2..1c43bf25a 100644 --- a/packages/service-core-tests/src/tests/register-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-compacting-tests.ts @@ -1,10 +1,11 @@ -import { storage } from '@powersync/service-core'; +import { addChecksums, storage } from '@powersync/service-core'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; -const TEST_TABLE = test_utils.makeTestTable('test', ['id']); +export function registerCompactTests(config: storage.TestStorageConfig) { + const generateStorageFactory = config.factory; + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); -export function registerCompactTests(generateStorageFactory: storage.TestStorageFactory) { test('compacting (1)', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -17,6 +18,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -57,19 +59,16 @@ bucket_definitions: expect(dataBefore).toMatchObject([ { - checksum: 2634521662, object_id: 't1', op: 'PUT', op_id: '1' }, { - checksum: 4243212114, object_id: 't2', op: 'PUT', op_id: '2' }, { - checksum: 4243212114, object_id: 't2', op: 'PUT', op_id: '3' @@ -94,19 +93,14 @@ bucket_definitions: expect(batchAfter.targetOp).toEqual(3n); expect(dataAfter).toMatchObject([ + dataBefore[0], { - checksum: 2634521662, - object_id: 't1', - op: 'PUT', - op_id: '1' - }, - { - checksum: 4243212114, + checksum: dataBefore[1].checksum, op: 'MOVE', op_id: '2' }, { - checksum: 4243212114, + checksum: dataBefore[2].checksum, object_id: 't2', op: 'PUT', op_id: '3' @@ -131,6 +125,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -178,30 +173,23 @@ bucket_definitions: const dataBefore = batchBefore.chunkData.data; const checksumBefore = await bucketStorage.getChecksums(checkpoint, ['global[]']); + // op_id sequence depends on the storage implementation expect(dataBefore).toMatchObject([ { - checksum: 2634521662, object_id: 't1', - op: 'PUT', - op_id: '1' + op: 'PUT' }, { - checksum: 4243212114, object_id: 't2', - op: 'PUT', - op_id: '2' + op: 'PUT' }, { - checksum: 4228978084, object_id: 't1', - op: 'REMOVE', - op_id: '3' + op: 'REMOVE' }, { - checksum: 4243212114, object_id: 't2', - op: 'PUT', - op_id: '4' + op: 'PUT' } ]); @@ -219,18 +207,19 @@ bucket_definitions: bucketStorage.clearChecksumCache(); const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['global[]']); - expect(batchAfter.targetOp).toEqual(4n); + expect(batchAfter.targetOp).toBeLessThanOrEqual(checkpoint); expect(dataAfter).toMatchObject([ { - checksum: -1778190028, - op: 'CLEAR', - op_id: '3' + checksum: addChecksums( + addChecksums(dataBefore[0].checksum as number, 
dataBefore[1].checksum as number), + dataBefore[2].checksum as number + ), + op: 'CLEAR' }, { - checksum: 4243212114, + checksum: dataBefore[3].checksum, object_id: 't2', - op: 'PUT', - op_id: '4' + op: 'PUT' } ]); expect(checksumAfter.get('global[]')).toEqual({ @@ -253,6 +242,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -284,7 +274,7 @@ bucket_definitions: }); const checkpoint1 = result!.flushed_op; - const checksumBefore = await bucketStorage.getChecksums(checkpoint1, ['global[]']); + await bucketStorage.getChecksums(checkpoint1, ['global[]']); const result2 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { await batch.save({ @@ -313,18 +303,15 @@ bucket_definitions: await bucketStorage.clearChecksumCache(); const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(batchAfter.targetOp).toEqual(4n); expect(dataAfter).toMatchObject([ { - checksum: 1874612650, - op: 'CLEAR', - op_id: '4' + op: 'CLEAR' } ]); expect(checksumAfter.get('global[]')).toEqual({ bucket: 'global[]', count: 1, - checksum: 1874612650 + checksum: dataAfter[0].checksum }); }); @@ -343,6 +330,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); /** * Repeatedly create operations which fall into different buckets. * The bucket operations are purposely interleaved as the op_id increases. @@ -468,7 +456,8 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -520,10 +509,9 @@ bucket_definitions: const checkpoint2 = result2!.flushed_op; await bucketStorage.clearChecksumCache(); const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); - expect(checksumAfter.get('global[]')).toEqual({ + expect(checksumAfter.get('global[]')).toMatchObject({ bucket: 'global[]', - count: 4, - checksum: 1874612650 + count: 4 }); }); @@ -539,6 +527,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -582,12 +571,10 @@ bucket_definitions: }); const checkpoint2 = result2!.flushed_op; - // Check that the checksum was correctly updated with the clear operation after having a cached checksum const checksumAfter = await bucketStorage.getChecksums(checkpoint2, ['global[]']); expect(checksumAfter.get('global[]')).toMatchObject({ bucket: 'global[]', - count: 1, - checksum: -1481659821 + count: 1 }); }); } diff --git a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts index 72dd7dced..d597f2cba 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts +++ 
b/packages/service-core-tests/src/tests/register-data-storage-checkpoint-tests.ts @@ -12,7 +12,9 @@ import * as test_utils from '../test-utils/test-utils-index.js'; * * ``` */ -export function registerDataStorageCheckpointTests(generateStorageFactory: storage.TestStorageFactory) { +export function registerDataStorageCheckpointTests(config: storage.TestStorageConfig) { + const generateStorageFactory = config.factory; + test('managed write checkpoints - checkpoint after write', async (context) => { await using factory = await generateStorageFactory(); const r = await factory.configureSyncRules({ @@ -31,6 +33,10 @@ bucket_definitions: .watchCheckpointChanges({ user_id: 'user1', signal: abortController.signal }) [Symbol.asyncIterator](); + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + }); + const writeCheckpoint = await bucketStorage.createManagedWriteCheckpoint({ heads: { '1': '5/0' }, user_id: 'user1' @@ -65,6 +71,10 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(r.persisted_sync_rules!); + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + }); + const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); const iter = bucketStorage @@ -128,6 +138,10 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + }); + const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); const iter = bucketStorage @@ -168,6 +182,10 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + }); + const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); const iter = bucketStorage @@ -211,6 +229,10 @@ bucket_definitions: const bucketStorage = factory.getInstance(r.persisted_sync_rules!); bucketStorage.setWriteCheckpointMode(storage.WriteCheckpointMode.CUSTOM); + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + }); + const abortController = new AbortController(); context.onTestFinished(() => abortController.abort()); const iter = bucketStorage diff --git a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts index f9729f21d..5ba42456d 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-data-tests.ts @@ -1,7 +1,6 @@ import { BucketDataBatchOptions, getUuidReplicaIdentityBson, OplogEntry, storage } from '@powersync/service-core'; import { describe, expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; -import { TEST_TABLE } from './util.js'; /** * Normalize data from OplogEntries for comparison in tests. 
@@ -24,7 +23,9 @@ const normalizeOplogData = (data: OplogEntry['data']) => { * * ``` */ -export function registerDataStorageDataTests(generateStorageFactory: storage.TestStorageFactory) { +export function registerDataStorageDataTests(config: storage.TestStorageConfig) { + const generateStorageFactory = config.factory; + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); test('removing row', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -39,6 +40,7 @@ bucket_definitions: await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { const sourceTable = TEST_TABLE; + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, @@ -86,6 +88,199 @@ bucket_definitions: ]); }); + test('insert after delete in new batch', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + const sourceTable = TEST_TABLE; + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') + }); + + await batch.commit('0/1'); + }); + + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + const sourceTable = TEST_TABLE; + + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + await batch.commit('2/1'); + }); + + const { checkpoint } = await bucketStorage.getCheckpoint(); + + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const data = batch[0].chunkData.data.map((d) => { + return { + op: d.op, + object_id: d.object_id, + checksum: d.checksum + }; + }); + + const c1 = 2871785649; + + expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); + + const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + expect(checksums).toEqual([ + { + bucket: 'global[]', + checksum: c1 & 0xffffffff, + count: 1 + } + ]); + }); + + test('update after delete in new batch', async () => { + // Update after delete may not be common, but the storage layer should handle it in an eventually-consistent way. 
+ await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + const sourceTable = TEST_TABLE; + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') + }); + + await batch.commit('0/1'); + }); + + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + const sourceTable = TEST_TABLE; + + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.UPDATE, + before: { + id: 'test1' + }, + after: { + id: 'test1', + description: 'test1' + }, + beforeReplicaId: test_utils.rid('test1'), + afterReplicaId: test_utils.rid('test1') + }); + await batch.commit('2/1'); + }); + + const { checkpoint } = await bucketStorage.getCheckpoint(); + + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const data = batch[0].chunkData.data.map((d) => { + return { + op: d.op, + object_id: d.object_id, + checksum: d.checksum + }; + }); + + const c1 = 2871785649; + + expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); + + const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + expect(checksums).toEqual([ + { + bucket: 'global[]', + checksum: c1 & 0xffffffff, + count: 1 + } + ]); + }); + + test('insert after delete in same batch', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + const sourceTable = TEST_TABLE; + await batch.markAllSnapshotDone('1/1'); + + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + beforeReplicaId: test_utils.rid('test1') + }); + await batch.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1' + }, + afterReplicaId: test_utils.rid('test1') + }); + await batch.commit('1/1'); + }); + + const { checkpoint } = await bucketStorage.getCheckpoint(); + + const batch = await test_utils.fromAsync(bucketStorage.getBucketDataBatch(checkpoint, new Map([['global[]', 0n]]))); + const data = batch[0].chunkData.data.map((d) => { + return { + op: d.op, + object_id: d.object_id, + checksum: d.checksum + }; + }); + + const c1 = 2871785649; + + expect(data).toEqual([{ op: 'PUT', object_id: 'test1', checksum: c1 }]); + + const checksums = [...(await bucketStorage.getChecksums(checkpoint, ['global[]'])).values()]; + expect(checksums).toEqual([ + { + bucket: 'global[]', + checksum: c1 & 0xffffffff, + count: 1 + } + ]); + }); + test('changing client ids', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -100,6 +295,7 @@ bucket_definitions: const sourceTable = TEST_TABLE; await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, tag: storage.SaveOperationTag.INSERT, @@ -165,6 +361,7 @@ bucket_definitions: await 
bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { const sourceTable = TEST_TABLE; + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, @@ -241,6 +438,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; await batch.save({ @@ -255,6 +453,7 @@ bucket_definitions: }); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; await batch.save({ @@ -287,6 +486,7 @@ bucket_definitions: }); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; await batch.save({ @@ -372,6 +572,7 @@ bucket_definitions: // Pre-setup const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; await batch.save({ @@ -528,10 +729,11 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const sourceTable = test_utils.makeTestTable('test', ['id', 'description']); + const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); // Pre-setup const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, tag: storage.SaveOperationTag.INSERT, @@ -636,10 +838,11 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const sourceTable = test_utils.makeTestTable('test', ['id', 'description']); + const sourceTable = test_utils.makeTestTable('test', ['id', 'description'], config); // Pre-setup const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, tag: storage.SaveOperationTag.INSERT, @@ -735,6 +938,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; const largeDescription = '0123456789'.repeat(12_000_00); @@ -844,6 +1048,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; for (let i = 1; i <= 6; i++) { @@ -923,6 +1128,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); const sourceTable = TEST_TABLE; for (let i = 1; i <= 10; i++) { @@ -1073,11 +1279,14 @@ bucket_definitions: const r = await f.configureSyncRules({ content: 'bucket_definitions: {}', validate: false }); const storage = f.getInstance(r.persisted_sync_rules!); await storage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/0'); await batch.keepalive('1/0'); }); const metrics2 = await f.getStorageMetrics(); - expect(metrics2).toMatchSnapshot(); + expect(metrics2.operations_size_bytes).toBeLessThanOrEqual(20_000); + expect(metrics2.parameters_size_bytes).toBeLessThanOrEqual(40_000); + 
expect(metrics2.replication_size_bytes).toBeLessThanOrEqual(30_000); }); test('op_id initialization edge case', async () => { @@ -1096,10 +1305,11 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const sourceTable = test_utils.makeTestTable('test', ['id']); - const sourceTableIgnore = test_utils.makeTestTable('test_ignore', ['id']); + const sourceTable = test_utils.makeTestTable('test', ['id'], config); + const sourceTableIgnore = test_utils.makeTestTable('test_ignore', ['id'], config); const result1 = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); // This saves a record to current_data, but not bucket_data. // This causes a checkpoint to be created without increasing the op_id sequence. await batch.save({ @@ -1144,6 +1354,7 @@ bucket_definitions: const sourceTable = TEST_TABLE; await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable, tag: storage.SaveOperationTag.INSERT, @@ -1163,7 +1374,143 @@ bucket_definitions: expect(checksums2).toEqual([{ bucket: 'global[]', checksum: 1917136889, count: 1 }]); }); - testChecksumBatching(generateStorageFactory); + testChecksumBatching(config); + + test('empty checkpoints (1)', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + await batch.commit('1/1'); + + const cp1 = await bucketStorage.getCheckpoint(); + expect(cp1.lsn).toEqual('1/1'); + + await batch.commit('2/1', { createEmptyCheckpoints: true }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('2/1'); + + await batch.keepalive('3/1'); + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('3/1'); + + // For the last one, we skip creating empty checkpoints + // This means the LSN stays at 3/1. + await batch.commit('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('3/1'); + }); + }); + + test('empty checkpoints (2)', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + const sourceTable = TEST_TABLE; + // We simulate two concurrent batches, but nesting is the easiest way to do this. + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch1) => { + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch2) => { + await batch1.markAllSnapshotDone('1/1'); + await batch1.commit('1/1'); + + await batch1.commit('2/1', { createEmptyCheckpoints: false }); + const cp2 = await bucketStorage.getCheckpoint(); + expect(cp2.lsn).toEqual('1/1'); // checkpoint 2/1 skipped + + await batch2.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + // This simulates what happens on a snapshot processor. + // This may later change to a flush() rather than commit(). 
+ await batch2.commit(test_utils.BATCH_OPTIONS.zeroLSN); + + const cp3 = await bucketStorage.getCheckpoint(); + expect(cp3.lsn).toEqual('1/1'); // Still unchanged + + // This now needs to advance the LSN, despite {createEmptyCheckpoints: false} + await batch1.commit('4/1', { createEmptyCheckpoints: false }); + const cp4 = await bucketStorage.getCheckpoint(); + expect(cp4.lsn).toEqual('4/1'); + }); + }); + }); + + test('deleting while streaming', async () => { + await using factory = await generateStorageFactory(); + const syncRules = await factory.updateSyncRules({ + content: ` +bucket_definitions: + global: + data: + - SELECT id, description FROM "%" +` + }); + const bucketStorage = factory.getInstance(syncRules); + + const sourceTable = TEST_TABLE; + // We simulate two concurrent batches, and nesting is the easiest way to do this. + // For this test, we assume that we start with a row "test1", which is picked up by a snapshot + // query, right before the delete is streamed. But the snapshot query is only persisted _after_ + // the delete is streamed, and we need to ensure that the streamed delete takes precedence. + await bucketStorage.startBatch({ ...test_utils.BATCH_OPTIONS, skipExistingRows: true }, async (snapshotBatch) => { + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (streamingBatch) => { + streamingBatch.save({ + sourceTable, + tag: storage.SaveOperationTag.DELETE, + before: { + id: 'test1' + }, + beforeReplicaId: test_utils.rid('test1') + }); + await streamingBatch.commit('2/1'); + + await snapshotBatch.save({ + sourceTable, + tag: storage.SaveOperationTag.INSERT, + after: { + id: 'test1', + description: 'test1a' + }, + afterReplicaId: test_utils.rid('test1') + }); + await snapshotBatch.markAllSnapshotDone('3/1'); + await snapshotBatch.commit('1/1'); + + await streamingBatch.keepalive('3/1'); + }); + }); + + const cp = await bucketStorage.getCheckpoint(); + expect(cp.lsn).toEqual('3/1'); + const data = await test_utils.fromAsync( + bucketStorage.getBucketDataBatch(cp.checkpoint, new Map([['global[]', 0n]])) + ); + + expect(data).toEqual([]); + }); } /** @@ -1171,9 +1518,9 @@ bucket_definitions: * * Exposed as a separate test so we can test with more storage parameters. 
*/ -export function testChecksumBatching(generateStorageFactory: storage.TestStorageFactory) { +export function testChecksumBatching(config: storage.TestStorageConfig) { test('checksums for multiple buckets', async () => { - await using factory = await generateStorageFactory(); + await using factory = await config.factory(); const syncRules = await factory.updateSyncRules({ content: ` bucket_definitions: @@ -1185,8 +1532,9 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const sourceTable = TEST_TABLE; + const sourceTable = test_utils.makeTestTable('test', ['id'], config); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); for (let u of ['u1', 'u2', 'u3', 'u4']) { for (let t of ['t1', 't2', 't3', 't4']) { const id = `${t}_${u}`; diff --git a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts index d079aaa8c..c213832db 100644 --- a/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts +++ b/packages/service-core-tests/src/tests/register-data-storage-parameter-tests.ts @@ -3,7 +3,6 @@ import { ParameterLookup, RequestParameters } from '@powersync/service-sync-rule import { SqlBucketDescriptor } from '@powersync/service-sync-rules/src/SqlBucketDescriptor.js'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; -import { TEST_TABLE } from './util.js'; /** * @example @@ -15,7 +14,10 @@ import { TEST_TABLE } from './util.js'; * * ``` */ -export function registerDataStorageParameterTests(generateStorageFactory: storage.TestStorageFactory) { +export function registerDataStorageParameterTests(config: storage.TestStorageConfig) { + const generateStorageFactory = config.factory; + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); + test('save and load parameters', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -30,6 +32,8 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); + await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -80,6 +84,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -137,9 +142,10 @@ bucket_definitions: }); const bucketStorage = factory.getInstance(syncRules); - const table = test_utils.makeTestTable('todos', ['id', 'list_id']); + const table = test_utils.makeTestTable('todos', ['id', 'list_id'], config); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); // Create two todos which initially belong to different lists await batch.save({ sourceTable: table, @@ -211,6 +217,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -263,6 +270,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await 
bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -301,7 +309,7 @@ bucket_definitions: }); test('save and load parameters with workspaceId', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', ['id']); + const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', ['id'], config); await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -318,6 +326,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: WORKSPACE_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -354,7 +363,7 @@ bucket_definitions: }); test('save and load parameters with dynamic global buckets', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace'); + const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -371,6 +380,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: WORKSPACE_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -442,7 +452,7 @@ bucket_definitions: }); test('multiple parameter queries', async () => { - const WORKSPACE_TABLE = test_utils.makeTestTable('workspace'); + const WORKSPACE_TABLE = test_utils.makeTestTable('workspace', undefined, config); await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -461,6 +471,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: WORKSPACE_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -555,6 +566,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, diff --git a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts index 59499fa02..835ed0ec1 100644 --- a/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts +++ b/packages/service-core-tests/src/tests/register-parameter-compacting-tests.ts @@ -3,9 +3,11 @@ import { ParameterLookup } from '@powersync/service-sync-rules'; import { expect, test } from 'vitest'; import * as test_utils from '../test-utils/test-utils-index.js'; -const TEST_TABLE = test_utils.makeTestTable('test', ['id']); +export function registerParameterCompactTests(config: storage.TestStorageConfig) { + const generateStorageFactory = config.factory; + + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); -export function registerParameterCompactTests(generateStorageFactory: storage.TestStorageFactory) { test('compacting parameters', async () => { await using factory = await generateStorageFactory(); const syncRules = await factory.updateSyncRules({ @@ -19,6 
+21,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -102,6 +105,7 @@ bucket_definitions: const bucketStorage = factory.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('1/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, diff --git a/packages/service-core-tests/src/tests/register-sync-tests.ts b/packages/service-core-tests/src/tests/register-sync-tests.ts index e721a2b31..83ec8dcf3 100644 --- a/packages/service-core-tests/src/tests/register-sync-tests.ts +++ b/packages/service-core-tests/src/tests/register-sync-tests.ts @@ -7,7 +7,6 @@ import { utils } from '@powersync/service-core'; import { JSONBig } from '@powersync/service-jsonbig'; -import { BucketSourceType, RequestParameters } from '@powersync/service-sync-rules'; import path from 'path'; import * as timers from 'timers/promises'; import { fileURLToPath } from 'url'; @@ -18,8 +17,6 @@ import { METRICS_HELPER } from '../test-utils/test-utils-index.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); -const TEST_TABLE = test_utils.makeTestTable('test', ['id']); - const BASIC_SYNC_RULES = ` bucket_definitions: mybucket: @@ -37,7 +34,9 @@ export const SYNC_SNAPSHOT_PATH = path.resolve(__dirname, '../__snapshots/sync.t * }); * ``` */ -export function registerSyncTests(factory: storage.TestStorageFactory) { +export function registerSyncTests(config: storage.TestStorageConfig) { + const factory = config.factory; + createCoreAPIMetrics(METRICS_HELPER.metricsEngine); const tracker = new sync.RequestTracker(METRICS_HELPER.metricsEngine); const syncContext = new sync.SyncContext({ @@ -46,6 +45,8 @@ export function registerSyncTests(factory: storage.TestStorageFactory) { maxDataFetchConcurrency: 2 }); + const TEST_TABLE = test_utils.makeTestTable('test', ['id'], config); + test('sync global data', async () => { await using f = await factory(); @@ -55,7 +56,9 @@ export function registerSyncTests(factory: storage.TestStorageFactory) { const bucketStorage = f.getInstance(syncRules); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); + await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -119,7 +122,8 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); - const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -184,6 +188,7 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); // Initial data: Add one priority row and 10k low-priority rows. 
await batch.save({ sourceTable: TEST_TABLE, @@ -297,6 +302,7 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); // Initial data: Add one priority row and 10k low-priority rows. await batch.save({ sourceTable: TEST_TABLE, @@ -441,6 +447,7 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); // Initial data: Add one priority row and 10k low-priority rows. await batch.save({ sourceTable: TEST_TABLE, @@ -573,6 +580,7 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -640,6 +648,7 @@ bucket_definitions: const bucketStorage = await f.getInstance(syncRules); const result = await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -717,6 +726,7 @@ bucket_definitions: const bucketStorage = await f.getInstance(syncRules); // Activate await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/0'); await batch.keepalive('0/0'); }); @@ -788,12 +798,13 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id']); - const listsTable = test_utils.makeTestTable('lists', ['id']); + const usersTable = test_utils.makeTestTable('users', ['id'], config); + const listsTable = test_utils.makeTestTable('lists', ['id'], config); const bucketStorage = await f.getInstance(syncRules); // Activate await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/0'); await batch.keepalive('0/0'); }); @@ -857,12 +868,13 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id']); - const listsTable = test_utils.makeTestTable('lists', ['id']); + const usersTable = test_utils.makeTestTable('users', ['id'], config); + const listsTable = test_utils.makeTestTable('lists', ['id'], config); const bucketStorage = await f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: usersTable, tag: storage.SaveOperationTag.INSERT, @@ -937,12 +949,13 @@ bucket_definitions: ` }); - const usersTable = test_utils.makeTestTable('users', ['id']); - const listsTable = test_utils.makeTestTable('lists', ['id']); + const usersTable = test_utils.makeTestTable('users', ['id'], config); + const listsTable = test_utils.makeTestTable('lists', ['id'], config); const bucketStorage = await f.getInstance(syncRules); // Activate await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/0'); await batch.keepalive('0/0'); }); @@ -971,6 +984,7 @@ bucket_definitions: expect(await getCheckpointLines(iter)).toMatchSnapshot(); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: listsTable, tag: storage.SaveOperationTag.INSERT, @@ -1012,6 +1026,7 @@ bucket_definitions: const bucketStorage = await 
f.getInstance(syncRules); // Activate await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/0'); await batch.keepalive('0/0'); }); @@ -1060,6 +1075,7 @@ bucket_definitions: const bucketStorage = await f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, @@ -1118,6 +1134,7 @@ bucket_definitions: // This invalidates the checkpoint we've received above. await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.UPDATE, @@ -1205,6 +1222,7 @@ bucket_definitions: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); // <= the managed write checkpoint LSN below await batch.commit('0/1'); }); @@ -1243,6 +1261,7 @@ bucket_definitions: }); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); // must be >= the managed write checkpoint LSN await batch.commit('1/0'); }); @@ -1278,6 +1297,7 @@ config: const bucketStorage = f.getInstance(syncRules); await bucketStorage.startBatch(test_utils.BATCH_OPTIONS, async (batch) => { + await batch.markAllSnapshotDone('0/1'); await batch.save({ sourceTable: TEST_TABLE, tag: storage.SaveOperationTag.INSERT, diff --git a/packages/service-core-tests/src/tests/tests-index.ts b/packages/service-core-tests/src/tests/tests-index.ts index 3145bb725..072b14e84 100644 --- a/packages/service-core-tests/src/tests/tests-index.ts +++ b/packages/service-core-tests/src/tests/tests-index.ts @@ -7,4 +7,3 @@ export * from './register-data-storage-checkpoint-tests.js'; export * from './register-migration-tests.js'; export * from './register-sync-tests.js'; export * from './register-report-tests.js'; -export * from './util.js'; diff --git a/packages/service-core-tests/src/tests/util.ts b/packages/service-core-tests/src/tests/util.ts deleted file mode 100644 index 67e90f11c..000000000 --- a/packages/service-core-tests/src/tests/util.ts +++ /dev/null @@ -1,3 +0,0 @@ -import { test_utils } from '../index.js'; - -export const TEST_TABLE = test_utils.makeTestTable('test', ['id']); diff --git a/packages/service-core/src/storage/BucketStorageBatch.ts b/packages/service-core/src/storage/BucketStorageBatch.ts index 62db7dd43..e35a83395 100644 --- a/packages/service-core/src/storage/BucketStorageBatch.ts +++ b/packages/service-core/src/storage/BucketStorageBatch.ts @@ -83,7 +83,9 @@ export interface BucketStorageBatch extends ObserverClient; + markTableSnapshotDone(tables: SourceTable[], no_checkpoint_before_lsn?: string): Promise; + markTableSnapshotRequired(table: SourceTable): Promise; + markAllSnapshotDone(no_checkpoint_before_lsn: string): Promise; updateTableProgress(table: SourceTable, progress: Partial): Promise; diff --git a/packages/service-core/src/storage/BucketStorageFactory.ts b/packages/service-core/src/storage/BucketStorageFactory.ts index 355efec84..87299fc05 100644 --- a/packages/service-core/src/storage/BucketStorageFactory.ts +++ b/packages/service-core/src/storage/BucketStorageFactory.ts @@ -166,3 +166,8 @@ export interface TestStorageOptions { } export type TestStorageFactory = (options?: TestStorageOptions) => Promise; export type 
TestReportStorageFactory = (options?: TestStorageOptions) => Promise; + +export interface TestStorageConfig { + factory: TestStorageFactory; + tableIdStrings: boolean; +} diff --git a/packages/service-core/src/storage/SourceTable.ts b/packages/service-core/src/storage/SourceTable.ts index 8e5951540..9a36bc125 100644 --- a/packages/service-core/src/storage/SourceTable.ts +++ b/packages/service-core/src/storage/SourceTable.ts @@ -1,9 +1,15 @@ import { DEFAULT_TAG } from '@powersync/service-sync-rules'; import * as util from '../util/util-index.js'; import { ColumnDescriptor, SourceEntityDescriptor } from './SourceEntity.js'; +import { bson } from '../index.js'; + +/** + * Format of the id depends on the bucket storage module. It should be consistent within the module. + */ +export type SourceTableId = string | bson.ObjectId; export interface SourceTableOptions { - id: any; + id: SourceTableId; connectionTag: string; objectId: number | string | undefined; schema: string; diff --git a/packages/service-core/src/sync/util.ts b/packages/service-core/src/sync/util.ts index a458af993..12187aeb5 100644 --- a/packages/service-core/src/sync/util.ts +++ b/packages/service-core/src/sync/util.ts @@ -183,6 +183,16 @@ export function settledPromise<T>(promise: Promise<T>): Promise<PromiseSettledResult<T>> { +export function unsettledPromise<T>(settled: Promise<PromiseSettledResult<T>>): Promise<T> { + return settled.then((result) => { + if (result.status === 'fulfilled') { + return Promise.resolve(result.value); + } else { + return Promise.reject(result.reason); + } + }); +} + export type MapOrSet = Map | Set; /** diff --git a/packages/service-errors/src/errors.ts b/packages/service-errors/src/errors.ts index 393b35eff..46f8f483c 100644 --- a/packages/service-errors/src/errors.ts +++ b/packages/service-errors/src/errors.ts @@ -151,11 +151,13 @@ export class ServiceAssertionError extends ServiceError { export class ReplicationAbortedError extends ServiceError { static readonly CODE = ErrorCode.PSYNC_S1103; - constructor(description?: string) { + constructor(description?: string, cause?: any) { super({ code: ReplicationAbortedError.CODE, description: description ?? 'Replication aborted' }); + + this.cause = cause; } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 327052e44..6fc506cc7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -339,6 +339,9 @@ importers: jose: specifier: ^4.15.1 version: 4.15.9 + p-defer: + specifier: ^4.0.1 + version: 4.0.1 pgwire: specifier: github:kagis/pgwire#f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87 version: https://codeload.github.com/kagis/pgwire/tar.gz/f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87