From b638b343b691a455bccead375211ea26d4149ae2 Mon Sep 17 00:00:00 2001 From: gagik Date: Thu, 6 Feb 2025 14:18:59 +0100 Subject: [PATCH 1/6] feat(mongodb-log-writer): add `logRetentionGB` configuration MONGOSH-1985 --- .../src/mongo-log-manager.spec.ts | 58 ++++++++++++++----- .../src/mongo-log-manager.ts | 53 ++++++++++++++--- 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts index b4652864..0e7716e4 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts @@ -86,6 +86,19 @@ describe('MongoLogManager', function () { } }); + const getFilesState = async (paths: string[]) => { + return ( + await Promise.all( + paths.map((path) => + fs.stat(path).then( + () => 1, + () => 0 + ) + ) + ) + ).join(''); + }; + it('cleans up least recent log files when requested', async function () { const manager = new MongoLogManager({ directory, @@ -106,21 +119,38 @@ describe('MongoLogManager', function () { paths.unshift(filename); } - const getFiles = async () => { - return ( - await Promise.all( - paths.map((path) => - fs.stat(path).then( - () => 1, - () => 0 - ) - ) - ) - ).join(''); - }; - expect(await getFiles()).to.equal('1111111111'); + expect(await getFilesState(paths)).to.equal('1111111111'); + await manager.cleanupOldLogFiles(); + expect(await getFilesState(paths)).to.equal('0000011111'); + }); + + it('cleans up least recent log files when requested with a storage limit', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + maxLogFileCount: 1000, + // 6 KB + logRetentionGB: 6 / 1024 / 1024, + onwarn, + onerror, + }); + + const paths: string[] = []; + const offset = Math.floor(Date.now() / 1000); + + // Create 10 files of 1 KB each. 
+ for (let i = 0; i < 10; i++) { + const filename = path.join( + directory, + ObjectId.createFromTime(offset - i).toHexString() + '_log' + ); + await fs.writeFile(filename, '0'.repeat(1024)); + paths.unshift(filename); + } + + expect(await getFilesState(paths)).to.equal('1111111111'); await manager.cleanupOldLogFiles(); - expect(await getFiles()).to.equal('0000011111'); + expect(await getFilesState(paths)).to.equal('0000111111'); }); it('cleaning up old log files is a no-op by default', async function () { diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index 487c6a2d..d9328c31 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -17,6 +17,8 @@ interface MongoLogOptions { retentionDays: number; /** The maximal number of log files which are kept. */ maxLogFileCount?: number; + /** The maximal GB of log files which are kept. */ + logRetentionGB?: number; /** A handler for warnings related to a specific filesystem path. */ onerror: (err: Error, path: string) => unknown | Promise; /** A handler for errors related to a specific filesystem path. */ @@ -54,8 +56,15 @@ export class MongoLogManager { const leastRecentFileHeap = new Heap<{ fileTimestamp: number; fullPath: string; + fileSize?: number; }>((a, b) => a.fileTimestamp - b.fileTimestamp); + const storageSizeLimit = this._options.logRetentionGB + ? this._options.logRetentionGB * 1024 * 1024 * 1024 + : Infinity; + let usedStorageSize = this._options.logRetentionGB ? 0 : -Infinity; + // eslint-disable-next-line no-console + for await (const dirent of dirHandle) { // Cap the overall time spent inside this function. 
Consider situations like // a large number of machines using a shared network-mounted $HOME directory @@ -69,23 +78,53 @@ export class MongoLogManager { if (!id) continue; const fileTimestamp = +new ObjectId(id).getTimestamp(); const fullPath = path.join(dir, dirent.name); - let toDelete: string | undefined; + let toDelete: + | { + fullPath: string; + /** If the file wasn't deleted right away and there is a + * retention size limit, its size should be accounted */ + fileSize?: number; + } + | undefined; // If the file is older than expected, delete it. If the file is recent, // add it to the list of seen files, and if that list is too large, remove // the least recent file we've seen so far. if (fileTimestamp < deletionCutoffTimestamp) { - toDelete = fullPath; - } else if (this._options.maxLogFileCount) { - leastRecentFileHeap.push({ fullPath, fileTimestamp }); - if (leastRecentFileHeap.size() > this._options.maxLogFileCount) { - toDelete = leastRecentFileHeap.pop()?.fullPath; + toDelete = { + fullPath, + }; + } else if ( + this._options.logRetentionGB || + this._options.maxLogFileCount + ) { + const fileSize = (await fs.stat(fullPath)).size; + if (this._options.logRetentionGB) { + usedStorageSize += fileSize; + } + + leastRecentFileHeap.push({ + fullPath, + fileTimestamp, + fileSize, + }); + + const reachedMaxStorageSize = usedStorageSize > storageSizeLimit; + const reachedMaxFileCount = + this._options.maxLogFileCount && + leastRecentFileHeap.size() > this._options.maxLogFileCount; + + if (reachedMaxStorageSize || reachedMaxFileCount) { + toDelete = leastRecentFileHeap.pop(); } } if (!toDelete) continue; try { - await fs.unlink(toDelete); + await fs.unlink(toDelete.fullPath); + if (toDelete.fileSize) { + usedStorageSize -= toDelete.fileSize; + } // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (err: any) { if (err?.code !== 'ENOENT') { From 6e4e929cd51349851a4202bfeca3d1eccd4ee025 Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 7 Feb 2025 
13:03:46 +0100 Subject: [PATCH 2/6] refactor: rename to retentionGB --- .../src/mongo-log-manager.spec.ts | 2 +- .../mongodb-log-writer/src/mongo-log-manager.ts | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts index 0e7716e4..edebec54 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts @@ -130,7 +130,7 @@ describe('MongoLogManager', function () { retentionDays, maxLogFileCount: 1000, // 6 KB - logRetentionGB: 6 / 1024 / 1024, + retentionGB: 6 / 1024 / 1024, onwarn, onerror, }); diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index d9328c31..d529be36 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -18,7 +18,7 @@ interface MongoLogOptions { /** The maximal number of log files which are kept. */ maxLogFileCount?: number; /** The maximal GB of log files which are kept. */ - logRetentionGB?: number; + retentionGB?: number; /** A handler for warnings related to a specific filesystem path. */ onerror: (err: Error, path: string) => unknown | Promise; /** A handler for errors related to a specific filesystem path. */ @@ -59,11 +59,10 @@ export class MongoLogManager { fileSize?: number; }>((a, b) => a.fileTimestamp - b.fileTimestamp); - const storageSizeLimit = this._options.logRetentionGB - ? this._options.logRetentionGB * 1024 * 1024 * 1024 + const storageSizeLimit = this._options.retentionGB + ? this._options.retentionGB * 1024 * 1024 * 1024 : Infinity; - let usedStorageSize = this._options.logRetentionGB ? 0 : -Infinity; - // eslint-disable-next-line no-console + let usedStorageSize = this._options.retentionGB ? 
0 : -Infinity; for await (const dirent of dirHandle) { // Cap the overall time spent inside this function. Consider situations like @@ -94,12 +93,9 @@ export class MongoLogManager { toDelete = { fullPath, }; - } else if ( - this._options.logRetentionGB || - this._options.maxLogFileCount - ) { + } else if (this._options.retentionGB || this._options.maxLogFileCount) { const fileSize = (await fs.stat(fullPath)).size; - if (this._options.logRetentionGB) { + if (this._options.retentionGB) { usedStorageSize += fileSize; } From bd2e5f9ec81ba12507144a54a50dcb6833864e0e Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 7 Feb 2025 14:10:42 +0100 Subject: [PATCH 3/6] feat: handle fs.stat error by emitting an error and ignoring the file --- .../src/mongo-log-manager.spec.ts | 54 +++++++++++++++++++ .../src/mongo-log-manager.ts | 16 ++++-- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts index edebec54..35748ca1 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts @@ -1,6 +1,7 @@ import { MongoLogManager, mongoLogId } from '.'; import { ObjectId } from 'bson'; import { once } from 'events'; +import type { Stats } from 'fs'; import { promises as fs } from 'fs'; import path from 'path'; import os from 'os'; @@ -27,6 +28,7 @@ describe('MongoLogManager', function () { }); afterEach(async function () { await fs.rmdir(directory, { recursive: true }); + sinon.restore(); }); it('allows creating and writing to log files', async function () { @@ -124,6 +126,58 @@ describe('MongoLogManager', function () { expect(await getFilesState(paths)).to.equal('0000011111'); }); + it('if fs.stat fails, it errors and is not considered towards the logs limit', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + retentionGB: 3, + onwarn, + onerror, + }); + 
+ + const offset = Math.floor(Date.now() / 1000); + + const faultyFile = path.join( + directory, + ObjectId.createFromTime(offset - 10).toHexString() + '_log' + ); + await fs.writeFile(faultyFile, ''); + + const faultyFileError = new Error('test error'); + + const validFiles: string[] = []; + // Create 6 valid files. + for (let i = 5; i >= 0; i--) { + const filename = path.join( + directory, + ObjectId.createFromTime(offset - i).toHexString() + '_log' + ); + await fs.writeFile(filename, ''); + validFiles.push(filename); + } + + expect(onerror).not.called; + + const fsStatStub = sinon.stub(fs, 'stat'); + + fsStatStub.resolves({ + size: 1024 * 1024 * 1024, + } as Stats); + fsStatStub.withArgs(faultyFile).rejects(faultyFileError); + + await manager.cleanupOldLogFiles(); + + expect(onerror).calledOnceWithExactly(faultyFileError, faultyFile); + + // fs.stat is stubbed so getFilesState will not be accurate. + const leftoverFiles = (await fs.readdir(directory)) + .sort() + .map((file) => path.join(directory, file)); + + expect(leftoverFiles).to.have.lengthOf(4); + expect(leftoverFiles).deep.equals([faultyFile, ...validFiles.slice(3)]); + }); + it('cleans up least recent log files when requested with a storage limit', async function () { const manager = new MongoLogManager({ directory, diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index d529be36..ea1a1a56 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -19,9 +19,9 @@ interface MongoLogOptions { maxLogFileCount?: number; /** The maximal GB of log files which are kept. */ retentionGB?: number; - /** A handler for warnings related to a specific filesystem path. */ - onerror: (err: Error, path: string) => unknown | Promise; /** A handler for errors related to a specific filesystem path.
*/ + onerror: (err: Error, path: string) => unknown | Promise; + /** A handler for warnings related to a specific filesystem path. */ onwarn: (err: Error, path: string) => unknown | Promise; } @@ -94,9 +94,17 @@ export class MongoLogManager { fullPath, }; } else if (this._options.retentionGB || this._options.maxLogFileCount) { - const fileSize = (await fs.stat(fullPath)).size; + let fileSize: number | undefined; + if (this._options.retentionGB) { - usedStorageSize += fileSize; + try { + fileSize = (await fs.stat(fullPath)).size; + if (this._options.retentionGB) { + usedStorageSize += fileSize; + } + } catch (err) { + this._options.onerror(err as Error, fullPath); + } } leastRecentFileHeap.push({ From b008854360b0231815b4e21b96f6e84bd276eb1f Mon Sep 17 00:00:00 2001 From: gagik Date: Fri, 7 Feb 2025 16:46:34 +0100 Subject: [PATCH 4/6] refactor: use a sorted array of logs in advance --- .../src/mongo-log-manager.ts | 74 +++++++++---------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index ea1a1a56..f51c296c 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -3,7 +3,6 @@ import { ObjectId } from 'bson'; import { once } from 'events'; import { createWriteStream, promises as fs } from 'fs'; import { createGzip, constants as zlibConstants } from 'zlib'; -import { Heap } from 'heap-js'; import { MongoLogWriter } from './mongo-log-writer'; import { Writable } from 'stream'; @@ -40,9 +39,37 @@ export class MongoLogManager { /** Clean up log files older than `retentionDays`. */ async cleanupOldLogFiles(maxDurationMs = 5_000): Promise { const dir = this._options.directory; - let dirHandle; + const sortedLogFiles: { + fullPath: string; + id: string; + size?: number; + }[] = []; + let usedStorageSize = this._options.retentionGB ? 
0 : -Infinity; + try { - dirHandle = await fs.opendir(dir); + const files = await fs.readdir(dir, { withFileTypes: true }); + for (const file of files) { + const { id } = + /^(?[a-f0-9]{24})_log(\.gz)?$/i.exec(file.name)?.groups ?? {}; + + if (!file.isFile() || !id) { + continue; + } + + const fullPath = path.join(dir, file.name); + let size: number | undefined; + if (this._options.retentionGB) { + try { + size = (await fs.stat(fullPath)).size; + usedStorageSize += size; + } catch (err) { + this._options.onerror(err as Error, fullPath); + continue; + } + } + + sortedLogFiles.push({ fullPath, id, size }); + } } catch { return; } @@ -51,32 +78,19 @@ export class MongoLogManager { // Delete files older than N days const deletionCutoffTimestamp = deletionStartTimestamp - this._options.retentionDays * 86400 * 1000; - // Store the known set of least recent files in a heap in order to be able to - // delete all but the most recent N files. - const leastRecentFileHeap = new Heap<{ - fileTimestamp: number; - fullPath: string; - fileSize?: number; - }>((a, b) => a.fileTimestamp - b.fileTimestamp); const storageSizeLimit = this._options.retentionGB ? this._options.retentionGB * 1024 * 1024 * 1024 : Infinity; - let usedStorageSize = this._options.retentionGB ? 0 : -Infinity; - for await (const dirent of dirHandle) { + for await (const { id, fullPath } of [...sortedLogFiles]) { // Cap the overall time spent inside this function. Consider situations like // a large number of machines using a shared network-mounted $HOME directory // where lots and lots of log files end up and filesystem operations happen // with network latency. if (Date.now() - deletionStartTimestamp > maxDurationMs) break; - if (!dirent.isFile()) continue; - const { id } = - /^(?[a-f0-9]{24})_log(\.gz)?$/i.exec(dirent.name)?.groups ?? 
{}; - if (!id) continue; const fileTimestamp = +new ObjectId(id).getTimestamp(); - const fullPath = path.join(dir, dirent.name); let toDelete: | { fullPath: string; @@ -94,32 +108,13 @@ export class MongoLogManager { fullPath, }; } else if (this._options.retentionGB || this._options.maxLogFileCount) { - let fileSize: number | undefined; - - if (this._options.retentionGB) { - try { - fileSize = (await fs.stat(fullPath)).size; - if (this._options.retentionGB) { - usedStorageSize += fileSize; - } - } catch (err) { - this._options.onerror(err as Error, fullPath); - } - } - - leastRecentFileHeap.push({ - fullPath, - fileTimestamp, - fileSize, - }); - const reachedMaxStorageSize = usedStorageSize > storageSizeLimit; const reachedMaxFileCount = this._options.maxLogFileCount && - leastRecentFileHeap.size() > this._options.maxLogFileCount; + sortedLogFiles.length > this._options.maxLogFileCount; if (reachedMaxStorageSize || reachedMaxFileCount) { - toDelete = leastRecentFileHeap.pop(); + toDelete = sortedLogFiles.shift(); } } @@ -132,8 +127,7 @@ export class MongoLogManager { // eslint-disable-next-line @typescript-eslint/no-explicit-any } catch (err: any) { if (err?.code !== 'ENOENT') { - // eslint-disable-next-line @typescript-eslint/no-unsafe-argument - this._options.onerror(err, fullPath); + this._options.onerror(err as Error, fullPath); } } } From 99afade9793b778dba52b4276dca47757b334f9a Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 10 Feb 2025 14:53:00 +0100 Subject: [PATCH 5/6] refactor: use a heap-based second run approach Includes test for random OS-based file order as well as mixing different settings together. 
--- .../src/mongo-log-manager.spec.ts | 170 +++++++++++++++++- .../src/mongo-log-manager.ts | 130 +++++++------- 2 files changed, 235 insertions(+), 65 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts index 35748ca1..eb63b50b 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.spec.ts @@ -1,7 +1,7 @@ import { MongoLogManager, mongoLogId } from '.'; import { ObjectId } from 'bson'; import { once } from 'events'; -import type { Stats } from 'fs'; +import type { Stats, Dir } from 'fs'; import { promises as fs } from 'fs'; import path from 'path'; import os from 'os'; @@ -178,7 +178,7 @@ describe('MongoLogManager', function () { expect(leftoverFiles).deep.equals([faultyFile, ...validFiles.slice(3)]); }); - it('cleans up least recent log files when requested with a storage limit', async function () { + it('cleans up least recent log files when over a storage limit', async function () { const manager = new MongoLogManager({ directory, retentionDays, @@ -207,6 +207,172 @@ describe('MongoLogManager', function () { expect(await getFilesState(paths)).to.equal('0000111111'); }); + describe('with a random file order', function () { + let paths: string[] = []; + const times = [92, 90, 1, 2, 3, 91]; + + beforeEach(async function () { + const fileNames: string[] = []; + paths = []; + const offset = Math.floor(Date.now() / 1000); + + for (const time of times) { + const fileName = + ObjectId.createFromTime(offset - time).toHexString() + '_log'; + const fullPath = path.join(directory, fileName); + await fs.writeFile(fullPath, '0'.repeat(1024)); + fileNames.push(fileName); + paths.push(fullPath); + } + + sinon.replace(fs, 'opendir', async () => + Promise.resolve({ + [Symbol.asyncIterator]: function* () { + for (const fileName of fileNames) { + yield { + name: fileName, + isFile: () => true, + }; + } + }, + } as 
unknown as Dir) + ); + }); + + it('cleans up in the expected order with maxLogFileCount', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + maxLogFileCount: 3, + onwarn, + onerror, + }); + + expect(await getFilesState(paths)).to.equal('111111'); + + await manager.cleanupOldLogFiles(); + + expect(await getFilesState(paths)).to.equal('001110'); + }); + + it('cleans up in the expected order with retentionGB', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + retentionGB: 3 / 1024 / 1024, + onwarn, + onerror, + }); + + expect(await getFilesState(paths)).to.equal('111111'); + + await manager.cleanupOldLogFiles(); + + expect(await getFilesState(paths)).to.equal('001110'); + }); + }); + + describe('with multiple log retention settings', function () { + it('with retention days, file count, and max size maintains all conditions', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays: 1, + maxLogFileCount: 3, + retentionGB: 2 / 1024 / 1024, + onwarn, + onerror, + }); + + const paths: string[] = []; + + // Create 4 files which are all older than 1 day and 4 which are from today. + for (let i = 0; i < 4; i++) { + const today = Math.floor(Date.now() / 1000); + const yesterday = today - 25 * 60 * 60; + const todayFile = path.join( + directory, + ObjectId.createFromTime(today - i).toHexString() + '_log' + ); + await fs.writeFile(todayFile, '0'.repeat(1024)); + + const yesterdayFile = path.join( + directory, + ObjectId.createFromTime(yesterday - i).toHexString() + '_log' + ); + await fs.writeFile(yesterdayFile, '0'.repeat(1024)); + + paths.unshift(todayFile); + paths.unshift(yesterdayFile); + } + + expect(await getFilesState(paths)).to.equal('11111111'); + + await manager.cleanupOldLogFiles(); + + // All yesterdays files, 2 of today's files should be deleted. 
+ // (because of file count and file size) + expect(await getFilesState(paths)).to.equal('00000101'); + }); + + it('with low GB but high file count maintains both conditions', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + maxLogFileCount: 3, + // 2 KB, so 2 files + retentionGB: 2 / 1024 / 1024, + onwarn, + onerror, + }); + + const paths: string[] = []; + const offset = Math.floor(Date.now() / 1000); + + // Create 10 files of 1 KB each. + for (let i = 0; i < 10; i++) { + const filename = path.join( + directory, + ObjectId.createFromTime(offset - i).toHexString() + '_log' + ); + await fs.writeFile(filename, '0'.repeat(1024)); + paths.unshift(filename); + } + + expect(await getFilesState(paths)).to.equal('1111111111'); + await manager.cleanupOldLogFiles(); + expect(await getFilesState(paths)).to.equal('0000000011'); + }); + + it('with high GB but low file count maintains both conditions', async function () { + const manager = new MongoLogManager({ + directory, + retentionDays, + maxLogFileCount: 2, + // 3 KB, so 3 files + retentionGB: 3 / 1024 / 1024, + onwarn, + onerror, + }); + + const paths: string[] = []; + const offset = Math.floor(Date.now() / 1000); + + // Create 10 files of 1 KB each. 
+ for (let i = 0; i < 10; i++) { + const filename = path.join( + directory, + ObjectId.createFromTime(offset - i).toHexString() + '_log' + ); + await fs.writeFile(filename, '0'.repeat(1024)); + paths.unshift(filename); + } + + expect(await getFilesState(paths)).to.equal('1111111111'); + await manager.cleanupOldLogFiles(); + expect(await getFilesState(paths)).to.equal('0000000011'); + }); + }); + it('cleaning up old log files is a no-op by default', async function () { const manager = new MongoLogManager({ directory: path.join('directory', 'nonexistent'), diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index f51c296c..28b6408f 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -3,6 +3,7 @@ import { ObjectId } from 'bson'; import { once } from 'events'; import { createWriteStream, promises as fs } from 'fs'; import { createGzip, constants as zlibConstants } from 'zlib'; +import { Heap } from 'heap-js'; import { MongoLogWriter } from './mongo-log-writer'; import { Writable } from 'stream'; @@ -36,40 +37,23 @@ export class MongoLogManager { this._options = options; } + private async deleteFile(path: string) { + try { + await fs.unlink(path); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (err: any) { + if (err?.code !== 'ENOENT') { + this._options.onerror(err as Error, path); + } + } + } + /** Clean up log files older than `retentionDays`. */ async cleanupOldLogFiles(maxDurationMs = 5_000): Promise { const dir = this._options.directory; - const sortedLogFiles: { - fullPath: string; - id: string; - size?: number; - }[] = []; - let usedStorageSize = this._options.retentionGB ? 0 : -Infinity; - + let dirHandle; try { - const files = await fs.readdir(dir, { withFileTypes: true }); - for (const file of files) { - const { id } = - /^(?[a-f0-9]{24})_log(\.gz)?$/i.exec(file.name)?.groups ?? 
{}; - - if (!file.isFile() || !id) { - continue; - } - - const fullPath = path.join(dir, file.name); - let size: number | undefined; - if (this._options.retentionGB) { - try { - size = (await fs.stat(fullPath)).size; - usedStorageSize += size; - } catch (err) { - this._options.onerror(err as Error, fullPath); - continue; - } - } - - sortedLogFiles.push({ fullPath, id, size }); - } + dirHandle = await fs.opendir(dir); } catch { return; } @@ -78,58 +62,78 @@ export class MongoLogManager { // Delete files older than N days const deletionCutoffTimestamp = deletionStartTimestamp - this._options.retentionDays * 86400 * 1000; + // Store the known set of least recent files in a heap in order to be able to + // delete all but the most recent N files. + const leastRecentFileHeap = new Heap<{ + fileTimestamp: number; + fullPath: string; + fileSize: number | undefined; + }>((a, b) => a.fileTimestamp - b.fileTimestamp); - const storageSizeLimit = this._options.retentionGB - ? this._options.retentionGB * 1024 * 1024 * 1024 - : Infinity; + let usedStorageSize = this._options.retentionGB ? 0 : -Infinity; - for await (const { id, fullPath } of [...sortedLogFiles]) { + for await (const dirent of dirHandle) { // Cap the overall time spent inside this function. Consider situations like // a large number of machines using a shared network-mounted $HOME directory // where lots and lots of log files end up and filesystem operations happen // with network latency. if (Date.now() - deletionStartTimestamp > maxDurationMs) break; + if (!dirent.isFile()) continue; + const { id } = + /^(?<id>[a-f0-9]{24})_log(\.gz)?$/i.exec(dirent.name)?.groups ??
{}; + if (!id) continue; + const fileTimestamp = +new ObjectId(id).getTimestamp(); - let toDelete: - | { - fullPath: string; - /** If the file wasn't deleted right away and there is a - * retention size limit, its size should be accounted */ - fileSize?: number; - } - | undefined; + const fullPath = path.join(dir, dirent.name); // If the file is older than expected, delete it. If the file is recent, // add it to the list of seen files, and if that list is too large, remove // the least recent file we've seen so far. if (fileTimestamp < deletionCutoffTimestamp) { - toDelete = { - fullPath, - }; - } else if (this._options.retentionGB || this._options.maxLogFileCount) { - const reachedMaxStorageSize = usedStorageSize > storageSizeLimit; - const reachedMaxFileCount = - this._options.maxLogFileCount && - sortedLogFiles.length > this._options.maxLogFileCount; - - if (reachedMaxStorageSize || reachedMaxFileCount) { - toDelete = sortedLogFiles.shift(); - } + await this.deleteFile(fullPath); + continue; } - if (!toDelete) continue; - try { - await fs.unlink(toDelete.fullPath); - if (toDelete.fileSize) { - usedStorageSize -= toDelete.fileSize; - } - // eslint-disable-next-line @typescript-eslint/no-explicit-any - } catch (err: any) { - if (err?.code !== 'ENOENT') { + let fileSize: number | undefined; + if (this._options.retentionGB) { + try { + fileSize = (await fs.stat(fullPath)).size; + usedStorageSize += fileSize; + } catch (err) { this._options.onerror(err as Error, fullPath); + continue; } } + + if (this._options.maxLogFileCount || this._options.retentionGB) { + leastRecentFileHeap.push({ fullPath, fileTimestamp, fileSize }); + } + + if ( + this._options.maxLogFileCount && + leastRecentFileHeap.size() > this._options.maxLogFileCount + ) { + const toDelete = leastRecentFileHeap.pop(); + if (!toDelete) continue; + await this.deleteFile(toDelete.fullPath); + usedStorageSize -= toDelete.fileSize ?? 
0; + } + } + + if (this._options.retentionGB) { + const storageSizeLimit = this._options.retentionGB * 1024 * 1024 * 1024; + + for (const file of leastRecentFileHeap) { + if (Date.now() - deletionStartTimestamp > maxDurationMs) break; + + if (usedStorageSize <= storageSizeLimit) break; + + if (!file.fileSize) continue; + + await this.deleteFile(file.fullPath); + usedStorageSize -= file.fileSize; + } } } From 9f3c1bfad1c0ea058a6f6a036d936b3efe2c4d0b Mon Sep 17 00:00:00 2001 From: gagik Date: Mon, 10 Feb 2025 15:55:07 +0100 Subject: [PATCH 6/6] fix: minor type and comment changes --- packages/mongodb-log-writer/src/mongo-log-manager.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/mongodb-log-writer/src/mongo-log-manager.ts b/packages/mongodb-log-writer/src/mongo-log-manager.ts index 28b6408f..180cf517 100644 --- a/packages/mongodb-log-writer/src/mongo-log-manager.ts +++ b/packages/mongodb-log-writer/src/mongo-log-manager.ts @@ -17,7 +17,7 @@ interface MongoLogOptions { retentionDays: number; /** The maximal number of log files which are kept. */ maxLogFileCount?: number; - /** The maximal GB of log files which are kept. */ + /** The maximal size of log files which are kept. */ retentionGB?: number; /** A handler for errors related to a specific filesystem path. */ onerror: (err: Error, path: string) => unknown | Promise<void>; @@ -37,7 +37,7 @@ export class MongoLogManager { this._options = options; } - private async deleteFile(path: string) { + private async deleteFile(path: string): Promise<void> { try { await fs.unlink(path); // eslint-disable-next-line @typescript-eslint/no-explicit-any