fix: update resource estimation calculation

giladgd · giladgd · commit 99938f897d53 · 2025-12-08T19:02:05.000+02:00
diff --git a/src/gguf/insights/GgufInsights.ts b/src/gguf/insights/GgufInsights.ts
@@ -382,8 +382,24 @@ export class GgufInsights {
         const cpuKVCacheSize = this._estimateKvMemorySizeInBytes(kvSize, finalCpuLayers);
 
         // source: `llama_context::graph_max_nodes` in `llama-context.cpp`
-        const maxNodes = Math.max(65536, 5 * tensorInfo.length);
-        const cpuNodes = 5 * (tensorInfo.length * (finalCpuLayers / totalFileLayers));
+        const getMaxNodesMultiplier = (arch: GgufArchitectureType | undefined, nTokens: number): {min: number, multiplier: number} => {
+            if (arch === GgufArchitectureType.qwen3next)
+                return {
+                    min: nTokens * 40,
+                    multiplier: 32
+                };
+
+            return {
+                min: 1024,
+                multiplier: 8
+            };
+        };
+        const maxNodesMultiplier = getMaxNodesMultiplier(
+            this._ggufFileInfo.metadata?.general?.architecture,
+            Math.min(actualContextSize, batchSize)
+        );
+        const maxNodes = Math.max(maxNodesMultiplier.min, maxNodesMultiplier.multiplier * tensorInfo.length);
+        const cpuNodes = maxNodesMultiplier.multiplier * (tensorInfo.length * (finalCpuLayers / totalFileLayers));
         const gpuNodes = maxNodes - cpuNodes;
 
         const gpuComputeBufferSize = (this._llama._consts.ggmlTensorOverhead * gpuNodes) +
diff --git a/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts b/test/modelDependent/functionary/functionaryModelGpuLayersOptions.test.ts
@@ -255,7 +255,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 4.5
                         });
                         expect(res.gpuLayers).to.eql(16);
-                        expect(res.contextSize).to.toMatchInlineSnapshot("3840");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("4096");
                     }
                     try {
                         await resolveGpuLayers(16, {
@@ -343,7 +343,7 @@ describe("functionary", () => {
                             unifiedMemorySize: s1GB * 7.3
                         });
                         expect(res.gpuLayers).to.eql(16);
-                        expect(res.contextSize).to.toMatchInlineSnapshot("1536");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("1792");
                     }
                     {
                         const res = await resolveGpuLayers(16, {
@@ -820,7 +820,7 @@ describe("functionary", () => {
                             unifiedMemorySize: s1GB * 6
                         });
                         expect(res.gpuLayers).to.eql(33);
-                        expect(res.contextSize).to.toMatchInlineSnapshot("2816");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("3072");
                     }
                     {
                         const res = await resolveGpuLayers(33, {
@@ -908,7 +908,7 @@ describe("functionary", () => {
                         freeRam: s1GB * 1
                     });
                     expect(res.gpuLayers).to.eql(33);
-                    expect(res.contextSize).to.toMatchInlineSnapshot("458");
+                    expect(res.contextSize).to.toMatchInlineSnapshot("501");
                 }
                 {
                     const res = await resolveGpuLayers("max", {
@@ -918,7 +918,7 @@ describe("functionary", () => {
                         freeRam: s1GB * 1
                     });
                     expect(res.gpuLayers).to.eql(33);
-                    expect(res.contextSize).to.toMatchInlineSnapshot("768");
+                    expect(res.contextSize).to.toMatchInlineSnapshot("1024");
                 }
             });
 
@@ -962,7 +962,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 8
                         });
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("7424");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("7936");
                     }
                     {
                         const res = await resolveGpuLayers("auto", {
@@ -1125,7 +1125,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 5
                         });
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("7424");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("7936");
                     }
                     {
                         const res = await resolveGpuLayers("auto", {
@@ -1349,7 +1349,7 @@ describe("functionary", () => {
                         expect(res.gpuLayers).to.be.gte(16);
                         expect(res.gpuLayers).to.be.lte(24);
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("3840");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("4096");
                     }
                 });
 
@@ -1451,7 +1451,7 @@ describe("functionary", () => {
                         expect(res.gpuLayers).to.be.gte(16);
                         expect(res.gpuLayers).to.be.lte(24);
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("16");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("3840");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("4096");
                     }
                 });
             });
@@ -1479,8 +1479,8 @@ describe("functionary", () => {
                             totalRam: s1GB * 8,
                             freeRam: s1GB * 8
                         });
-                        expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("6400");
+                        expect(res.gpuLayers).to.toMatchInlineSnapshot("22");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("5376");
                         expect(res.contextSize).to.be.gte(contextSize);
                     }
                     {
@@ -1492,7 +1492,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 8
                         });
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("7424");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("7936");
                         expect(res.contextSize).to.be.gte(contextSize);
                     }
                     {
@@ -1569,7 +1569,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 7
                         });
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("21");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("6400");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("6656");
                         expect(res.contextSize).to.be.gte(contextSize);
                     }
                     {
@@ -1581,7 +1581,7 @@ describe("functionary", () => {
                             freeRam: s1GB * 7
                         });
                         expect(res.gpuLayers).to.toMatchInlineSnapshot("7");
-                        expect(res.contextSize).to.toMatchInlineSnapshot("7424");
+                        expect(res.contextSize).to.toMatchInlineSnapshot("7936");
                         expect(res.contextSize).to.be.gte(contextSize);
                     }
                     {
diff --git a/test/modelDependent/functionary/gguf/ggufInsights.test.ts b/test/modelDependent/functionary/gguf/ggufInsights.test.ts
@@ -124,7 +124,7 @@ describe("gguf", async () => {
                 sequences: context.totalSequences,
                 modelGpuLayers: ggufInsights.totalLayers
             }).gpuVram;
-            expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot("\"1.03GB\"");
+            expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot('"1GB"');
             expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s330MB);
 
             await model.dispose();
@@ -190,7 +190,7 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "643.08MB",
+                "cpuRam": "643.45MB",
                 "gpuVram": "0B",
               }
             `);
@@ -201,7 +201,7 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "451.08MB",
+                "cpuRam": "451.45MB",
                 "gpuVram": "0B",
               }
             `);
@@ -214,7 +214,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "1.71GB",
-                "gpuVram": "355.75MB",
+                "gpuVram": "330.78MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -224,8 +224,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "1002.82MB",
-                "gpuVram": "315.75MB",
+                "cpuRam": "1003.17MB",
+                "gpuVram": "290.78MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -235,8 +235,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "630.82MB",
-                "gpuVram": "295.75MB",
+                "cpuRam": "631.17MB",
+                "gpuVram": "270.78MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -246,8 +246,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "444.82MB",
-                "gpuVram": "285.75MB",
+                "cpuRam": "445.17MB",
+                "gpuVram": "260.78MB",
               }
             `);
 
@@ -258,8 +258,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "1022.79MB",
-                "gpuVram": "1.05GB",
+                "cpuRam": "1022.98MB",
+                "gpuVram": "1.03GB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -269,8 +269,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "638.79MB",
-                "gpuVram": "679.75MB",
+                "cpuRam": "638.98MB",
+                "gpuVram": "654.98MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -280,8 +280,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "446.79MB",
-                "gpuVram": "479.75MB",
+                "cpuRam": "446.98MB",
+                "gpuVram": "454.98MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -291,8 +291,8 @@ describe("gguf", async () => {
                 batchSize: 512
             }))).toMatchInlineSnapshot(`
               {
-                "cpuRam": "350.79MB",
-                "gpuVram": "379.75MB",
+                "cpuRam": "350.98MB",
+                "gpuVram": "354.98MB",
               }
             `);
 
@@ -304,7 +304,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "1.78GB",
+                "gpuVram": "1.75GB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -315,7 +315,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "1.03GB",
+                "gpuVram": "1GB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -326,7 +326,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "668.02MB",
+                "gpuVram": "643.45MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -337,7 +337,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "476.02MB",
+                "gpuVram": "451.45MB",
               }
             `);
 
@@ -349,7 +349,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "1.78GB",
+                "gpuVram": "1.75GB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -360,7 +360,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "1.03GB",
+                "gpuVram": "1GB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -371,7 +371,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "668.02MB",
+                "gpuVram": "643.45MB",
               }
             `);
             expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({
@@ -382,7 +382,7 @@ describe("gguf", async () => {
             }))).toMatchInlineSnapshot(`
               {
                 "cpuRam": "250.5MB",
-                "gpuVram": "476.02MB",
+                "gpuVram": "451.45MB",
               }
             `);
         });
diff --git a/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts b/test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts

Original file line number	Diff line number	Diff line change
`@@ -255,7 +255,7 @@ describe("functionary", () => {`
`255`	`255`	`freeRam: s1GB * 4.5`
`256`	`256`	`});`
`257`	`257`	`expect(res.gpuLayers).to.eql(16);`
`258`		`- expect(res.contextSize).to.toMatchInlineSnapshot("3840");`
	`258`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("4096");`
`259`	`259`	`}`
`260`	`260`	`try {`
`261`	`261`	`await resolveGpuLayers(16, {`
`@@ -343,7 +343,7 @@ describe("functionary", () => {`
`343`	`343`	`unifiedMemorySize: s1GB * 7.3`
`344`	`344`	`});`
`345`	`345`	`expect(res.gpuLayers).to.eql(16);`
`346`		`- expect(res.contextSize).to.toMatchInlineSnapshot("1536");`
	`346`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("1792");`
`347`	`347`	`}`
`348`	`348`	`{`
`349`	`349`	`const res = await resolveGpuLayers(16, {`
`@@ -820,7 +820,7 @@ describe("functionary", () => {`
`820`	`820`	`unifiedMemorySize: s1GB * 6`
`821`	`821`	`});`
`822`	`822`	`expect(res.gpuLayers).to.eql(33);`
`823`		`- expect(res.contextSize).to.toMatchInlineSnapshot("2816");`
	`823`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("3072");`
`824`	`824`	`}`
`825`	`825`	`{`
`826`	`826`	`const res = await resolveGpuLayers(33, {`
`@@ -908,7 +908,7 @@ describe("functionary", () => {`
`908`	`908`	`freeRam: s1GB * 1`
`909`	`909`	`});`
`910`	`910`	`expect(res.gpuLayers).to.eql(33);`
`911`		`- expect(res.contextSize).to.toMatchInlineSnapshot("458");`
	`911`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("501");`
`912`	`912`	`}`
`913`	`913`	`{`
`914`	`914`	`const res = await resolveGpuLayers("max", {`
`@@ -918,7 +918,7 @@ describe("functionary", () => {`
`918`	`918`	`freeRam: s1GB * 1`
`919`	`919`	`});`
`920`	`920`	`expect(res.gpuLayers).to.eql(33);`
`921`		`- expect(res.contextSize).to.toMatchInlineSnapshot("768");`
	`921`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("1024");`
`922`	`922`	`}`
`923`	`923`	`});`
`924`	`924`
`@@ -962,7 +962,7 @@ describe("functionary", () => {`
`962`	`962`	`freeRam: s1GB * 8`
`963`	`963`	`});`
`964`	`964`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("7");`
`965`		`- expect(res.contextSize).to.toMatchInlineSnapshot("7424");`
	`965`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("7936");`
`966`	`966`	`}`
`967`	`967`	`{`
`968`	`968`	`const res = await resolveGpuLayers("auto", {`
`@@ -1125,7 +1125,7 @@ describe("functionary", () => {`
`1125`	`1125`	`freeRam: s1GB * 5`
`1126`	`1126`	`});`
`1127`	`1127`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("7");`
`1128`		`- expect(res.contextSize).to.toMatchInlineSnapshot("7424");`
	`1128`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("7936");`
`1129`	`1129`	`}`
`1130`	`1130`	`{`
`1131`	`1131`	`const res = await resolveGpuLayers("auto", {`
`@@ -1349,7 +1349,7 @@ describe("functionary", () => {`
`1349`	`1349`	`expect(res.gpuLayers).to.be.gte(16);`
`1350`	`1350`	`expect(res.gpuLayers).to.be.lte(24);`
`1351`	`1351`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("16");`
`1352`		`- expect(res.contextSize).to.toMatchInlineSnapshot("3840");`
	`1352`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("4096");`
`1353`	`1353`	`}`
`1354`	`1354`	`});`
`1355`	`1355`
`@@ -1451,7 +1451,7 @@ describe("functionary", () => {`
`1451`	`1451`	`expect(res.gpuLayers).to.be.gte(16);`
`1452`	`1452`	`expect(res.gpuLayers).to.be.lte(24);`
`1453`	`1453`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("16");`
`1454`		`- expect(res.contextSize).to.toMatchInlineSnapshot("3840");`
	`1454`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("4096");`
`1455`	`1455`	`}`
`1456`	`1456`	`});`
`1457`	`1457`	`});`
`@@ -1479,8 +1479,8 @@ describe("functionary", () => {`
`1479`	`1479`	`totalRam: s1GB * 8,`
`1480`	`1480`	`freeRam: s1GB * 8`
`1481`	`1481`	`});`
`1482`		`- expect(res.gpuLayers).to.toMatchInlineSnapshot("21");`
`1483`		`- expect(res.contextSize).to.toMatchInlineSnapshot("6400");`
	`1482`	`+ expect(res.gpuLayers).to.toMatchInlineSnapshot("22");`
	`1483`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("5376");`
`1484`	`1484`	`expect(res.contextSize).to.be.gte(contextSize);`
`1485`	`1485`	`}`
`1486`	`1486`	`{`
`@@ -1492,7 +1492,7 @@ describe("functionary", () => {`
`1492`	`1492`	`freeRam: s1GB * 8`
`1493`	`1493`	`});`
`1494`	`1494`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("7");`
`1495`		`- expect(res.contextSize).to.toMatchInlineSnapshot("7424");`
	`1495`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("7936");`
`1496`	`1496`	`expect(res.contextSize).to.be.gte(contextSize);`
`1497`	`1497`	`}`
`1498`	`1498`	`{`
`@@ -1569,7 +1569,7 @@ describe("functionary", () => {`
`1569`	`1569`	`freeRam: s1GB * 7`
`1570`	`1570`	`});`
`1571`	`1571`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("21");`
`1572`		`- expect(res.contextSize).to.toMatchInlineSnapshot("6400");`
	`1572`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("6656");`
`1573`	`1573`	`expect(res.contextSize).to.be.gte(contextSize);`
`1574`	`1574`	`}`
`1575`	`1575`	`{`
`@@ -1581,7 +1581,7 @@ describe("functionary", () => {`
`1581`	`1581`	`freeRam: s1GB * 7`
`1582`	`1582`	`});`
`1583`	`1583`	`expect(res.gpuLayers).to.toMatchInlineSnapshot("7");`
`1584`		`- expect(res.contextSize).to.toMatchInlineSnapshot("7424");`
	`1584`	`+ expect(res.contextSize).to.toMatchInlineSnapshot("7936");`
`1585`	`1585`	`expect(res.contextSize).to.be.gte(contextSize);`
`1586`	`1586`	`}`
`1587`	`1587`	`{`
Original file line number	Diff line number	Diff line change
`@@ -124,7 +124,7 @@ describe("gguf", async () => {`
`124`	`124`	`sequences: context.totalSequences,`
`125`	`125`	`modelGpuLayers: ggufInsights.totalLayers`
`126`	`126`	`}).gpuVram;`
`127`		`- expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot("\"1.03GB\"");`
	`127`	`+ expect(toBytes(estimatedContextVramUsage)).toMatchInlineSnapshot('"1GB"');`
`128`	`128`	`expect(Math.abs(contextVramUsageDiff - estimatedContextVramUsage)).to.be.lte(s330MB);`
`129`	`129`
`130`	`130`	`await model.dispose();`
`@@ -190,7 +190,7 @@ describe("gguf", async () => {`
`190`	`190`	`batchSize: 512`
`191`	`191`	`` }))).toMatchInlineSnapshot(` ``
`192`	`192`	`{`
`193`		`- "cpuRam": "643.08MB",`
	`193`	`+ "cpuRam": "643.45MB",`
`194`	`194`	`"gpuVram": "0B",`
`195`	`195`	`}`
`196`	`196`	`` `); ``
`@@ -201,7 +201,7 @@ describe("gguf", async () => {`
`201`	`201`	`batchSize: 512`
`202`	`202`	`` }))).toMatchInlineSnapshot(` ``
`203`	`203`	`{`
`204`		`- "cpuRam": "451.08MB",`
	`204`	`+ "cpuRam": "451.45MB",`
`205`	`205`	`"gpuVram": "0B",`
`206`	`206`	`}`
`207`	`207`	`` `); ``
`@@ -214,7 +214,7 @@ describe("gguf", async () => {`
`214`	`214`	`` }))).toMatchInlineSnapshot(` ``
`215`	`215`	`{`
`216`	`216`	`"cpuRam": "1.71GB",`
`217`		`- "gpuVram": "355.75MB",`
	`217`	`+ "gpuVram": "330.78MB",`
`218`	`218`	`}`
`219`	`219`	`` `); ``
`220`	`220`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -224,8 +224,8 @@ describe("gguf", async () => {`
`224`	`224`	`batchSize: 512`
`225`	`225`	`` }))).toMatchInlineSnapshot(` ``
`226`	`226`	`{`
`227`		`- "cpuRam": "1002.82MB",`
`228`		`- "gpuVram": "315.75MB",`
	`227`	`+ "cpuRam": "1003.17MB",`
	`228`	`+ "gpuVram": "290.78MB",`
`229`	`229`	`}`
`230`	`230`	`` `); ``
`231`	`231`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -235,8 +235,8 @@ describe("gguf", async () => {`
`235`	`235`	`batchSize: 512`
`236`	`236`	`` }))).toMatchInlineSnapshot(` ``
`237`	`237`	`{`
`238`		`- "cpuRam": "630.82MB",`
`239`		`- "gpuVram": "295.75MB",`
	`238`	`+ "cpuRam": "631.17MB",`
	`239`	`+ "gpuVram": "270.78MB",`
`240`	`240`	`}`
`241`	`241`	`` `); ``
`242`	`242`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -246,8 +246,8 @@ describe("gguf", async () => {`
`246`	`246`	`batchSize: 512`
`247`	`247`	`` }))).toMatchInlineSnapshot(` ``
`248`	`248`	`{`
`249`		`- "cpuRam": "444.82MB",`
`250`		`- "gpuVram": "285.75MB",`
	`249`	`+ "cpuRam": "445.17MB",`
	`250`	`+ "gpuVram": "260.78MB",`
`251`	`251`	`}`
`252`	`252`	`` `); ``
`253`	`253`
`@@ -258,8 +258,8 @@ describe("gguf", async () => {`
`258`	`258`	`batchSize: 512`
`259`	`259`	`` }))).toMatchInlineSnapshot(` ``
`260`	`260`	`{`
`261`		`- "cpuRam": "1022.79MB",`
`262`		`- "gpuVram": "1.05GB",`
	`261`	`+ "cpuRam": "1022.98MB",`
	`262`	`+ "gpuVram": "1.03GB",`
`263`	`263`	`}`
`264`	`264`	`` `); ``
`265`	`265`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -269,8 +269,8 @@ describe("gguf", async () => {`
`269`	`269`	`batchSize: 512`
`270`	`270`	`` }))).toMatchInlineSnapshot(` ``
`271`	`271`	`{`
`272`		`- "cpuRam": "638.79MB",`
`273`		`- "gpuVram": "679.75MB",`
	`272`	`+ "cpuRam": "638.98MB",`
	`273`	`+ "gpuVram": "654.98MB",`
`274`	`274`	`}`
`275`	`275`	`` `); ``
`276`	`276`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -280,8 +280,8 @@ describe("gguf", async () => {`
`280`	`280`	`batchSize: 512`
`281`	`281`	`` }))).toMatchInlineSnapshot(` ``
`282`	`282`	`{`
`283`		`- "cpuRam": "446.79MB",`
`284`		`- "gpuVram": "479.75MB",`
	`283`	`+ "cpuRam": "446.98MB",`
	`284`	`+ "gpuVram": "454.98MB",`
`285`	`285`	`}`
`286`	`286`	`` `); ``
`287`	`287`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -291,8 +291,8 @@ describe("gguf", async () => {`
`291`	`291`	`batchSize: 512`
`292`	`292`	`` }))).toMatchInlineSnapshot(` ``
`293`	`293`	`{`
`294`		`- "cpuRam": "350.79MB",`
`295`		`- "gpuVram": "379.75MB",`
	`294`	`+ "cpuRam": "350.98MB",`
	`295`	`+ "gpuVram": "354.98MB",`
`296`	`296`	`}`
`297`	`297`	`` `); ``
`298`	`298`
`@@ -304,7 +304,7 @@ describe("gguf", async () => {`
`304`	`304`	`` }))).toMatchInlineSnapshot(` ``
`305`	`305`	`{`
`306`	`306`	`"cpuRam": "250.5MB",`
`307`		`- "gpuVram": "1.78GB",`
	`307`	`+ "gpuVram": "1.75GB",`
`308`	`308`	`}`
`309`	`309`	`` `); ``
`310`	`310`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -315,7 +315,7 @@ describe("gguf", async () => {`
`315`	`315`	`` }))).toMatchInlineSnapshot(` ``
`316`	`316`	`{`
`317`	`317`	`"cpuRam": "250.5MB",`
`318`		`- "gpuVram": "1.03GB",`
	`318`	`+ "gpuVram": "1GB",`
`319`	`319`	`}`
`320`	`320`	`` `); ``
`321`	`321`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -326,7 +326,7 @@ describe("gguf", async () => {`
`326`	`326`	`` }))).toMatchInlineSnapshot(` ``
`327`	`327`	`{`
`328`	`328`	`"cpuRam": "250.5MB",`
`329`		`- "gpuVram": "668.02MB",`
	`329`	`+ "gpuVram": "643.45MB",`
`330`	`330`	`}`
`331`	`331`	`` `); ``
`332`	`332`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -337,7 +337,7 @@ describe("gguf", async () => {`
`337`	`337`	`` }))).toMatchInlineSnapshot(` ``
`338`	`338`	`{`
`339`	`339`	`"cpuRam": "250.5MB",`
`340`		`- "gpuVram": "476.02MB",`
	`340`	`+ "gpuVram": "451.45MB",`
`341`	`341`	`}`
`342`	`342`	`` `); ``
`343`	`343`
`@@ -349,7 +349,7 @@ describe("gguf", async () => {`
`349`	`349`	`` }))).toMatchInlineSnapshot(` ``
`350`	`350`	`{`
`351`	`351`	`"cpuRam": "250.5MB",`
`352`		`- "gpuVram": "1.78GB",`
	`352`	`+ "gpuVram": "1.75GB",`
`353`	`353`	`}`
`354`	`354`	`` `); ``
`355`	`355`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -360,7 +360,7 @@ describe("gguf", async () => {`
`360`	`360`	`` }))).toMatchInlineSnapshot(` ``
`361`	`361`	`{`
`362`	`362`	`"cpuRam": "250.5MB",`
`363`		`- "gpuVram": "1.03GB",`
	`363`	`+ "gpuVram": "1GB",`
`364`	`364`	`}`
`365`	`365`	`` `); ``
`366`	`366`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -371,7 +371,7 @@ describe("gguf", async () => {`
`371`	`371`	`` }))).toMatchInlineSnapshot(` ``
`372`	`372`	`{`
`373`	`373`	`"cpuRam": "250.5MB",`
`374`		`- "gpuVram": "668.02MB",`
	`374`	`+ "gpuVram": "643.45MB",`
`375`	`375`	`}`
`376`	`376`	`` `); ``
`377`	`377`	`expect(makeEstimationReadable(ggufInsights.estimateContextResourceRequirements({`
`@@ -382,7 +382,7 @@ describe("gguf", async () => {`
`382`	`382`	`` }))).toMatchInlineSnapshot(` ``
`383`	`383`	`{`
`384`	`384`	`"cpuRam": "250.5MB",`
`385`		`- "gpuVram": "476.02MB",`
	`385`	`+ "gpuVram": "451.45MB",`
`386`	`386`	`}`
`387`	`387`	`` `); ``
`388`	`388`	`});`