Skip to content

Commit 39deaa9

Browse files
committed
fix: properly encode raw pixel data from PDFs as PNG images
Previous implementation returned raw pixel data (Uint8Array) from pdf.js without encoding, causing "Could not process image" errors in the Claude API.

Changes:
- Add pngjs dependency for PNG encoding
- Implement encodePixelsToPNG() to convert raw pixels to PNG format
- Support grayscale (1 channel), RGB (3 channels), and RGBA (4 channels)
- Implement three-tier image extraction strategy:
  1. Check commonObjs for g_ prefixed images
  2. Try synchronous objs.get() for loaded images
  3. Fall back to async callback with 10s timeout
- Update all images to use image/png MIME type
- Add comprehensive tests for PNG encoding and image extraction

This fix ensures images extracted from PDFs are properly encoded and displayable in Claude.
1 parent 3f36113 commit 39deaa9

File tree

7 files changed

+727
-62
lines changed

7 files changed

+727
-62
lines changed

dist/handlers/readPdf.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,11 @@ export const handleReadPdfFunc = async (args) => {
147147
});
148148
}
149149
else if (item.type === 'image' && item.imageData) {
150-
// Add image content part
150+
// Add image content part (all images are now encoded as PNG)
151151
content.push({
152152
type: 'image',
153153
data: item.imageData.data,
154-
mimeType: item.imageData.format === 'rgba' ? 'image/png' : 'image/jpeg',
154+
mimeType: 'image/png',
155155
});
156156
}
157157
}

dist/pdf/extractor.js

Lines changed: 153 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,42 @@
11
// PDF text and metadata extraction utilities
2+
import { PNG } from 'pngjs';
23
import { OPS } from 'pdfjs-dist/legacy/build/pdf.mjs';
4+
/**
5+
* Encode raw pixel data to PNG format
6+
*/
7+
const encodePixelsToPNG = (pixelData, width, height, channels) => {
8+
const png = new PNG({ width, height });
9+
// Convert pixel data to RGBA format expected by pngjs
10+
if (channels === 4) {
11+
// Already RGBA
12+
png.data = Buffer.from(pixelData);
13+
}
14+
else if (channels === 3) {
15+
// RGB -> RGBA (add alpha channel)
16+
for (let i = 0; i < width * height; i++) {
17+
const srcIdx = i * 3;
18+
const dstIdx = i * 4;
19+
png.data[dstIdx] = pixelData[srcIdx] ?? 0; // R
20+
png.data[dstIdx + 1] = pixelData[srcIdx + 1] ?? 0; // G
21+
png.data[dstIdx + 2] = pixelData[srcIdx + 2] ?? 0; // B
22+
png.data[dstIdx + 3] = 255; // A (fully opaque)
23+
}
24+
}
25+
else if (channels === 1) {
26+
// Grayscale -> RGBA
27+
for (let i = 0; i < width * height; i++) {
28+
const gray = pixelData[i] ?? 0;
29+
const dstIdx = i * 4;
30+
png.data[dstIdx] = gray; // R
31+
png.data[dstIdx + 1] = gray; // G
32+
png.data[dstIdx + 2] = gray; // B
33+
png.data[dstIdx + 3] = 255; // A
34+
}
35+
}
36+
// Encode to PNG and convert to base64
37+
const pngBuffer = PNG.sync.write(png);
38+
return pngBuffer.toString('base64');
39+
};
340
/**
441
* Extract metadata and page count from a PDF document
542
*/
@@ -78,38 +115,83 @@ const extractImagesFromPage = async (page, pageNum) => {
78115
imageIndices.push(i);
79116
}
80117
}
81-
// Extract each image using Promise-based approach
118+
// Extract each image - try sync first, then async if needed
82119
const imagePromises = imageIndices.map((imgIndex, arrayIndex) => new Promise((resolve) => {
83120
const argsArray = operatorList.argsArray[imgIndex];
84121
if (!argsArray || argsArray.length === 0) {
85122
resolve(null);
86123
return;
87124
}
88125
const imageName = argsArray[0];
89-
// Use callback-based get() as images may not be resolved yet
90-
page.objs.get(imageName, (imageData) => {
126+
// Helper to process image data
127+
const processImageData = (imageData) => {
91128
if (!imageData || typeof imageData !== 'object') {
92-
resolve(null);
93-
return;
129+
return null;
94130
}
95131
const img = imageData;
96132
if (!img.data || !img.width || !img.height) {
97-
resolve(null);
98-
return;
133+
return null;
99134
}
100-
// Determine image format based on kind
101-
// kind === 1 = grayscale, 2 = RGB, 3 = RGBA
135+
// Determine number of channels based on kind
136+
// kind === 1 = grayscale (1 channel), 2 = RGB (3 channels), 3 = RGBA (4 channels)
137+
const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
102138
const format = img.kind === 1 ? 'grayscale' : img.kind === 3 ? 'rgba' : 'rgb';
103-
// Convert Uint8Array to base64
104-
const base64 = Buffer.from(img.data).toString('base64');
105-
resolve({
139+
// Encode raw pixel data to PNG format
140+
const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
141+
return {
106142
page: pageNum,
107143
index: arrayIndex,
108144
width: img.width,
109145
height: img.height,
110146
format,
111-
data: base64,
112-
});
147+
data: pngBase64,
148+
};
149+
};
150+
// Try to get from commonObjs first if it starts with 'g_'
151+
if (imageName.startsWith('g_')) {
152+
try {
153+
const imageData = page.commonObjs.get(imageName);
154+
if (imageData) {
155+
const result = processImageData(imageData);
156+
resolve(result);
157+
return;
158+
}
159+
}
160+
catch (error) {
161+
const message = error instanceof Error ? error.message : String(error);
162+
console.warn(`[PDF Reader MCP] Error getting image from commonObjs ${imageName}: ${message}`);
163+
}
164+
}
165+
// Try synchronous get first - if image is already loaded
166+
try {
167+
const imageData = page.objs.get(imageName);
168+
if (imageData !== undefined) {
169+
const result = processImageData(imageData);
170+
resolve(result);
171+
return;
172+
}
173+
}
174+
catch (error) {
175+
// Synchronous get failed or not supported, fall through to async
176+
const message = error instanceof Error ? error.message : String(error);
177+
console.warn(`[PDF Reader MCP] Sync image get failed for ${imageName}, trying async: ${message}`);
178+
}
179+
// Fallback to async callback-based get with timeout
180+
let resolved = false;
181+
const timeout = setTimeout(() => {
182+
if (!resolved) {
183+
resolved = true;
184+
console.warn(`[PDF Reader MCP] Image extraction timeout for ${imageName} on page ${String(pageNum)}`);
185+
resolve(null);
186+
}
187+
}, 10000); // 10 second timeout as a safety net
188+
page.objs.get(imageName, (imageData) => {
189+
if (!resolved) {
190+
resolved = true;
191+
clearTimeout(timeout);
192+
const result = processImageData(imageData);
193+
resolve(result);
194+
}
113195
});
114196
}));
115197
const resolvedImages = await Promise.all(imagePromises);
@@ -196,7 +278,7 @@ export const extractPageContent = async (pdfDocument, pageNum, includeImages, so
196278
imageIndices.push(i);
197279
}
198280
}
199-
// Extract each image with its Y-coordinate
281+
// Extract each image with its Y-coordinate - try sync first, then async if needed
200282
const imagePromises = imageIndices.map((imgIndex, arrayIndex) => new Promise((resolve) => {
201283
const argsArray = operatorList.argsArray[imgIndex];
202284
if (!argsArray || argsArray.length === 0) {
@@ -205,32 +287,29 @@ export const extractPageContent = async (pdfDocument, pageNum, includeImages, so
205287
}
206288
const imageName = argsArray[0];
207289
// Get transform matrix from the args (if available)
208-
// The transform is typically in argsArray[1] for some ops
209290
let yPosition = 0;
210291
if (argsArray.length > 1 && Array.isArray(argsArray[1])) {
211292
const transform = argsArray[1];
212-
// transform[5] is the Y coordinate
213293
const yCoord = transform[5];
214294
if (yCoord !== undefined) {
215295
yPosition = Math.round(yCoord);
216296
}
217297
}
218-
// Use callback-based get() as images may not be resolved yet
219-
page.objs.get(imageName, (imageData) => {
298+
// Helper to process image data
299+
const processImageData = (imageData) => {
220300
if (!imageData || typeof imageData !== 'object') {
221-
resolve(null);
222-
return;
301+
return null;
223302
}
224303
const img = imageData;
225304
if (!img.data || !img.width || !img.height) {
226-
resolve(null);
227-
return;
305+
return null;
228306
}
229-
// Determine image format based on kind
307+
// Determine number of channels based on kind
308+
const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
230309
const format = img.kind === 1 ? 'grayscale' : img.kind === 3 ? 'rgba' : 'rgb';
231-
// Convert Uint8Array to base64
232-
const base64 = Buffer.from(img.data).toString('base64');
233-
resolve({
310+
// Encode raw pixel data to PNG format
311+
const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
312+
return {
234313
type: 'image',
235314
yPosition,
236315
imageData: {
@@ -239,9 +318,54 @@ export const extractPageContent = async (pdfDocument, pageNum, includeImages, so
239318
width: img.width,
240319
height: img.height,
241320
format,
242-
data: base64,
321+
data: pngBase64,
243322
},
244-
});
323+
};
324+
};
325+
// Try to get from commonObjs first if it starts with 'g_'
326+
if (imageName.startsWith('g_')) {
327+
try {
328+
const imageData = page.commonObjs.get(imageName);
329+
if (imageData) {
330+
const result = processImageData(imageData);
331+
resolve(result);
332+
return;
333+
}
334+
}
335+
catch (error) {
336+
const message = error instanceof Error ? error.message : String(error);
337+
console.warn(`[PDF Reader MCP] Error getting image from commonObjs ${imageName}: ${message}`);
338+
}
339+
}
340+
// Try synchronous get first - if image is already loaded
341+
try {
342+
const imageData = page.objs.get(imageName);
343+
if (imageData !== undefined) {
344+
const result = processImageData(imageData);
345+
resolve(result);
346+
return;
347+
}
348+
}
349+
catch (error) {
350+
const message = error instanceof Error ? error.message : String(error);
351+
console.warn(`[PDF Reader MCP] Sync image get failed for ${imageName}, trying async: ${message}`);
352+
}
353+
// Fallback to async callback-based get with timeout
354+
let resolved = false;
355+
const timeout = setTimeout(() => {
356+
if (!resolved) {
357+
resolved = true;
358+
console.warn(`[PDF Reader MCP] Image extraction timeout for ${imageName} on page ${String(pageNum)}`);
359+
resolve(null);
360+
}
361+
}, 10000); // 10 second timeout as a safety net
362+
page.objs.get(imageName, (imageData) => {
363+
if (!resolved) {
364+
resolved = true;
365+
clearTimeout(timeout);
366+
const result = processImageData(imageData);
367+
resolve(result);
368+
}
245369
});
246370
}));
247371
const resolvedImages = await Promise.all(imagePromises);

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"@modelcontextprotocol/sdk": "1.20.2",
7070
"glob": "^11.0.1",
7171
"pdfjs-dist": "^5.4.296",
72+
"pngjs": "^7.0.0",
7273
"zod": "^3.24.2",
7374
"zod-to-json-schema": "^3.24.5"
7475
},
@@ -78,6 +79,7 @@
7879
"@commitlint/config-conventional": "^19.8.0",
7980
"@types/glob": "^8.1.0",
8081
"@types/node": "^24.0.7",
82+
"@types/pngjs": "^6.0.5",
8183
"@vitest/coverage-v8": "^3.1.1",
8284
"husky": "^9.1.7",
8385
"lint-staged": "^16.2.6",

pnpm-lock.yaml

Lines changed: 19 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/handlers/readPdf.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,11 +207,11 @@ export const handleReadPdfFunc = async (
207207
text: item.textContent,
208208
});
209209
} else if (item.type === 'image' && item.imageData) {
210-
// Add image content part
210+
// Add image content part (all images are now encoded as PNG)
211211
content.push({
212212
type: 'image',
213213
data: item.imageData.data,
214-
mimeType: item.imageData.format === 'rgba' ? 'image/png' : 'image/jpeg',
214+
mimeType: 'image/png',
215215
});
216216
}
217217
}

0 commit comments

Comments
 (0)