SylphxAI
diff --git a/test/handlers/readPdf.test.ts b/test/handlers/readPdf.test.ts
Lines changed: 25 additions & 0 deletions
diff --git a/test/pdf/extractor.test.ts b/test/pdf/extractor.test.ts
Lines changed: 215 additions & 0 deletions
diff --git a/test/pdf/loader.test.ts b/test/pdf/loader.test.ts
Lines changed: 141 additions & 0 deletions
@@ -748,4 +748,29 @@ describe('handleReadPdfFunc Integration Tests', () => {
     );
     await expect(handler(args)).rejects.toHaveProperty('code', ErrorCode.InvalidParams);
   });
+
+  it('should handle non-Error exceptions during processing', async () => {
+    // Make the mocked getDocument throw a plain object, i.e. a value that is
+    // neither an Error nor an McpError.
+    mockGetDocument.mockReset();
+    mockGetDocument.mockImplementation(() => {
+      throw { custom: 'object error' };
+    });
+
+    const request = { sources: [{ path: 'test.pdf' }] };
+    const result = await handler(request);
+
+    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+    if (!result.content?.[0]) {
+      expect.fail('result.content[0] was undefined');
+    } else {
+      const parsed = JSON.parse(result.content[0].text) as ExpectedResultType;
+      const firstResult = parsed.results[0];
+
+      expect(firstResult).toBeDefined();
+      if (firstResult) {
+        // The entry must be flagged as failed and mention both the generic
+        // prefix and the payload of the thrown object.
+        expect(firstResult.success).toBe(false);
+        expect(firstResult.error).toContain('Unknown error');
+        expect(firstResult.error).toContain('custom');
+      }
+    }
+  });
 }); // End top-level describe
@@ -0,0 +1,215 @@
+import type * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import {
+  buildWarnings,
+  extractMetadataAndPageCount,
+  extractPageTexts,
+} from '../../src/pdf/extractor.js';
+
+describe('extractor', () => {
+  describe('extractMetadataAndPageCount', () => {
+    it('should extract metadata using getAll method when available', async () => {
+      // The metadata object exposes getAll(); the last assertion requires it to be called.
+      const getAll = vi.fn().mockReturnValue({ Author: 'Test Author', Title: 'Test Title' });
+      const getMetadata = vi.fn().mockResolvedValue({
+        info: { PDFFormatVersion: '1.7', IsLinearized: false },
+        metadata: { getAll },
+      });
+      const pdfDocument = { numPages: 5, getMetadata } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const extracted = await extractMetadataAndPageCount(pdfDocument, true, true);
+
+      expect(extracted.num_pages).toBe(5);
+      expect(extracted.info).toEqual({ PDFFormatVersion: '1.7', IsLinearized: false });
+      expect(extracted.metadata).toEqual({ Author: 'Test Author', Title: 'Test Title' });
+      expect(getAll).toHaveBeenCalled();
+    });
+
+    it('should extract metadata by enumerating properties when getAll is not available', async () => {
+      // A plain object with no getAll(): its own properties are the expected metadata.
+      const plainMetadata = {
+        Author: 'Direct Author',
+        Title: 'Direct Title',
+        CreationDate: '2025-01-01',
+      };
+      const pdfDocument = {
+        numPages: 3,
+        getMetadata: vi.fn().mockResolvedValue({
+          info: { PDFFormatVersion: '1.6' },
+          metadata: plainMetadata,
+        }),
+      } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const extracted = await extractMetadataAndPageCount(pdfDocument, true, true);
+
+      expect(extracted.metadata).toEqual({
+        Author: 'Direct Author',
+        Title: 'Direct Title',
+        CreationDate: '2025-01-01',
+      });
+    });
+
+    it('should handle metadata extraction errors gracefully', async () => {
+      // Silence console.warn and record its calls so the warning can be asserted.
+      const consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+      try {
+        const mockDocument = {
+          numPages: 2,
+          getMetadata: vi.fn().mockRejectedValue(new Error('Metadata error')),
+        } as unknown as pdfjsLib.PDFDocumentProxy;
+
+        const result = await extractMetadataAndPageCount(mockDocument, true, true);
+
+        // The page count is still reported; metadata and info are left unset.
+        expect(result.num_pages).toBe(2);
+        expect(result.metadata).toBeUndefined();
+        expect(result.info).toBeUndefined();
+        expect(consoleWarnSpy).toHaveBeenCalledWith(
+          expect.stringContaining('Error extracting metadata: Metadata error')
+        );
+      } finally {
+        // Restore even when an assertion above throws, so the stubbed
+        // console.warn cannot leak into later tests.
+        consoleWarnSpy.mockRestore();
+      }
+    });
+
+    it('should handle non-Error metadata exceptions', async () => {
+      // Silence console.warn and record its calls so the warning can be asserted.
+      const consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+      try {
+        // getMetadata rejects with a bare string rather than an Error instance.
+        const mockDocument = {
+          numPages: 1,
+          getMetadata: vi.fn().mockRejectedValue('String error'),
+        } as unknown as pdfjsLib.PDFDocumentProxy;
+
+        const result = await extractMetadataAndPageCount(mockDocument, true, true);
+
+        expect(result.num_pages).toBe(1);
+        expect(consoleWarnSpy).toHaveBeenCalledWith(
+          expect.stringContaining('Error extracting metadata: String error')
+        );
+      } finally {
+        // Restore even when an assertion above throws, so the stubbed
+        // console.warn cannot leak into later tests.
+        consoleWarnSpy.mockRestore();
+      }
+    });
+
+    it('should not extract metadata when includeMetadata is false', async () => {
+      const getMetadata = vi.fn();
+      const pdfDocument = { numPages: 5, getMetadata } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const extracted = await extractMetadataAndPageCount(pdfDocument, false, true);
+
+      // Only the page count is reported, and getMetadata must not be called.
+      expect(extracted.num_pages).toBe(5);
+      expect(extracted.metadata).toBeUndefined();
+      expect(extracted.info).toBeUndefined();
+      expect(getMetadata).not.toHaveBeenCalled();
+    });
+
+    it('should not extract page count when includePageCount is false', async () => {
+      // Both flags are off, so num_pages must be absent from the result.
+      const pdfDocument = {
+        numPages: 10,
+        getMetadata: vi.fn(),
+      } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const extracted = await extractMetadataAndPageCount(pdfDocument, false, false);
+
+      expect(extracted.num_pages).toBeUndefined();
+    });
+  });
+
+  describe('extractPageTexts', () => {
+    // Spy on console.warn; reinstalled before every test in this describe block.
+    let consoleWarnSpy: ReturnType<typeof vi.spyOn>;
+
+    beforeEach(() => {
+      // Silence console.warn while recording calls for the assertions below.
+      consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+
+      // Vitest runs a function returned from beforeEach as cleanup after the
+      // test, so the stubbed console.warn is restored instead of leaking.
+      return () => {
+        consoleWarnSpy.mockRestore();
+      };
+    });
+
+    it('should extract text from specified pages', async () => {
+      // Two fake pages, each resolving to a pair of text items.
+      const firstPage = {
+        getTextContent: vi.fn().mockResolvedValue({
+          items: [{ str: 'Page 1 ' }, { str: 'text' }],
+        }),
+      };
+      const secondPage = {
+        getTextContent: vi.fn().mockResolvedValue({
+          items: [{ str: 'Page 2 ' }, { str: 'content' }],
+        }),
+      };
+
+      // Page 1 maps to the first fake page; any other number maps to the second.
+      const getPage = vi.fn().mockImplementation((pageNum: number) => {
+        const page = pageNum === 1 ? firstPage : secondPage;
+        return Promise.resolve(page);
+      });
+      const pdfDocument = { getPage } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const pages = await extractPageTexts(pdfDocument, [1, 2], 'test.pdf');
+
+      expect(pages).toEqual([
+        { page: 1, text: 'Page 1 text' },
+        { page: 2, text: 'Page 2 content' },
+      ]);
+    });
+
+    it('should handle page extraction errors gracefully', async () => {
+      // Every getPage() call rejects with an Error instance.
+      const getPage = vi.fn().mockRejectedValue(new Error('Failed to get page'));
+      const pdfDocument = { getPage } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const pages = await extractPageTexts(pdfDocument, [1], 'test.pdf');
+
+      // The failure is reported as the page's text and logged as a warning.
+      expect(pages).toEqual([{ page: 1, text: 'Error processing page: Failed to get page' }]);
+      expect(consoleWarnSpy).toHaveBeenCalledWith(
+        expect.stringContaining('Error getting text content for page 1 in test.pdf')
+      );
+    });
+
+    it('should handle non-Error page exceptions', async () => {
+      // getPage() rejects with a bare string rather than an Error instance.
+      const getPage = vi.fn().mockRejectedValue('String error');
+      const pdfDocument = { getPage } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const pages = await extractPageTexts(pdfDocument, [1], 'test.pdf');
+
+      expect(pages).toEqual([{ page: 1, text: 'Error processing page: String error' }]);
+      expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining('String error'));
+    });
+
+    it('should sort pages by page number', async () => {
+      // Every page yields the same text; only the order of the results is checked.
+      const anyPage = {
+        getTextContent: vi.fn().mockResolvedValue({ items: [{ str: 'text' }] }),
+      };
+      const pdfDocument = {
+        getPage: vi.fn().mockResolvedValue(anyPage),
+      } as unknown as pdfjsLib.PDFDocumentProxy;
+
+      const pages = await extractPageTexts(pdfDocument, [3, 1, 2], 'test.pdf');
+      const pageNumbers = pages.map((entry) => entry.page);
+
+      expect(pageNumbers).toEqual([1, 2, 3]);
+    });
+  });
+
+  describe('buildWarnings', () => {
+    it('should return empty array when no invalid pages', () => {
+      // An empty list of invalid pages must produce no warnings.
+      expect(buildWarnings([], 10)).toEqual([]);
+    });
+
+    it('should build warning for invalid pages', () => {
+      // Several invalid pages are expected to be listed in one comma-separated warning.
+      const expectedWarning = 'Requested page numbers 11, 12, 15 exceed total pages (10).';
+
+      expect(buildWarnings([11, 12, 15], 10)).toEqual([expectedWarning]);
+    });
+
+    it('should build warning for single invalid page', () => {
+      // The expected wording stays plural even for a single invalid page.
+      const expectedWarning = 'Requested page numbers 20 exceed total pages (10).';
+
+      expect(buildWarnings([20], 10)).toEqual([expectedWarning]);
+    });
+  });
+});
@@ -0,0 +1,141 @@
+import fs from 'node:fs/promises';
+import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
+import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';
+import { describe, expect, it, vi } from 'vitest';
+import { loadPdfDocument } from '../../src/pdf/loader.js';
+import * as pathUtils from '../../src/utils/pathUtils.js';
+
+// Mock the filesystem and pdf.js modules (no factory supplied).
+vi.mock('node:fs/promises');
+vi.mock('pdfjs-dist/legacy/build/pdf.mjs');
+// Keep the real pathUtils exports but swap resolvePath for a mock function.
+vi.mock('../../src/utils/pathUtils.js', async () => ({
+  ...(await vi.importActual('../../src/utils/pathUtils.js')),
+  resolvePath: vi.fn(),
+}));
+
+describe('loader', () => {
+  describe('loadPdfDocument', () => {
+    it('should load PDF from local file path', async () => {
+      const pdfDocument = { numPages: 5 };
+      const fileContents = Buffer.from('fake pdf content');
+      const loadingTask = {
+        promise: Promise.resolve(pdfDocument as unknown as pdfjsLib.PDFDocumentProxy),
+      } as pdfjsLib.PDFDocumentLoadingTask;
+
+      // Stub path resolution, the file read and the pdf.js loading task.
+      vi.mocked(pathUtils.resolvePath).mockReturnValue('/safe/path/test.pdf');
+      vi.mocked(fs.readFile).mockResolvedValue(fileContents);
+      vi.mocked(pdfjsLib.getDocument).mockReturnValue(loadingTask);
+
+      const loaded = await loadPdfDocument({ path: 'test.pdf' }, 'test.pdf');
+
+      // The requested path is resolved first and the resolved path is what gets read.
+      expect(loaded).toBe(pdfDocument);
+      expect(pathUtils.resolvePath).toHaveBeenCalledWith('test.pdf');
+      expect(fs.readFile).toHaveBeenCalledWith('/safe/path/test.pdf');
+    });
+
+    it('should load PDF from URL', async () => {
+      const sourceUrl = 'https://example.com/test.pdf';
+      const pdfDocument = { numPages: 3 };
+      const loadingTask = {
+        promise: Promise.resolve(pdfDocument as unknown as pdfjsLib.PDFDocumentProxy),
+      } as pdfjsLib.PDFDocumentLoadingTask;
+      vi.mocked(pdfjsLib.getDocument).mockReturnValue(loadingTask);
+
+      const loaded = await loadPdfDocument({ url: sourceUrl }, sourceUrl);
+
+      // The URL is expected to be passed to pdf.js as-is.
+      expect(loaded).toBe(pdfDocument);
+      expect(pdfjsLib.getDocument).toHaveBeenCalledWith({ url: sourceUrl });
+    });
+
+    it('should throw McpError when neither path nor url provided', async () => {
+      // Each assertion needs its own invocation, hence the factory.
+      const loadWithoutSource = () => loadPdfDocument({}, 'unknown');
+
+      await expect(loadWithoutSource()).rejects.toThrow(McpError);
+      await expect(loadWithoutSource()).rejects.toThrow("Source unknown missing 'path' or 'url'.");
+    });
+
+    it('should handle file not found error (ENOENT)', async () => {
+      // An Error carrying code 'ENOENT', used as the readFile rejection.
+      const missingFileError = Object.assign(new Error('File not found'), { code: 'ENOENT' });
+      vi.mocked(pathUtils.resolvePath).mockReturnValue('/safe/path/missing.pdf');
+      vi.mocked(fs.readFile).mockRejectedValue(missingFileError);
+
+      // Each assertion needs its own invocation, hence the factory.
+      const loadMissing = () => loadPdfDocument({ path: 'missing.pdf' }, 'missing.pdf');
+
+      await expect(loadMissing()).rejects.toThrow(McpError);
+      await expect(loadMissing()).rejects.toThrow("File not found at 'missing.pdf'.");
+    });
+
+    it('should handle generic file read errors', async () => {
+      // readFile rejects with an Error that has no special code.
+      vi.mocked(pathUtils.resolvePath).mockReturnValue('/safe/path/error.pdf');
+      vi.mocked(fs.readFile).mockRejectedValue(new Error('Permission denied'));
+
+      // Each assertion needs its own invocation, hence the factory.
+      const loadUnreadable = () => loadPdfDocument({ path: 'error.pdf' }, 'error.pdf');
+
+      await expect(loadUnreadable()).rejects.toThrow(McpError);
+      await expect(loadUnreadable()).rejects.toThrow(
+        'Failed to prepare PDF source error.pdf. Reason: Permission denied'
+      );
+    });
+
+    it('should handle non-Error exceptions during file read', async () => {
+      // readFile rejects with a bare string rather than an Error instance.
+      vi.mocked(pathUtils.resolvePath).mockReturnValue('/safe/path/test.pdf');
+      vi.mocked(fs.readFile).mockRejectedValue('String error');
+
+      const expectedMessage = 'Failed to prepare PDF source test.pdf. Reason: String error';
+
+      await expect(loadPdfDocument({ path: 'test.pdf' }, 'test.pdf')).rejects.toThrow(
+        expectedMessage
+      );
+    });
+
+    it('should handle PDF.js loading errors', async () => {
+      // Silence console.error and record its calls so the log can be asserted.
+      const consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+      try {
+        const mockBuffer = Buffer.from('fake pdf');
+
+        // The file read succeeds, but the pdf.js loading task rejects.
+        vi.mocked(pathUtils.resolvePath).mockReturnValue('/safe/path/bad.pdf');
+        vi.mocked(fs.readFile).mockResolvedValue(mockBuffer);
+        vi.mocked(pdfjsLib.getDocument).mockReturnValue({
+          promise: Promise.reject(new Error('Invalid PDF')),
+        } as pdfjsLib.PDFDocumentLoadingTask);
+
+        await expect(loadPdfDocument({ path: 'bad.pdf' }, 'bad.pdf')).rejects.toThrow(McpError);
+        await expect(loadPdfDocument({ path: 'bad.pdf' }, 'bad.pdf')).rejects.toThrow(
+          'Failed to load PDF document from bad.pdf. Reason: Invalid PDF'
+        );
+
+        expect(consoleErrorSpy).toHaveBeenCalledWith(
+          expect.stringContaining('PDF.js loading error for bad.pdf'),
+          expect.any(Error)
+        );
+      } finally {
+        // Restore even when an assertion above throws, so the stubbed
+        // console.error cannot leak into later tests.
+        consoleErrorSpy.mockRestore();
+      }
+    });
+
+    it('should handle non-Error PDF.js loading exceptions', async () => {
+      // Silence console.error for the duration of this test.
+      const consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+      try {
+        // The loading task rejects with a bare string rather than an Error instance.
+        vi.mocked(pdfjsLib.getDocument).mockReturnValue({
+          promise: Promise.reject('Unknown error'),
+        } as pdfjsLib.PDFDocumentLoadingTask);
+
+        await expect(
+          loadPdfDocument({ url: 'https://example.com/bad.pdf' }, 'https://example.com/bad.pdf')
+        ).rejects.toThrow('Failed to load PDF document from https://example.com/bad.pdf');
+      } finally {
+        // Restore even when the assertion above throws, so the stubbed
+        // console.error cannot leak into later tests.
+        consoleErrorSpy.mockRestore();
+      }
+    });
+
+    it('should propagate McpError from resolvePath', async () => {
+      // resolvePath throws an McpError synchronously.
+      const pathError = new McpError(ErrorCode.InvalidRequest, 'Path validation failed');
+      vi.mocked(pathUtils.resolvePath).mockImplementation(() => {
+        throw pathError;
+      });
+
+      const attempt = loadPdfDocument({ path: 'test.pdf' }, 'test.pdf');
+
+      await expect(attempt).rejects.toThrow(pathError);
+    });
+  });
+});