Skip to content

Commit 6174bf8

Browse files
authored
Merge pull request #21 -- better support for extracting text and links
Extraction links
2 parents: 36bb024 + c2d6b60 — commit 6174bf8

File tree

4 files changed, with 112 additions (+112) and 245 deletions (-245).

packages/controller-ext/src/actions/browser/GetSnapshotAction.ts

Lines changed: 10 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -13,24 +13,6 @@ import { BrowserOSAdapter, type Snapshot, type SnapshotOptions } from '@/adapter
1313
// Input schema for getSnapshot action
1414
const GetSnapshotInputSchema = z.object({
1515
tabId: z.number().int().positive().describe('Tab ID to get snapshot from'),
16-
type: z.enum(['text', 'links']).describe('Type of snapshot to extract (text or links)'),
17-
options: z.object({
18-
context: z.enum(['visible', 'full']).optional().describe('Context to extract: visible viewport or full page'),
19-
includeSections: z.array(z.enum([
20-
'main',
21-
'navigation',
22-
'footer',
23-
'header',
24-
'article',
25-
'aside',
26-
'complementary',
27-
'contentinfo',
28-
'form',
29-
'search',
30-
'region',
31-
'other'
32-
])).optional().describe('Specific ARIA landmark sections to include')
33-
}).optional().describe('Optional snapshot extraction options')
3416
});
3517

3618
type GetSnapshotInput = z.infer<typeof GetSnapshotInputSchema>;
@@ -39,50 +21,27 @@ type GetSnapshotInput = z.infer<typeof GetSnapshotInputSchema>;
3921
export type GetSnapshotOutput = Snapshot;
4022

4123
/**
42-
* GetSnapshotAction - Extract text or links from a page
24+
* GetSnapshotAction - Extract page content snapshot
4325
*
44-
* Extracts structured content from the page:
45-
* - "text" type: Extracts readable text content organized by ARIA landmarks
46-
* - "links" type: Extracts all links with text and URLs
26+
* Extracts structured content from the page including:
27+
* - Headings (with levels)
28+
* - Text content
29+
* - Links (with URLs)
4730
*
48-
* Options:
49-
* - context: "visible" (viewport only) or "full" (entire page)
50-
* - includeSections: Array of specific ARIA landmarks to include (main, navigation, etc.)
31+
* Returns items in document order with type information.
5132
*
52-
* The snapshot is organized by sections (main, navigation, footer, etc.) for better structure.
53-
*
54-
* Example payloads:
55-
*
56-
* Get all text from visible viewport:
33+
* Example payload:
5734
* {
58-
* "tabId": 123,
59-
* "type": "text",
60-
* "options": { "context": "visible" }
61-
* }
62-
*
63-
* Get links from main content only:
64-
* {
65-
* "tabId": 123,
66-
* "type": "links",
67-
* "options": {
68-
* "context": "full",
69-
* "includeSections": ["main", "article"]
70-
* }
35+
* "tabId": 123
7136
* }
7237
*/
7338
export class GetSnapshotAction extends ActionHandler<GetSnapshotInput, GetSnapshotOutput> {
7439
readonly inputSchema = GetSnapshotInputSchema;
7540
private browserOSAdapter = BrowserOSAdapter.getInstance();
7641

7742
async execute(input: GetSnapshotInput): Promise<GetSnapshotOutput> {
78-
const { tabId, type, options } = input;
79-
80-
const snapshot = await this.browserOSAdapter.getSnapshot(
81-
tabId,
82-
type,
83-
options as SnapshotOptions | undefined
84-
);
85-
43+
const { tabId } = input;
44+
const snapshot = await this.browserOSAdapter.getSnapshot(tabId);
8645
return snapshot;
8746
}
8847
}

packages/controller-ext/src/adapters/BrowserOSAdapter.ts

Lines changed: 21 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -363,69 +363,45 @@ export class BrowserOSAdapter {
363363
}
364364

365365
/**
366-
* Get a content snapshot of the specified type from the page
366+
* Get a content snapshot from the page
367367
*/
368-
async getSnapshot(
369-
tabId: number,
370-
type: SnapshotType,
371-
options?: SnapshotOptions,
372-
): Promise<Snapshot> {
368+
async getSnapshot(tabId: number): Promise<Snapshot> {
373369
try {
374-
logger.debug(`[BrowserOSAdapter] Getting ${type} snapshot for tab ${tabId} with options: ${JSON.stringify(options)}`);
370+
logger.debug(`[BrowserOSAdapter] Getting snapshot for tab ${tabId}`);
375371

376372
return new Promise<Snapshot>((resolve, reject) => {
377-
if (options) {
378-
chrome.browserOS.getSnapshot(
379-
tabId,
380-
type,
381-
options,
382-
(snapshot: Snapshot) => {
383-
if (chrome.runtime.lastError) {
384-
reject(new Error(chrome.runtime.lastError.message));
385-
} else {
386-
logger.debug(`[BrowserOSAdapter] Retrieved ${type} snapshot with ${snapshot.sections.length} sections`);
387-
resolve(snapshot);
388-
}
389-
},
390-
);
391-
} else {
392-
chrome.browserOS.getSnapshot(tabId, type, (snapshot: Snapshot) => {
393-
if (chrome.runtime.lastError) {
394-
reject(new Error(chrome.runtime.lastError.message));
395-
} else {
396-
logger.debug(`[BrowserOSAdapter] Retrieved ${type} snapshot with ${snapshot.sections.length} sections`);
397-
resolve(snapshot);
398-
}
399-
});
400-
}
373+
chrome.browserOS.getSnapshot(tabId, (snapshot: Snapshot) => {
374+
if (chrome.runtime.lastError) {
375+
reject(new Error(chrome.runtime.lastError.message));
376+
} else {
377+
logger.debug(`[BrowserOSAdapter] Retrieved snapshot: ${JSON.stringify(snapshot)}`);
378+
resolve(snapshot);
379+
}
380+
});
401381
});
402382
} catch (error) {
403383
const errorMessage = error instanceof Error ? error.message : String(error);
404-
logger.error(`[BrowserOSAdapter] Failed to get ${type} snapshot: ${errorMessage}`);
405-
throw new Error(`Failed to get ${type} snapshot: ${errorMessage}`);
384+
logger.error(`[BrowserOSAdapter] Failed to get snapshot: ${errorMessage}`);
385+
throw new Error(`Failed to get snapshot: ${errorMessage}`);
406386
}
407387
}
408388

409389
/**
410390
* Get text content snapshot from the page
411-
* Convenience method for text snapshot
391+
* Convenience method (deprecated - use getSnapshot directly)
392+
* @deprecated Use getSnapshot(tabId) instead
412393
*/
413-
async getTextSnapshot(
414-
tabId: number,
415-
options?: SnapshotOptions,
416-
): Promise<Snapshot> {
417-
return this.getSnapshot(tabId, "text", options);
394+
async getTextSnapshot(tabId: number): Promise<Snapshot> {
395+
return this.getSnapshot(tabId);
418396
}
419397

420398
/**
421399
* Get links snapshot from the page
422-
* Convenience method for links snapshot
400+
* Convenience method (deprecated - use getSnapshot directly)
401+
* @deprecated Use getSnapshot(tabId) instead
423402
*/
424-
async getLinksSnapshot(
425-
tabId: number,
426-
options?: SnapshotOptions,
427-
): Promise<Snapshot> {
428-
return this.getSnapshot(tabId, "links", options);
403+
async getLinksSnapshot(tabId: number): Promise<Snapshot> {
404+
return this.getSnapshot(tabId);
429405
}
430406

431407
/**

packages/controller-ext/src/types/chrome-browser-os.d.ts

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -295,25 +295,6 @@ declare namespace chrome.browserOS {
295295

296296
function getSnapshot(
297297
tabId: number,
298-
type: SnapshotType,
299-
options: SnapshotOptions,
300-
callback: (snapshot: Snapshot) => void,
301-
): void;
302-
303-
function getSnapshot(
304-
tabId: number,
305-
type: SnapshotType,
306-
callback: (snapshot: Snapshot) => void,
307-
): void;
308-
309-
function getSnapshot(
310-
type: SnapshotType,
311-
options: SnapshotOptions,
312-
callback: (snapshot: Snapshot) => void,
313-
): void;
314-
315-
function getSnapshot(
316-
type: SnapshotType,
317298
callback: (snapshot: Snapshot) => void,
318299
): void;
319300

0 commit comments

Comments
 (0)