Skip to content

Commit afb0d8d

Browse files
committed
recorder: use the new DOMSnapshot.captureSnapshot() instead of DOM.getDocument() to get text nodes (the latter was hanging occasionally)
1 parent cf627a7 commit afb0d8d

File tree

2 files changed

+99
-56
lines changed

2 files changed

+99
-56
lines changed

src/recorder.js

Lines changed: 98 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ class Recorder {
138138

139139
this.stopping = true;
140140

141-
const domNodes = await this.getFullText(true);
141+
const domSnapshot = await this.getFullText(true);
142142

143143
if (this.behaviorState === BEHAVIOR_RUNNING) {
144144
this.toggleBehaviors();
@@ -159,10 +159,10 @@ class Recorder {
159159
console.log(e);
160160
}
161161

162-
await this._stop(domNodes);
162+
await this._stop(domSnapshot);
163163
}
164164

165-
async _stop(domNodes = null) {
165+
async _stop(domSnapshot = null) {
166166
clearInterval(this._updateStatusId);
167167
clearInterval(this._loopId);
168168
clearInterval(this._bgFetchId);
@@ -172,7 +172,7 @@ class Recorder {
172172
this.pendingRequests = {};
173173
this.numPending = 0;
174174

175-
await this.commitPage(this.pageInfo, domNodes, true);
175+
await this.commitPage(this.pageInfo, domSnapshot, true);
176176

177177
if (this._cleaningUp) {
178178
await this._cleanupStaleWait;
@@ -368,6 +368,8 @@ class Recorder {
368368

369369
await this.send("Runtime.enable");
370370

371+
await this.send("DOMSnapshot.enable");
372+
371373
await this.initPixRatio();
372374

373375
await this._doInjectTopFrame();
@@ -637,7 +639,8 @@ class Recorder {
637639
try {
638640
// wait up to 10s for the DOM snapshot (previously DOM.getDocument), otherwise proceed
639641
return await Promise.race([
640-
this.send("DOM.getDocument", {"depth": -1, "pierce": true}),
642+
//this.send("DOM.getDocument", {"depth": -1, "pierce": true}),
643+
this.send("DOMSnapshot.captureSnapshot", {computedStyles: []}),
641644
sleep(10000)
642645
]);
643646
} catch(e) {
@@ -647,14 +650,14 @@ class Recorder {
647650
}
648651

649652
async unpauseAndFinish(params) {
650-
let domNodes = null;
653+
let domSnapshot = null;
651654

652655
// determine if this is the unload from the injected content script
653656
// if not, unpause but don't extract full text
654657
const ourUnload = (params.callFrames[0].url === MAIN_INJECT_URL);
655658

656659
if (ourUnload && this.behaviorState !== BEHAVIOR_WAIT_LOAD) {
657-
domNodes = await this.getFullText(true);
660+
domSnapshot = await this.getFullText(true);
658661
}
659662

660663
const currPage = this.pageInfo;
@@ -672,17 +675,17 @@ class Recorder {
672675
if (ourUnload && this.behaviorState !== BEHAVIOR_WAIT_LOAD) {
673676
this.flushPending();
674677

675-
await this.commitPage(currPage, domNodes, true);
678+
await this.commitPage(currPage, domSnapshot, true);
676679
}
677680
}
678681

679-
commitPage(currPage, domNodes, finished) {
682+
commitPage(currPage, domSnapshot, finished) {
680683
if (!currPage || !currPage.url || !currPage.ts || currPage.url === "about:blank") {
681684
return;
682685
}
683686

684-
if (domNodes) {
685-
currPage.text = this.parseTextFromDom(domNodes);
687+
if (domSnapshot) {
688+
currPage.text = this.parseTextFromDOMSnapshot(domSnapshot);
686689
} else if (!currPage.text) {
687690
console.warn("No Full Text Update");
688691
}
@@ -831,16 +834,16 @@ class Recorder {
831834

832835
const pageInfo = this.pageInfo;
833836

834-
const results = await Promise.all([
837+
const [domSnapshot, favIcon] = await Promise.all([
835838
this.getFullText(),
836839
this.getFavIcon(),
837840
]);
838841

839-
if (results[1]) {
840-
this.loadFavIcon(results[1], sessions);
842+
if (favIcon) {
843+
this.loadFavIcon(favIcon, sessions);
841844
}
842845

843-
await this.commitPage(this.pageInfo, results[0], false);
846+
await this.commitPage(this.pageInfo, domSnapshot, false);
844847

845848
this.updateStatus();
846849

@@ -1400,56 +1403,96 @@ class Recorder {
14001403
return this._doSendCommand(method, params, promise);
14011404
}
14021405

1403-
parseTextFromDom(dom) {
1404-
const accum = [];
1405-
const metadata = {};
1406+
parseTextFromDOMSnapshot(result) {
1407+
const TEXT_NODE = 3;
1408+
const ELEMENT_NODE = 1;
14061409

1407-
this._parseText(dom.root, metadata, accum);
1410+
const SKIPPED_NODES = ["SCRIPT", "STYLE", "HEADER", "FOOTER", "BANNER-DIV", "NOSCRIPT"];
14081411

1409-
return accum.join("\n");
1410-
}
1412+
const {strings, documents} = result;
14111413

1412-
_parseText(node, metadata, accum) {
1413-
const SKIPPED_NODES = ["script", "style", "header", "footer", "banner-div", "noscript"];
1414-
const EMPTY_LIST = [];
1415-
const TEXT = "#text";
1416-
const TITLE = "title";
1417-
1418-
const name = node.nodeName.toLowerCase();
1419-
1420-
if (SKIPPED_NODES.includes(name)) {
1421-
return;
1422-
}
1414+
const accum = [];
14231415

1424-
const children = node.children || EMPTY_LIST;
1416+
for (const doc of documents) {
1417+
const nodeValues = doc.nodes.nodeValue;
1418+
const nodeNames = doc.nodes.nodeName;
1419+
const nodeTypes = doc.nodes.nodeType;
1420+
const parentIndex = doc.nodes.parentIndex;
14251421

1426-
if (name === TEXT) {
1427-
const value = node.nodeValue ? node.nodeValue.trim() : "";
1428-
if (value) {
1429-
accum.push(value);
1430-
}
1431-
} else if (name === TITLE) {
1432-
const title = [];
1422+
for (let i = 0; i < nodeValues.length; i++) {
1423+
if (nodeValues[i] === -1) {
1424+
continue;
1425+
}
14331426

1434-
for (let child of children) {
1435-
this._parseText(child, null, title);
1436-
}
1437-
1438-
if (metadata) {
1439-
metadata.title = title.join(" ");
1440-
} else {
1441-
accum.push(title.join(" "));
1442-
}
1443-
} else {
1444-
for (let child of children) {
1445-
this._parseText(child, metadata, accum);
1427+
if (nodeTypes[i] === TEXT_NODE) {
1428+
const pi = parentIndex[i];
1429+
if (pi >= 0 && nodeTypes[pi] === ELEMENT_NODE) {
1430+
const name = strings[nodeNames[pi]];
1431+
1432+
if (!SKIPPED_NODES.includes(name)) {
1433+
const value = strings[nodeValues[i]].trim();
1434+
if (value) {
1435+
accum.push(value);
1436+
}
1437+
}
1438+
}
1439+
}
14461440
}
14471441

1448-
if (node.contentDocument) {
1449-
this._parseText(node.contentDocument, null, accum);
1450-
}
1442+
return accum.join("\n");
14511443
}
14521444
}
1445+
1446+
// parseTextFromDom(dom) {
1447+
// const accum = [];
1448+
// const metadata = {};
1449+
1450+
// this._parseText(dom.root, metadata, accum);
1451+
1452+
// return accum.join("\n");
1453+
// }
1454+
1455+
// _parseText(node, metadata, accum) {
1456+
// const SKIPPED_NODES = ["script", "style", "header", "footer", "banner-div", "noscript"];
1457+
// const EMPTY_LIST = [];
1458+
// const TEXT = "#text";
1459+
// const TITLE = "title";
1460+
1461+
// const name = node.nodeName.toLowerCase();
1462+
1463+
// if (SKIPPED_NODES.includes(name)) {
1464+
// return;
1465+
// }
1466+
1467+
// const children = node.children || EMPTY_LIST;
1468+
1469+
// if (name === TEXT) {
1470+
// const value = node.nodeValue ? node.nodeValue.trim() : "";
1471+
// if (value) {
1472+
// accum.push(value);
1473+
// }
1474+
// } else if (name === TITLE) {
1475+
// const title = [];
1476+
1477+
// for (let child of children) {
1478+
// this._parseText(child, null, title);
1479+
// }
1480+
1481+
// if (metadata) {
1482+
// metadata.title = title.join(" ");
1483+
// } else {
1484+
// accum.push(title.join(" "));
1485+
// }
1486+
// } else {
1487+
// for (let child of children) {
1488+
// this._parseText(child, metadata, accum);
1489+
// }
1490+
1491+
// if (node.contentDocument) {
1492+
// this._parseText(node.contentDocument, null, accum);
1493+
// }
1494+
// }
1495+
// }
14531496
}
14541497

14551498
export { Recorder };

wr-ext/bg.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)