@@ -138,7 +138,7 @@ class Recorder {
138138
139139 this . stopping = true ;
140140
141- const domNodes = await this . getFullText ( true ) ;
141+ const domSnapshot = await this . getFullText ( true ) ;
142142
143143 if ( this . behaviorState === BEHAVIOR_RUNNING ) {
144144 this . toggleBehaviors ( ) ;
@@ -159,10 +159,10 @@ class Recorder {
159159 console . log ( e ) ;
160160 }
161161
162- await this . _stop ( domNodes ) ;
162+ await this . _stop ( domSnapshot ) ;
163163 }
164164
165- async _stop ( domNodes = null ) {
165+ async _stop ( domSnapshot = null ) {
166166 clearInterval ( this . _updateStatusId ) ;
167167 clearInterval ( this . _loopId ) ;
168168 clearInterval ( this . _bgFetchId ) ;
@@ -172,7 +172,7 @@ class Recorder {
172172 this . pendingRequests = { } ;
173173 this . numPending = 0 ;
174174
175- await this . commitPage ( this . pageInfo , domNodes , true ) ;
175+ await this . commitPage ( this . pageInfo , domSnapshot , true ) ;
176176
177177 if ( this . _cleaningUp ) {
178178 await this . _cleanupStaleWait ;
@@ -368,6 +368,8 @@ class Recorder {
368368
369369 await this . send ( "Runtime.enable" ) ;
370370
371+ await this . send ( "DOMSnapshot.enable" ) ;
372+
371373 await this . initPixRatio ( ) ;
372374
373375 await this . _doInjectTopFrame ( ) ;
@@ -637,7 +639,8 @@ class Recorder {
637639 try {
638640 // wait up to 10s for the DOM snapshot, otherwise proceed
639641 return await Promise . race ( [
640- this . send ( "DOM.getDocument" , { "depth" : - 1 , "pierce" : true } ) ,
642+ //this.send("DOM.getDocument", {"depth": -1, "pierce": true}),
643+ this . send ( "DOMSnapshot.captureSnapshot" , { computedStyles : [ ] } ) ,
641644 sleep ( 10000 )
642645 ] ) ;
643646 } catch ( e ) {
@@ -647,14 +650,14 @@ class Recorder {
647650 }
648651
649652 async unpauseAndFinish ( params ) {
650- let domNodes = null ;
653+ let domSnapshot = null ;
651654
652655 // determine if this is the unload from the injected content script
653656 // if not, unpause but don't extract full text
654657 const ourUnload = ( params . callFrames [ 0 ] . url === MAIN_INJECT_URL ) ;
655658
656659 if ( ourUnload && this . behaviorState !== BEHAVIOR_WAIT_LOAD ) {
657- domNodes = await this . getFullText ( true ) ;
660+ domSnapshot = await this . getFullText ( true ) ;
658661 }
659662
660663 const currPage = this . pageInfo ;
@@ -672,17 +675,17 @@ class Recorder {
672675 if ( ourUnload && this . behaviorState !== BEHAVIOR_WAIT_LOAD ) {
673676 this . flushPending ( ) ;
674677
675- await this . commitPage ( currPage , domNodes , true ) ;
678+ await this . commitPage ( currPage , domSnapshot , true ) ;
676679 }
677680 }
678681
679- commitPage ( currPage , domNodes , finished ) {
682+ commitPage ( currPage , domSnapshot , finished ) {
680683 if ( ! currPage || ! currPage . url || ! currPage . ts || currPage . url === "about:blank" ) {
681684 return ;
682685 }
683686
684- if ( domNodes ) {
685- currPage . text = this . parseTextFromDom ( domNodes ) ;
687+ if ( domSnapshot ) {
688+ currPage . text = this . parseTextFromDOMSnapshot ( domSnapshot ) ;
686689 } else if ( ! currPage . text ) {
687690 console . warn ( "No Full Text Update" ) ;
688691 }
@@ -831,16 +834,16 @@ class Recorder {
831834
832835 const pageInfo = this . pageInfo ;
833836
834- const results = await Promise . all ( [
837+ const [ domSnapshot , favIcon ] = await Promise . all ( [
835838 this . getFullText ( ) ,
836839 this . getFavIcon ( ) ,
837840 ] ) ;
838841
839- if ( results [ 1 ] ) {
840- this . loadFavIcon ( results [ 1 ] , sessions ) ;
842+ if ( favIcon ) {
843+ this . loadFavIcon ( favIcon , sessions ) ;
841844 }
842845
843- await this . commitPage ( this . pageInfo , results [ 0 ] , false ) ;
846+ await this . commitPage ( this . pageInfo , domSnapshot , false ) ;
844847
845848 this . updateStatus ( ) ;
846849
@@ -1400,56 +1403,96 @@ class Recorder {
14001403 return this . _doSendCommand ( method , params , promise ) ;
14011404 }
14021405
1403- parseTextFromDom ( dom ) {
1404- const accum = [ ] ;
1405- const metadata = { } ;
1406+ parseTextFromDOMSnapshot ( result ) {
1407+ const TEXT_NODE = 3 ;
1408+ const ELEMENT_NODE = 1 ;
14061409
1407- this . _parseText ( dom . root , metadata , accum ) ;
1410+ const SKIPPED_NODES = [ "SCRIPT" , "STYLE" , "HEADER" , "FOOTER" , "BANNER-DIV" , "NOSCRIPT" ] ;
14081411
1409- return accum . join ( "\n" ) ;
1410- }
1412+ const { strings, documents} = result ;
14111413
1412- _parseText ( node , metadata , accum ) {
1413- const SKIPPED_NODES = [ "script" , "style" , "header" , "footer" , "banner-div" , "noscript" ] ;
1414- const EMPTY_LIST = [ ] ;
1415- const TEXT = "#text" ;
1416- const TITLE = "title" ;
1417-
1418- const name = node . nodeName . toLowerCase ( ) ;
1419-
1420- if ( SKIPPED_NODES . includes ( name ) ) {
1421- return ;
1422- }
1414+ const accum = [ ] ;
14231415
1424- const children = node . children || EMPTY_LIST ;
1416+ for ( const doc of documents ) {
1417+ const nodeValues = doc . nodes . nodeValue ;
1418+ const nodeNames = doc . nodes . nodeName ;
1419+ const nodeTypes = doc . nodes . nodeType ;
1420+ const parentIndex = doc . nodes . parentIndex ;
14251421
1426- if ( name === TEXT ) {
1427- const value = node . nodeValue ? node . nodeValue . trim ( ) : "" ;
1428- if ( value ) {
1429- accum . push ( value ) ;
1430- }
1431- } else if ( name === TITLE ) {
1432- const title = [ ] ;
1422+ for ( let i = 0 ; i < nodeValues . length ; i ++ ) {
1423+ if ( nodeValues [ i ] === - 1 ) {
1424+ continue ;
1425+ }
14331426
1434- for ( let child of children ) {
1435- this . _parseText ( child , null , title ) ;
1436- }
1437-
1438- if ( metadata ) {
1439- metadata . title = title . join ( " " ) ;
1440- } else {
1441- accum . push ( title . join ( " " ) ) ;
1442- }
1443- } else {
1444- for ( let child of children ) {
1445- this . _parseText ( child , metadata , accum ) ;
1427+ if ( nodeTypes [ i ] === TEXT_NODE ) {
1428+ const pi = parentIndex [ i ] ;
1429+ if ( pi >= 0 && nodeTypes [ pi ] === ELEMENT_NODE ) {
1430+ const name = strings [ nodeNames [ pi ] ] ;
1431+
1432+ if ( ! SKIPPED_NODES . includes ( name ) ) {
1433+ const value = strings [ nodeValues [ i ] ] . trim ( ) ;
1434+ if ( value ) {
1435+ accum . push ( value ) ;
1436+ }
1437+ }
1438+ }
1439+ }
14461440 }
14471441
1448- if ( node . contentDocument ) {
1449- this . _parseText ( node . contentDocument , null , accum ) ;
1450- }
1442+ return accum . join ( "\n" ) ;
14511443 }
14521444 }
1445+
1446+ // parseTextFromDom(dom) {
1447+ // const accum = [];
1448+ // const metadata = {};
1449+
1450+ // this._parseText(dom.root, metadata, accum);
1451+
1452+ // return accum.join("\n");
1453+ // }
1454+
1455+ // _parseText(node, metadata, accum) {
1456+ // const SKIPPED_NODES = ["script", "style", "header", "footer", "banner-div", "noscript"];
1457+ // const EMPTY_LIST = [];
1458+ // const TEXT = "#text";
1459+ // const TITLE = "title";
1460+
1461+ // const name = node.nodeName.toLowerCase();
1462+
1463+ // if (SKIPPED_NODES.includes(name)) {
1464+ // return;
1465+ // }
1466+
1467+ // const children = node.children || EMPTY_LIST;
1468+
1469+ // if (name === TEXT) {
1470+ // const value = node.nodeValue ? node.nodeValue.trim() : "";
1471+ // if (value) {
1472+ // accum.push(value);
1473+ // }
1474+ // } else if (name === TITLE) {
1475+ // const title = [];
1476+
1477+ // for (let child of children) {
1478+ // this._parseText(child, null, title);
1479+ // }
1480+
1481+ // if (metadata) {
1482+ // metadata.title = title.join(" ");
1483+ // } else {
1484+ // accum.push(title.join(" "));
1485+ // }
1486+ // } else {
1487+ // for (let child of children) {
1488+ // this._parseText(child, metadata, accum);
1489+ // }
1490+
1491+ // if (node.contentDocument) {
1492+ // this._parseText(node.contentDocument, null, accum);
1493+ // }
1494+ // }
1495+ // }
14531496}
14541497
14551498export { Recorder } ;
0 commit comments