11import { Octokit } from "@octokit/rest" ;
2- import { JSDOM } from "jsdom " ;
2+ import { parseHTML } from "linkedom " ;
33import fs , { readFile } from "fs/promises" ;
44import path , { join } from "path" ;
55import { fileURLToPath } from "url" ;
66import { execSync , spawnSync } from "child_process" ;
77import { visualizeTextDiff } from "./text-diff-visualizer" ;
8- import { getTextFromDOM } from "./text-from-element" ;
8+ import { convert } from "html-to-text" ;
9+
910
1011const __dirname = path . dirname ( fileURLToPath ( import . meta. url ) ) ;
1112
@@ -68,8 +69,8 @@ async function fetchPageContent(
6869 throw new Error ( `Failed to fetch ${ url } : ${ response . status } ` ) ;
6970 }
7071 const html = await response . text ( ) ;
71- const dom = new JSDOM ( html ) ;
72- const contentElement = dom . window . document . querySelector ( "#mw-content-text" ) ;
72+ const document = parseHTML ( html ) . document ;
73+ const contentElement = document . querySelector ( "#mw-content-text" ) ;
7374
7475 const selectorsToRemove = [
7576 ".t-navbar" ,
@@ -81,15 +82,15 @@ async function fetchPageContent(
8182 const elements = contentElement ?. querySelectorAll ( selector ) ;
8283 elements ?. forEach ( ( el ) => el . remove ( ) ) ;
8384 }
84- const headingElement = dom . window . document . querySelector ( "#firstHeading" ) ;
85+ const headingElement = document . querySelector ( "#firstHeading" ) ;
8586 if ( ! contentElement ) {
8687 throw new Error ( "Could not find #mw-content-text" ) ;
8788 }
8889 return {
8990 html : contentElement . innerHTML ,
9091 title : headingElement ?. textContent ?. trim ( ) || "" ,
9192 url,
92- innerText : getTextFromDOM ( contentElement ) ,
93+ innerText : ( contentElement as HTMLDivElement ) . innerText
9394 } ;
9495}
9596
@@ -304,16 +305,16 @@ async function createPullRequest(
304305
305306 const newInnerText = await readFile ( getRelativeHTMLPath ( url ) , "utf8" )
306307 . then ( ( data ) => {
307- const dom = new JSDOM ( data ) ;
308- const contentElement = dom . window . document . querySelector ( "main" ) ;
308+ const document = parseHTML ( data ) . document ;
309+ const contentElement = document . querySelector ( "main" ) ;
309310 const selectorsToRemove = [ ".sl-anchor-link" ] ;
310311 for ( const selector of selectorsToRemove ) {
311312 const elements = contentElement ?. querySelectorAll ( selector ) ;
312313 elements ?. forEach ( ( el ) => el . remove ( ) ) ;
313314 }
314315
315316 if ( ! contentElement ) return "" ;
316- return getTextFromDOM ( contentElement ) ;
317+ return ( contentElement as HTMLDivElement ) . innerText ;
317318 } )
318319 . catch ( ( ) => "" ) ;
319320
0 commit comments