Skip to content

Commit 82ed926

Browse files
refactor(migrate): switch to linkedom
1 parent 43a4182 commit 82ed926

File tree

4 files changed

+75
-678
lines changed

4 files changed

+75
-678
lines changed

migrate/migrate-bot.ts

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
import { Octokit } from "@octokit/rest";
2-
import { JSDOM } from "jsdom";
2+
import { parseHTML } from "linkedom";
33
import fs, { readFile } from "fs/promises";
44
import path, { join } from "path";
55
import { fileURLToPath } from "url";
66
import { execSync, spawnSync } from "child_process";
77
import { visualizeTextDiff } from "./text-diff-visualizer";
8-
import { getTextFromDOM } from "./text-from-element";
8+
import { convert } from "html-to-text";
9+
910

1011
const __dirname = path.dirname(fileURLToPath(import.meta.url));
1112

@@ -68,8 +69,8 @@ async function fetchPageContent(
6869
throw new Error(`Failed to fetch ${url}: ${response.status}`);
6970
}
7071
const html = await response.text();
71-
const dom = new JSDOM(html);
72-
const contentElement = dom.window.document.querySelector("#mw-content-text");
72+
const document = parseHTML(html).document;
73+
const contentElement = document.querySelector("#mw-content-text");
7374

7475
const selectorsToRemove = [
7576
".t-navbar",
@@ -81,15 +82,15 @@ async function fetchPageContent(
8182
const elements = contentElement?.querySelectorAll(selector);
8283
elements?.forEach((el) => el.remove());
8384
}
84-
const headingElement = dom.window.document.querySelector("#firstHeading");
85+
const headingElement = document.querySelector("#firstHeading");
8586
if (!contentElement) {
8687
throw new Error("Could not find #mw-content-text");
8788
}
8889
return {
8990
html: contentElement.innerHTML,
9091
title: headingElement?.textContent?.trim() || "",
9192
url,
92-
innerText: getTextFromDOM(contentElement),
93+
innerText: (contentElement as HTMLDivElement).innerText
9394
};
9495
}
9596

@@ -304,16 +305,16 @@ async function createPullRequest(
304305

305306
const newInnerText = await readFile(getRelativeHTMLPath(url), "utf8")
306307
.then((data) => {
307-
const dom = new JSDOM(data);
308-
const contentElement = dom.window.document.querySelector("main");
308+
const document = parseHTML(data).document;
309+
const contentElement = document.querySelector("main");
309310
const selectorsToRemove = [".sl-anchor-link"];
310311
for (const selector of selectorsToRemove) {
311312
const elements = contentElement?.querySelectorAll(selector);
312313
elements?.forEach((el) => el.remove());
313314
}
314315

315316
if (!contentElement) return "";
316-
return getTextFromDOM(contentElement);
317+
return (contentElement as HTMLDivElement).innerText;
317318
})
318319
.catch(() => "");
319320

migrate/text-from-element.ts

Lines changed: 0 additions & 143 deletions
This file was deleted.

0 commit comments

Comments
 (0)