Skip to content

Commit 4b575c9

Browse files
feat(migrate): improve diff
1 parent 622bf6b commit 4b575c9

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

migrate/migrate-bot.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,14 @@ async function fetchPageContent(
8484
if (!contentElement) {
8585
throw new Error("Could not find #mw-content-text");
8686
}
87+
88+
const title = headingElement?.textContent?.trim() || "";
89+
8790
return {
8891
html: contentElement.innerHTML,
89-
title: headingElement?.textContent?.trim() || "",
92+
title,
9093
url,
91-
innerText: (contentElement as HTMLDivElement).innerText,
94+
innerText: title + "\n" + (contentElement as HTMLDivElement).innerText,
9295
};
9396
}
9497

migrate/text-diff-visualizer.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ function parseText(text: string): LineData[] {
3636
return text.split("\n").map((line) => {
3737
const parts = line
3838
.trimEnd()
39-
.replace(/[^\p{L}\p{N}]/gu, " ")
39+
.replace(/[,""''"".]/gu, " ")
4040
.split(/\s+/)
4141
.filter((p) => p.length > 0);
4242
const tokens = parts
@@ -105,19 +105,21 @@ export function visualizeTextDiff(textA: string, textB: string) {
105105

106106
let hasDifference = false;
107107

108-
const checkTokens = (tokens: WordToken[] | undefined) => {
108+
const checkTokens = (tokens: WordToken[] | undefined, isRowA: boolean) => {
109109
if (!tokens) return false;
110110
for (const t of tokens) {
111111
if (!t.key) continue;
112112
const countA = freqA.get(t.key) || 0;
113113
const countB = freqB.get(t.key) || 0;
114-
if (countA !== countB) return true;
114+
if (isRowA ? countA > countB : countB > countA) {
115+
return true;
116+
}
115117
}
116118
return false;
117119
};
118120

119-
const diffA = checkTokens(lineRowA?.tokens);
120-
const diffB = checkTokens(lineRowB?.tokens);
121+
const diffA = checkTokens(lineRowA?.tokens, true);
122+
const diffB = checkTokens(lineRowB?.tokens, false);
121123

122124
const emptyA = !lineRowA || lineRowA.tokens.length === 0;
123125
const emptyB = !lineRowB || lineRowB.tokens.length === 0;

0 commit comments

Comments
 (0)