Skip to content

Commit 04d20e7

Browse files
feat(migrate): improve diff
1 parent 3d8f987 commit 04d20e7

File tree

2 files changed

+153
-9
lines changed

2 files changed

+153
-9
lines changed

migrate/migrate-bot.ts

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import path, { join } from "path";
55
import { fileURLToPath } from "url";
66
import { execSync, spawnSync } from "child_process";
77
import { visualizeTextDiff } from "./text-diff-visualizer";
8+
import { getTextFromDOM } from "./text-from-element";
89

910
const __dirname = path.dirname(fileURLToPath(import.meta.url));
1011

@@ -88,7 +89,7 @@ async function fetchPageContent(
8889
html: contentElement.innerHTML,
8990
title: headingElement?.textContent?.trim() || "",
9091
url,
91-
innerText: contentElement.textContent?.trim() || "",
92+
innerText: getTextFromDOM(contentElement),
9293
};
9394
}
9495

@@ -101,7 +102,7 @@ async function convertToMDX(
101102
"{{LLM_DOCS}}",
102103
await readFile(
103104
__dirname +
104-
"/../src/content/docs/development/guide/component-docs-for-llm.mdx",
105+
"/../src/content/docs/development/guide/component-docs-for-llm.mdx",
105106
"utf8"
106107
)
107108
);
@@ -305,13 +306,14 @@ async function createPullRequest(
305306
.then((data) => {
306307
const dom = new JSDOM(data);
307308
const contentElement = dom.window.document.querySelector("main");
308-
const selectorsToRemove = ['.sl-anchor-link']
309+
const selectorsToRemove = [".sl-anchor-link"];
309310
for (const selector of selectorsToRemove) {
310311
const elements = contentElement?.querySelectorAll(selector);
311312
elements?.forEach((el) => el.remove());
312313
}
313314

314-
return contentElement?.textContent?.trim() || "";
315+
if (!contentElement) return "";
316+
return getTextFromDOM(contentElement);
315317
})
316318
.catch(() => "");
317319

@@ -453,11 +455,11 @@ async function main() {
453455
if (res.status !== 0) {
454456
throw new Error(
455457
"构建失败,可能生成的MDX有问题:" +
456-
res.stderr?.toString() +
457-
res.stdout?.toString() +
458-
res.error?.toString() +
459-
" exit code " +
460-
res.status
458+
res.stderr?.toString() +
459+
res.stdout?.toString() +
460+
res.error?.toString() +
461+
" exit code " +
462+
res.status
461463
);
462464
}
463465

migrate/text-from-element.ts

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/**
 * Options accepted by `getTextFromDOM`. Every flag is optional and is treated
 * as `true` when omitted.
 */
interface GetTextOptions {
  /**
   * When `true`, block-level elements are separated from surrounding text by
   * a newline; when `false`, a single space is used instead.
   */
  treatBlockAsNewline?: boolean;
  /**
   * When `true`, the assembled text is post-processed: runs of spaces/tabs
   * become one space and runs of three or more newlines become two.
   */
  collapseSpaces?: boolean;
  /** When `true`, leading and trailing whitespace is trimmed from the result. */
  trimResult?: boolean;
}
6+
7+
/**
 * Lower-case tag names that are always treated as block-level, without
 * consulting the computed style.
 */
const BLOCK_ELEMENTS: ReadonlySet<string> = new Set([
  'div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  'ul', 'ol', 'li', 'table', 'tr', 'td', 'th',
  'section', 'article', 'header', 'footer', 'nav',
  'aside', 'main', 'figure', 'figcaption', 'blockquote',
  'pre', 'form', 'fieldset', 'legend', 'dl', 'dt', 'dd',
  'hr', 'br',
]);

/**
 * Reports whether `node` should be treated as a block-level element.
 *
 * A node qualifies when it is an element whose tag is listed in
 * `BLOCK_ELEMENTS`, or whose computed `display` is `block`, `flex`, `grid`
 * or any `table*` value.
 *
 * @param node - Any DOM node; non-element nodes are never block-level.
 * @returns `true` for block-level elements, `false` otherwise (including when
 *   no window is available to compute the style).
 */
function isBlockElement(node: Node): boolean {
  // Use the constant carried by the node itself rather than the global `Node`,
  // which does not exist when the DOM comes from a library such as jsdom.
  if (node.nodeType !== node.ELEMENT_NODE) return false;
  const element = node as HTMLElement;

  if (BLOCK_ELEMENTS.has(element.tagName.toLowerCase())) {
    return true;
  }

  // Resolve the window that owns this element; fall back to a global
  // `window` only when one actually exists (browser environments).
  const view =
    element.ownerDocument.defaultView ??
    (typeof window !== 'undefined' ? window : null);
  if (!view) return false;

  const display = view.getComputedStyle(element).display;
  return (
    display === 'block' ||
    display === 'flex' ||
    display === 'grid' ||
    display.startsWith('table')
  );
}
32+
33+
/**
 * Reports whether `element` is rendered according to its computed style.
 *
 * An element counts as hidden when its computed `display` is `none`, its
 * `visibility` is `hidden`, or its `opacity` is exactly the string `'0'`.
 *
 * @param element - Element to inspect.
 * @returns `false` when any of the three hiding conditions holds; `true`
 *   otherwise, including when no window is available to compute the style.
 */
function isElementVisible(element: HTMLElement): boolean {
  // Take the window from the element's own document so the check also works
  // where no global `window` exists (e.g. a DOM created by jsdom in Node.js).
  const view =
    element.ownerDocument.defaultView ??
    (typeof window !== 'undefined' ? window : null);
  // Without a window the style cannot be computed; keep the element rather
  // than silently dropping its text.
  if (!view) return true;

  const { display, visibility, opacity } = view.getComputedStyle(element);
  return display !== 'none' && visibility !== 'hidden' && opacity !== '0';
}
39+
40+
/**
 * Extracts readable text from a DOM subtree, inserting line breaks around
 * block-level elements so the output is suitable for line-based comparison.
 *
 * Traversal rules:
 * - elements rejected by `isElementVisible` are skipped with their subtree;
 * - `<br>` emits `\n`, `<hr>` emits `\n---\n`;
 * - `<pre>` contributes its raw `textContent` and is separated from
 *   neighbouring text like any other block element;
 * - other elements accepted by `isBlockElement` are surrounded by a separator
 *   (newline, or a space when `treatBlockAsNewline` is `false`) unless the
 *   output is empty or already ends with a newline;
 * - text nodes always have internal whitespace runs normalised to a single
 *   space, and whitespace-only text nodes are dropped.
 *
 * Note: the `collapseSpaces` post-pass runs over the whole result, so runs of
 * spaces/tabs and 3+ consecutive newlines inside `<pre>` content are
 * collapsed as well.
 *
 * @param node - Root of the subtree to read.
 * @param options - Output tuning flags; each defaults to `true`.
 * @returns The extracted text.
 */
export function getTextFromDOM(
  node: Node,
  options: GetTextOptions = {}
): string {
  const {
    treatBlockAsNewline = true,
    collapseSpaces = true,
    trimResult = true,
  } = options;

  let result = '';
  // Last character written to `result`, tracked to avoid duplicate separators.
  let lastChar = '';

  /** Appends `text` to the output and records its final character. */
  const append = (text: string): void => {
    if (text === '') return;
    result += text;
    lastChar = text[text.length - 1] ?? '';
  };

  /** Writes a block separator unless the output is empty or ends a line. */
  const separateBlock = (separator: string): void => {
    if (result.length > 0 && lastChar !== '\n') append(separator);
  };

  function processNode(currentNode: Node): void {
    // Compare against the constants carried by the node itself so no global
    // `Node` is required (it is absent in plain Node.js).
    if (currentNode.nodeType === currentNode.ELEMENT_NODE) {
      const element = currentNode as HTMLElement;
      if (!isElementVisible(element)) return;

      const tagName = element.tagName.toLowerCase();
      if (tagName === 'br') {
        append('\n');
        return;
      }
      if (tagName === 'hr') {
        append('\n---\n');
        return;
      }

      const isPre = tagName === 'pre';
      const isBlock = isPre || isBlockElement(currentNode);
      const separator = treatBlockAsNewline && isBlock ? '\n' : ' ';

      if (isBlock) separateBlock(separator);

      if (isPre) {
        // Preformatted content is taken verbatim instead of being walked, so
        // its internal whitespace is not normalised during traversal.
        append(element.textContent ?? '');
      } else {
        for (const child of Array.from(element.childNodes)) {
          processNode(child);
        }
      }

      if (isBlock) separateBlock(separator);
      return;
    }

    // Anything other than an element or a text node contributes no text.
    if (currentNode.nodeType !== currentNode.TEXT_NODE) return;

    const raw = currentNode.textContent ?? '';
    if (raw.trim() === '') return;

    let text = raw.replace(/\s+/g, ' ');
    if (text.startsWith(' ')) {
      // Keep one space between words, but never at the start of the output
      // or directly after an existing space/newline.
      if (result.length > 0 && lastChar !== ' ' && lastChar !== '\n') {
        append(' ');
      }
      text = text.substring(1);
    }

    const endsWithSpace = text.endsWith(' ');
    append(endsWithSpace ? text.slice(0, -1) : text);
    if (endsWithSpace) append(' ');
  }

  processNode(node);

  if (collapseSpaces) {
    result = result.replace(/[ \t]+/g, ' ').replace(/\n{3,}/g, '\n\n');
  }

  return trimResult ? result.trim() : result;
}

0 commit comments

Comments
 (0)