Generate llms.txt (#784)

petyosi · web-flow · commit 2155e440aacc · 2025-01-08T17:55:54.000+02:00
diff --git a/docs/plugins/build_llms_txt.py b/docs/plugins/build_llms_txt.py
@@ -0,0 +1,35 @@
+from __future__ import annotations as _annotations
+
+import os
+
+from bs4 import BeautifulSoup
+from markdownify import MarkdownConverter
+from mkdocs.config.defaults import MkDocsConfig
+from mkdocs.structure.files import Files
+from mkdocs.structure.pages import Page
+
+
+def on_config(config: MkDocsConfig):
+    os.mkdir(config.site_dir)
+    llms_path = os.path.join(config.site_dir, 'llms.txt')
+    with open(llms_path, 'w') as f:
+        f.write('')
+
+
+def on_page_content(html: str, page: Page, config: MkDocsConfig, files: Files) -> str:
+    soup = BeautifulSoup(html, 'html.parser')
+
+    # Clean up presentational and UI elements
+    for element in soup.find_all(
+        ['a', 'div', 'img'], attrs={'class': ['headerlink', 'tabbed-labels', 'twemoji lg middle', 'twemoji']}
+    ):
+        element.decompose()
+
+    # The API reference generates HTML tables with line numbers, this strips the line numbers cell and goes back to a code block
+    for extra in soup.find_all('table', attrs={'class': 'highlighttable'}):
+        extra.replace_with(BeautifulSoup(f'<pre>{extra.find('code').get_text()}</pre>', 'html.parser'))
+
+    with open(os.path.join(config.site_dir, 'llms.txt'), 'a', encoding='utf-8') as f:
+        f.write(MarkdownConverter().convert_soup(soup))  # type: ignore[reportUnknownMemberType]
+
+    return html
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -308,4 +308,5 @@ plugins:
         "get-started/traces.md": "concepts.md"
 hooks:
   - docs/plugins/main.py
+  - docs/plugins/build_llms_txt.py
   - docs/plugins/build_index.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -174,6 +174,7 @@ docs = [
     "griffe",
     "bs4>=0.0.2",
     "algoliasearch>=3,<4",
+    "markdownify>=0.14.1",
 ]
 
 [tool.inline-snapshot]
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -174,6 +174,7 @@ docs = [`
`174`	`174`	`"griffe",`
`175`	`175`	`"bs4>=0.0.2",`
`176`	`176`	`"algoliasearch>=3,<4",`
	`177`	`+ "markdownify>=0.14.1",`
`177`	`178`	`]`
`178`	`179`
`179`	`180`	`[tool.inline-snapshot]`