|
2 | 2 | Fixtures for the testing of Mkdocs-Macros (pytest) |
3 | 3 | This program must be in the test directory. |
4 | 4 |
|
| 5 | +It defines two classes: |
| 6 | +
|
| 7 | +- DocProject |
| 8 | +- TestMarkdownPage |
| 9 | + |
| 10 | +
|
5 | 11 | (C) Laurent Franceschetti 2024 |
6 | 12 | """ |
7 | 13 |
|
8 | 14 | import os |
9 | | -from io import StringIO |
10 | 15 | import yaml |
11 | 16 | import subprocess |
12 | 17 | import re |
13 | 18 | from dataclasses import dataclass, field |
14 | 19 | from typing import List |
15 | 20 | import json |
16 | 21 | from typing import Any, List |
17 | | -import inspect |
| 22 | + |
18 | 23 |
|
19 | 24 |
|
20 | 25 | # from rich import print |
21 | | -import markdown |
22 | 26 | from bs4 import BeautifulSoup |
23 | | -import pandas as pd |
24 | | -import rich |
25 | | -from rich.table import Table |
26 | 27 |
|
27 | 28 |
|
28 | 29 | "A dictionary where the keys are also accessible with the dot notation" |
29 | 30 | from mkdocs_macros.util import SuperDict |
| 31 | +from .fixture_util import (get_frontmatter, markdown_to_html, get_first_h1, |
| 32 | + find_in_html, find_after, list_markdown_files, find_page, |
| 33 | + run_command) |
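The helper functions now come from `fixture_util`; assuming they keep the signatures of the implementations removed further down in this diff, they can be exercised like this:

```python
# Assumes the relocated helpers behave like the removed implementations below.
text = "---\ntitle: Home\n---\n# Welcome"

body, frontmatter, metadata = get_frontmatter(text)
assert metadata.title == "Home"        # SuperDict: keys readable with dot notation
assert get_first_h1(body) == "Welcome"

html = markdown_to_html(body)          # "# Welcome" -> "<h1>Welcome</h1>"
assert find_in_html(html, "welcome")   # case-insensitive; returns the matching line
```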
30 | 34 |
|
31 | 35 | # --------------------------- |
32 | 36 | # Initialization |
@@ -63,269 +67,6 @@ def list_doc_projects(directory:str): |
63 | 67 | "The error string" |
64 | 68 | MACRO_ERROR_STRING = '# _Macro Rendering Error_' |
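This marker is presumably what the fixtures look for to detect a failed macro rendering in a page's output; a small hypothetical helper shows the intended check (the rendered text it would be called with is not defined in this diff):

```python
def assert_no_macro_error(rendered_markdown: str):
    "Hypothetical helper: fail if the rendered Markdown contains the error marker."
    assert MACRO_ERROR_STRING not in rendered_markdown
```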
65 | 69 |
|
66 | | - |
67 | | -# --------------------------- |
68 | | -# Print functions |
69 | | -# --------------------------- |
70 | | -std_print = print |
71 | | -from rich import print |
72 | | -from rich.panel import Panel |
73 | | - |
74 | | -TITLE_COLOR = 'green' |
75 | | -def h1(s:str, color:str=TITLE_COLOR): |
76 | | - "Color print a 1st level title to the console" |
77 | | - print() |
78 | | - print(Panel(f"[{color} bold]{s}", style=color, width=80)) |
79 | | - |
80 | | -def h2(s:str, color:str=TITLE_COLOR): |
81 | | - "Color print a 2nd level title to the consule" |
82 | | - print() |
83 | | - print(f"[green bold underline]{s}") |
84 | | - |
85 | | -def h3(s:str, color:str=TITLE_COLOR): |
86 | | - "Color print a 2nd level title to the consule" |
87 | | - print() |
88 | | - print(f"[green underline]{s}") |
89 | | - |
90 | | -# --------------------------- |
91 | | -# Low-level functions |
92 | | -# --------------------------- |
93 | | - |
94 | | -def find_after(s:str, word:str, pattern:str): |
95 | | - """ |
96 | | - Find the the first occurence of a pattern after a word |
97 | | - (Both word and pattern can be regex, and the matching |
98 | | - is case insensitive.) |
99 | | - """ |
100 | | - word_pattern = re.compile(word, re.IGNORECASE) |
101 | | - parts = word_pattern.split(s, maxsplit=1) |
102 | | - # parts = s.split(word, 1) |
103 | | - |
104 | | - if len(parts) > 1: |
105 | | - # Strip the remainder and search for the pattern |
106 | | - remainder = parts[1].strip() |
107 | | - match = re.search(pattern, remainder, flags=re.IGNORECASE) |
108 | | - return match.group(0) if match else None |
109 | | - else: |
110 | | - return None |
111 | | - |
112 | | -def list_markdown_files(directory:str): |
113 | | - """ |
114 | | - Makes a list of markdown files in a directory |
115 | | - """ |
116 | | - markdown_files = [] |
117 | | - for root, dirs, files in os.walk(directory): |
118 | | - for file in files: |
119 | | - if file.endswith('.md') or file.endswith('.markdown'): |
120 | | - relative_path = os.path.relpath(os.path.join(root, file), directory) |
121 | | - markdown_files.append(relative_path) |
122 | | - return markdown_files |
123 | | - |
124 | | - |
125 | | -def markdown_to_html(markdown_text): |
126 | | - """Convert markdown text to HTML.""" |
127 | | - html = markdown.markdown(markdown_text, extensions=["tables"]) |
128 | | - # print("HTML:") |
129 | | - # print(html) |
130 | | - return html |
131 | | - |
132 | | - |
133 | | -def style_dataframe(df:pd.DataFrame): |
134 | | - """ |
135 | | - Apply beautiful and colorful styling to any dataframe |
136 | | - (patches the dataframe). |
137 | | - """ |
138 | | - def _rich_str(self): |
139 | | - table = Table(show_header=True, header_style="bold magenta") |
140 | | - |
141 | | - # Add columns |
142 | | - for col in self.columns: |
143 | | - table.add_column(col, style="dim", width=12) |
144 | | - |
145 | | - # Add rows |
146 | | - for row in self.itertuples(index=False): |
147 | | - table.add_row(*map(str, row)) |
148 | | - |
149 | | - return table |
150 | | - |
151 | | - # reassign str to rich (to avoid messing up when rich.print is used) |
152 | | - df.__rich__ = _rich_str.__get__(df) |
153 | | - |
154 | | -def extract_tables_from_html(html:str, formatter:callable=None): |
155 | | - """ |
156 | | - Extract tables from a HTML source and convert them into dataframes |
157 | | - """ |
158 | | - soup = BeautifulSoup(html, 'html.parser') |
159 | | - tables = soup.find_all('table') |
160 | | - |
161 | | - dataframes = {} |
162 | | - unnamed_table_count = 0 |
163 | | - for table in tables: |
164 | | - print("Found a table") |
165 | | - # Find the nearest header |
166 | | - header = table.find_previous(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']) |
167 | | - if header: |
168 | | - header_text = header.get_text() |
169 | | - else: |
170 | | - unnamed_table_count += 1 |
171 | | - header_text = f"Unnamed Table {unnamed_table_count}" |
172 | | - |
173 | | - # Convert HTML table to DataFrame |
174 | | - df = pd.read_html(StringIO(str(table)))[0] |
175 | | - if formatter: |
176 | | - formatter(df) |
177 | | - # Add DataFrame to dictionary with header as key |
178 | | - dataframes[header_text] = df |
179 | | - |
180 | | - return dataframes |
181 | | - |
182 | | - |
183 | | -def get_frontmatter(text:str) -> tuple[str, dict]: |
184 | | - "Get the front matter from a markdown file" |
185 | | - # Split the content to extract the YAML front matter |
186 | | - parts = text.split('---',maxsplit=2) |
187 | | - if len(parts) > 1: |
188 | | - frontmatter = parts[1] |
189 | | - metadata = SuperDict(yaml.safe_load(frontmatter)) |
190 | | - try: |
191 | | - markdown = parts[2] |
192 | | - except IndexError: |
193 | | - markdown = '' |
194 | | - return (markdown.strip(), frontmatter, metadata) |
195 | | - else: |
196 | | - return (text, '', {}) |
197 | | - |
198 | | -def find_in_html(html: str, |
199 | | - pattern: str, |
200 | | - header: str = None, header_level: int = None) -> str | None: |
201 | | - """ |
202 | | - Find a text or regex pattern in a HTML document (case-insensitive) |
203 | | - |
204 | | - Arguments |
205 | | - --------- |
206 | | - - html: the html string |
207 | | - - pattern: the text or regex |
208 | | - - header (text or regex): if specified, it finds it first, |
209 | | - and then looks for the text between that header and the next one |
210 | | - (any level). |
211 | | - - header_level: you can speciy it, if there is a risk of ambiguity. |
212 | | -
|
213 | | - Returns |
214 | | - ------- |
215 | | - The line where the pattern was found, or None |
216 | | - """ |
217 | | - if not isinstance(pattern, str): |
218 | | - pattern = str(pattern) |
219 | | - |
220 | | - soup = BeautifulSoup(html, 'html.parser') |
221 | | - |
222 | | - # Compile regex patterns with case-insensitive flag |
223 | | - pattern_regex = re.compile(pattern, re.IGNORECASE) |
224 | | - |
225 | | - if header: |
226 | | - header_regex = re.compile(header, re.IGNORECASE) |
227 | | - |
228 | | - # Find all headers (h1 to h6) |
229 | | - headers = soup.find_all(re.compile('^h[1-6]$', re.IGNORECASE)) |
230 | | - |
231 | | - for hdr in headers: |
232 | | - if header_regex.search(hdr.text): |
233 | | - # Check if header level is specified and matches |
234 | | - if header_level and hdr.name != f'h{header_level}': |
235 | | - continue |
236 | | - |
237 | | - # Extract text until the next header |
238 | | - text = [] |
239 | | - for sibling in hdr.find_next_siblings(): |
240 | | - if sibling.name and re.match('^h[1-6]$', sibling.name, re.IGNORECASE): |
241 | | - break |
242 | | - text.append(sibling.get_text(separator='\n', strip=True)) |
243 | | - |
244 | | - full_text = '\n'.join(text) |
245 | | - |
246 | | - # Search for the pattern in the extracted text |
247 | | - match = pattern_regex.search(full_text) |
248 | | - if match: |
249 | | - # Find the full line containing the match |
250 | | - lines = full_text.split('\n') |
251 | | - for line in lines: |
252 | | - if pattern_regex.search(line): |
253 | | - return line |
254 | | - else: |
255 | | - # Extract all text from the document |
256 | | - full_text = soup.get_text(separator='\n', strip=True) |
257 | | - |
258 | | - # Search for the pattern in the full text |
259 | | - match = pattern_regex.search(full_text) |
260 | | - if match: |
261 | | - # Find the full line containing the match |
262 | | - lines = full_text.split('\n') |
263 | | - for line in lines: |
264 | | - if pattern_regex.search(line): |
265 | | - return line |
266 | | - |
267 | | - return None |
268 | | - |
269 | | - |
270 | | - |
271 | | - |
272 | | - |
273 | | - |
274 | | -def get_first_h1(markdown_text: str): |
275 | | - """ |
276 | | - Get the first h1 in a markdown file, |
277 | | - ignoring YAML frontmatter and comments. |
278 | | - """ |
279 | | - # Remove YAML frontmatter |
280 | | - yaml_frontmatter_pattern = re.compile(r'^---\s*\n(.*?\n)?---\s*\n', |
281 | | - re.DOTALL) |
282 | | - markdown_text = yaml_frontmatter_pattern.sub('', markdown_text) |
283 | | - # Regular expression to match both syntaxes for level 1 headers |
284 | | - h1_pattern = re.compile(r'^(# .+|.+\n=+)', re.MULTILINE) |
285 | | - match = h1_pattern.search(markdown_text) |
286 | | - if match: |
287 | | - header = match.group(0) |
288 | | - # Remove formatting |
289 | | - if header.startswith('#'): |
290 | | - return header.lstrip('# ').strip() |
291 | | - else: |
292 | | - return header.split('\n')[0].strip() |
293 | | - return None |
294 | | - |
295 | | - |
296 | | - |
297 | | -def get_tables(markdown_text:str) -> dict[pd.DataFrame]: |
298 | | - """ |
299 | | - Convert markdown text to HTML, extract tables, |
300 | | - and convert them to dataframes. |
301 | | - """ |
302 | | - html = markdown_to_html(markdown_text) |
303 | | - dataframes = extract_tables_from_html(html, |
304 | | - formatter=style_dataframe) |
305 | | - return dataframes |
306 | | - |
307 | | - |
308 | | - |
309 | | -# --------------------------- |
310 | | -# OS Functions |
311 | | -# --------------------------- |
312 | | -def run_command(command, *args) -> subprocess.CompletedProcess: |
313 | | - "Execute a command" |
314 | | - full_command = [command] + list(args) |
315 | | - return subprocess.run(full_command, capture_output=True, text=True) |
316 | | - |
317 | | -def get_caller_directory(): |
318 | | - "Get the caller's directory name (to be called from a function)" |
319 | | - # Get the current frame |
320 | | - current_frame = inspect.currentframe() |
321 | | - # Get the caller's frame |
322 | | - caller_frame = inspect.getouterframes(current_frame, 2) |
323 | | - # Get the file name of the caller |
324 | | - caller_file = caller_frame[1].filename |
325 | | - # Get the absolute path of the directory containing the caller file |
326 | | - directory_abspath = os.path.abspath(os.path.dirname(caller_file)) |
327 | | - return directory_abspath |
328 | | - |
329 | 70 | # --------------------------- |
330 | 71 | # Log parsing |
331 | 72 | # --------------------------- |
@@ -824,24 +565,19 @@ def pages(self) -> List[TestMarkdownPage]: |
824 | 565 | return self._pages |
825 | 566 |
|
826 | 567 | def get_page(self, name:str): |
827 | | - "Get the page by its filename or a substring" |
828 | | - print("SEARCHING:", name) |
829 | | - for page in self.pages: |
830 | | - # give priority to exact matches |
831 | | - if name == page.filename: |
832 | | - return page |
833 | | - # try without extension |
834 | | - stem, _ = os.path.splitext(page.filename) |
835 | | - if name == stem: |
836 | | - return page |
837 | | - # try again without full path |
| 568 | + """ |
| 569 | + Find a page among the project's Markdown pages by its filename |
| 570 | + (full or partial, with or without extension). |
| 571 | + """ |
| 572 | + # get all the filenames of pages: |
| 573 | + filenames = [page.filename for page in self.pages] |
| 574 | + # find the matching filename in that list: |
| 575 | + filename = find_page(name, filenames) |
| 576 | + # return the corresponding page: |
838 | 577 | for page in self.pages: |
839 | | - if page.filename.endswith(name): |
| 578 | + if page.filename == filename: |
840 | 579 | return page |
841 | | - stem, _ = os.path.splitext(page.filename) |
842 | | - if stem.endswith(name): |
843 | | - return page |
844 | | - print("- NOT FOUND") |
| 580 | + |
845 | 581 |
|
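The matching logic that used to live inline in `get_page()` is now delegated to `find_page()` in `fixture_util`, whose body is not part of this diff. A sketch of what it presumably does, mirroring the priority order of the removed code (exact filename, then stem, then suffix match):

```python
# Assumed shape of fixture_util.find_page(); the real implementation is not
# shown in this diff. Priority mirrors the removed inline code: exact filename,
# exact stem, then suffix match with or without extension.
import os
from typing import List, Optional

def find_page(name: str, filenames: List[str]) -> Optional[str]:
    "Return the filename that best matches `name`, or None."
    for filename in filenames:
        stem, _ = os.path.splitext(filename)
        if name in (filename, stem):
            return filename
    for filename in filenames:
        stem, _ = os.path.splitext(filename)
        if filename.endswith(name) or stem.endswith(name):
            return filename
    return None
```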
846 | 582 | def get_plugin(self, name:str) -> SuperDict: |
847 | 583 | "Get the plugin by its plugin name" |