From 96c590f3a5630e0e96c88d98fa329519d79575d2 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 4 Sep 2025 13:26:10 -0700 Subject: [PATCH 1/7] WIP: Refactor node handling --- html_tstring/element.py | 119 ------------- html_tstring/element_test.py | 236 ------------------------- html_tstring/html.py | 164 +++++++++++------ html_tstring/html_test.py | 333 ++++++++++++++++++----------------- html_tstring/nodes.py | 117 ++++++++++++ html_tstring/nodes_test.py | 208 ++++++++++++++++++++++ uv.lock | 52 +++--- 7 files changed, 630 insertions(+), 599 deletions(-) delete mode 100644 html_tstring/element.py delete mode 100644 html_tstring/element_test.py create mode 100644 html_tstring/nodes.py create mode 100644 html_tstring/nodes_test.py diff --git a/html_tstring/element.py b/html_tstring/element.py deleted file mode 100644 index 9a46e55..0000000 --- a/html_tstring/element.py +++ /dev/null @@ -1,119 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from html import escape - -# See https://developer.mozilla.org/en-US/docs/Glossary/Void_element -VOID_ELEMENTS = frozenset( - [ - "area", - "base", - "br", - "col", - "embed", - "hr", - "img", - "input", - "link", - "meta", - "param", - "source", - "track", - "wbr", - ] -) - - -# TODO: I'm not yet happy with how significant whitespace is handled -# when pretty-printing. It's possible that __str__() should not use -# indentation at all? I need to think about this more. - - -@dataclass(frozen=True) -class Element: - """Represents an HTML element or fragment.""" - - tag: str = "" - attrs: t.Mapping[str, str | None] = field(default_factory=dict) - children: t.Sequence[Element | str] = field(default_factory=tuple) - - @property - def is_void(self) -> bool: - """Return True if the element is a void element.""" - return self.tag in VOID_ELEMENTS - - @property - def is_fragment(self) -> bool: - """Return True if the element is a fragment (i.e., has no tag).""" - return self.tag == "" - - @property - def has_children(self) -> bool: - """Return True if the element has children.""" - return bool(self.children) - - def __post_init__(self): - """Ensure all preconditions are met.""" - # Void elements cannot have children - if self.is_void and self.has_children: - raise ValueError(f"Void element <{self.tag}> cannot have children.") - - # Fragments cannot have attributes - if self.is_fragment and self.attrs: - raise ValueError("Fragment elements cannot have attributes.") - - def _render(self, *, indent: str, level: int) -> str: - """Internal method to render the element with indentation.""" - newline = "\n" if indent else "" - indent_str = indent * level - - attrs_str = "".join( - f" {key}" if value is None else f' {key}="{escape(value, quote=True)}"' - for key, value in self.attrs.items() - ) - - if self.is_fragment: - return newline.join( - child._render(indent=indent, level=level) - if isinstance(child, Element) - else f"{indent_str}{escape(child, quote=False)}" - for child in self.children - ) - - if self.is_void: - return f"{indent_str}<{self.tag}{attrs_str} />" - - if not self.has_children: - return f"{indent_str}<{self.tag}{attrs_str}>" - - children_str = newline.join( - child._render(indent=indent, level=level + 1) - if isinstance(child, Element) - else f"{indent_str}{indent}{escape(child, quote=False)}" - for child in self.children - ) - return f"{indent_str}<{self.tag}{attrs_str}>{newline}{children_str}{newline}{indent_str}" - - def render(self, *, indent: int = 0, level: int = 0) -> str: - """Render the element as a string with optional indentation.""" - return self._render(indent=" " * indent, level=level) - - def __html__(self) -> str: - """ - Return the HTML representation of the element. - - Useful for integration with templating engines that recognize the - __html__ dunder, like Django and Jinja2. - """ - return self.render() - - def __str__(self) -> str: - """Return a pretty-printed string representation for the element.""" - return self.render(indent=2) - - def append_child(self, child: "Element | str") -> "Element": - """Return a new Element with the given child appended.""" - return Element( - tag=self.tag, - attrs=self.attrs, - children=(*self.children, child), - ) diff --git a/html_tstring/element_test.py b/html_tstring/element_test.py deleted file mode 100644 index 6f57657..0000000 --- a/html_tstring/element_test.py +++ /dev/null @@ -1,236 +0,0 @@ -import pytest - -from .html import Element - - -def test_empty_fragment(): - fragment = Element("") - assert fragment.is_fragment - assert fragment.render() == "" - assert str(fragment) == "" - - -def test_fragment_with_attributes(): - with pytest.raises(ValueError): - _ = Element("", attrs={"id": "test"}) - - -def test_fragment_with_text(): - fragment = Element("", children=["test"]) - assert fragment.render() == "test" - assert str(fragment) == "test" - - -def test_fragment_with_children(): - fragment = Element("", children=[Element("div"), "text", Element("span")]) - assert fragment.render() == "
text" - assert str(fragment) == "
\ntext\n" - - -def test_element_with_fragment_with_children(): - div = Element( - "div", - children=[ - Element("", children=[Element("div", children=["wow"]), "inside fragment"]) - ], - ) - assert div.render() == "
wow
inside fragment
" - assert str(div) == "
\n
\n wow\n
\n inside fragment\n
" - - -def test_void_element(): - br = Element("br") - assert br.is_void - assert not br.is_fragment - assert not br.has_children - assert br.render() == "
" - assert str(br) == "
" - - -def test_void_element_with_attributes(): - br = Element("br", attrs={"class": "line-break", "hidden": None}) - assert br.render() == '' - assert str(br) == '' - - -def test_void_element_with_children(): - with pytest.raises(ValueError): - _ = Element("br", children=["should not be here"]) - - -def test_standard_element(): - div = Element("div") - assert not div.is_void - assert not div.is_fragment - assert not div.has_children - assert div.render() == "
" - assert str(div) == "
" - - -def test_standard_element_with_attributes(): - div = Element( - "div", - attrs={"id": "main", "data-role": "container", "hidden": None}, - ) - assert div.render() == '' - assert str(div) == '' - - -def test_standard_element_with_text_child(): - div = Element("div", children=["Hello, world!"]) - assert div.has_children - assert div.render() == "
Hello, world!
" - assert str(div) == "
\n Hello, world!\n
" - - -def test_standard_element_with_element_children(): - div = Element( - "div", - children=[ - Element("h1", children=["Title"]), - Element("p", children=["This is a paragraph."]), - ], - ) - assert div.has_children - assert div.render() == "

Title

This is a paragraph.

" - assert ( - str(div) == "
\n" - "

\n" - " Title\n" - "

\n" - "

\n" - " This is a paragraph.\n" - "

\n" - "
" - ) - - -def test_standard_element_with_mixed_children(): - div = Element( - "div", - children=[ - "Intro text.", - Element("h1", children=["Title"]), - "Some more text.", - Element("hr"), - Element("p", children=["This is a paragraph."]), - ], - ) - assert div.has_children - assert div.render() == ( - "
Intro text.

Title

Some more text.

This is a paragraph.

" - ) - assert ( - str(div) == "
\n" - " Intro text.\n" - "

\n" - " Title\n" - "

\n" - " Some more text.\n" - "
\n" - "

\n" - " This is a paragraph.\n" - "

\n" - "
" - ) - - -def test_complex_tree(): - html = Element( - "html", - children=[ - Element( - "head", - children=[ - Element("title", children=["Test Page"]), - Element("meta", attrs={"charset": "UTF-8"}), - ], - ), - Element( - "body", - attrs={"class": "main-body"}, - children=[ - Element("h1", children=["Welcome to the Test Page"]), - Element( - "p", - children=[ - "This is a sample paragraph with ", - Element("strong", children=["bold text"]), - " and ", - Element("em", children=["italic text"]), - ".", - ], - ), - Element("br"), - Element( - "ul", - children=[ - Element("li", children=["Item 1"]), - Element("li", children=["Item 2"]), - Element("li", children=["Item 3"]), - ], - ), - ], - ), - ], - ) - assert html.render() == ( - 'Test Page' - '

Welcome to the Test Page

' - "

This is a sample paragraph with bold text and " - "italic text.


" - ) - assert ( - str(html) == "\n" - " \n" - " \n" - " Test Page\n" - " \n" - ' \n' - " \n" - ' \n' - "

\n" - " Welcome to the Test Page\n" - "

\n" - "

\n" - " This is a sample paragraph with \n" - " \n" - " bold text\n" - " \n" - " and \n" - " \n" - " italic text\n" - " \n" - " .\n" - "

\n" - "
\n" - " \n" - " \n" - "" - ) - - -def test_dunder_html_method(): - div = Element("div", children=["Hello"]) - assert div.__html__() == div.render() - - -def test_escaping_of_text_content(): - div = Element("div", children=[""]) - assert div.render() == "
<script>alert('XSS')</script>
" - - -def test_escaping_of_attribute_values(): - div = Element("div", attrs={"class": '">XSS<'}) - assert div.render() == '
' diff --git a/html_tstring/html.py b/html_tstring/html.py index c9e642b..0769c70 100644 --- a/html_tstring/html.py +++ b/html_tstring/html.py @@ -2,13 +2,32 @@ from html.parser import HTMLParser from string.templatelib import Interpolation, Template -from .element import VOID_ELEMENTS, Element +from .nodes import ( + VOID_ELEMENTS, + Comment, + DocumentType, + Element, + Fragment, + HasHTMLDunder, + Node, + Text, +) # For performance, a mutable tuple is used while parsing. -type ElementTuple = tuple[str, dict[str, str | None], list["ElementTuple | str"]] -ELT_TAG = 0 -ELT_ATTRS = 1 -ELT_CHILDREN = 2 +KIND_FRAGMENT = 0 +KIND_ELEMENT = 1 +KIND_TEXT = 2 +KIND_COMMENT = 3 +KIND_DOCTYPE = 4 + +type NodeTuple = tuple[ + int, str, dict[str, str | None], list["NodeTuple"], str | HasHTMLDunder | None +] +NODE_KIND = 0 +NODE_TAG = 1 +NODE_ATTRS = 2 +NODE_CHILDREN = 3 +NODE_TEXT = 4 # TODO this is being put together super rapidly and so far it's a mess. @@ -148,44 +167,43 @@ def _process_attr_key(key: str, value: object) -> dict[str, str | None]: def _children( - children: list["ElementTuple | str"], bookkeep: dict[str, Interpolation] -) -> tuple[Element | str, ...]: + children: list[NodeTuple], bookkeep: dict[str, Interpolation] +) -> tuple[Node, ...]: """Substitute any bookkeeping keys in children.""" # TODO XXX: this satisfies the test cases but does not yet recurse. - result: list[Element | str] = [] + result: list[Node] = [] for child in children: if isinstance(child, str): if child in bookkeep: bk_value = _format_interpolation(bookkeep[child]) - if isinstance(bk_value, (Element, str)): + if isinstance(bk_value, (Element, Text)): result.append(bk_value) elif isinstance(bk_value, Template): result.append(html(bk_value)) elif isinstance(bk_value, (list, tuple)): # TODO XXX this should recurse for item in bk_value: - if isinstance(item, (Element, str)): + if isinstance(item, (Element, Text)): result.append(item) elif isinstance(item, Template): result.append(html(item)) elif item is False: pass else: - result.append(str(item)) + result.append(Text(str(item))) elif bk_value is False: pass else: # TODO: should I handle more types here? - result.append(str(bk_value)) + result.append(Text(str(bk_value))) + elif isinstance(child, Fragment): + result.extend(child.children) elif isinstance(child, Element): - if child.is_fragment: - result.extend(child.children) - else: - result.append(child) - else: result.append(child) + else: + result.append(Text(child)) else: - elements = list(_element_or_elements_from_tuple(child, bookkeep)) + elements = list(_node_or_nodes_from_tuple(child, bookkeep)) result.extend(elements) return tuple(result) @@ -194,8 +212,8 @@ def _resolve_tag( tag: str, bookkeep: dict[str, Interpolation], attrs: dict[str, str | None], - children: tuple[Element | str, ...], -) -> str | Element: + children: tuple[Node, ...], +) -> str | Node: if tag in bookkeep: bk_value = _format_interpolation(bookkeep[tag]) if isinstance(bk_value, str): @@ -215,41 +233,67 @@ def _resolve_tag( return tag -def _element_or_elements_from_tuple( - element: ElementTuple, bookkeep: dict[str, Interpolation] -) -> t.Iterable[Element | str]: - attrs = _attrs(element[ELT_ATTRS], bookkeep) - children = _children(element[ELT_CHILDREN], bookkeep) - tag_or_elt = _resolve_tag(element[ELT_TAG], bookkeep, attrs, children) - if isinstance(tag_or_elt, str): - yield Element(tag=tag_or_elt, attrs=attrs, children=children) - elif tag_or_elt.is_fragment: - yield from tag_or_elt.children +def _node_or_nodes_from_tuple( + node: NodeTuple, bookkeep: dict[str, Interpolation] +) -> t.Iterable[Node]: + if node[NODE_KIND] == KIND_TEXT: + text = node[NODE_TEXT] + assert text is not None + if text in bookkeep: + bk_value = _format_interpolation(bookkeep[str(text)].value) + yield Text(str(bk_value)) + else: + yield Text(text) + return + elif node[NODE_KIND] == KIND_COMMENT: + text = node[NODE_TEXT] + # TODO: XXX handle __html__ here? + assert isinstance(text, str) + yield Comment(text) + return + elif node[NODE_KIND] == KIND_DOCTYPE: + text = node[NODE_TEXT] + # TODO: XXX handle __html__ here? + assert isinstance(text, str) or text is None + yield DocumentType(text or "html") + return + elif node[NODE_KIND] not in (KIND_ELEMENT, KIND_FRAGMENT): + raise ValueError(f"Invalid node kind: {node[NODE_KIND]!r}") + attrs = _attrs(node[NODE_ATTRS], bookkeep) + children = _children(node[NODE_CHILDREN], bookkeep) + tag_or_node = _resolve_tag(node[NODE_TAG], bookkeep, attrs, children) + if isinstance(tag_or_node, str): + if tag_or_node == "": + # Fragment + yield Fragment(children=children) + else: + yield Element(tag=tag_or_node, attrs=attrs, children=children) + elif isinstance(tag_or_node, Fragment): + yield from tag_or_node.children else: - yield tag_or_elt + yield tag_or_node -def _element_from_tuple( - element: ElementTuple, bookkeep: dict[str, Interpolation] -) -> Element: - elements = list(_element_or_elements_from_tuple(element, bookkeep)) - if len(elements) == 1 and isinstance(elements[0], Element): - return elements[0] +def _node_from_tuple(node: NodeTuple, bookkeep: dict[str, Interpolation]) -> Node: + nodes = list(_node_or_nodes_from_tuple(node, bookkeep)) + print("HERE ARE THE NODES: ", nodes) + if len(nodes) == 1: + return nodes[0] else: - return Element(tag="", attrs={}, children=tuple(elements)) + return Fragment(children=tuple(nodes)) class ElementParser(HTMLParser): - stack: list[ElementTuple] + stack: list[NodeTuple] def __init__(self): super().__init__() - self.stack = [("", {}, [])] + self.stack = [(KIND_FRAGMENT, "", {}, [], None)] def handle_starttag( self, tag: str, attrs: t.Sequence[tuple[str, str | None]] ) -> None: - element = (tag, dict(attrs), []) + element = (KIND_ELEMENT, tag, dict(attrs), [], None) self.stack.append(element) # Unfortunately, Python's built-in HTMLParser has inconsistent behavior @@ -270,8 +314,8 @@ def handle_endtag(self, tag: str) -> None: # Special case to handle void elements that are not self-closed aka # cpython #69445. if tag in VOID_ELEMENTS: - children = self.stack[0][ELT_CHILDREN] - if isinstance(children[-1], tuple) and children[-1][ELT_TAG] == tag: + children = self.stack[0][NODE_CHILDREN] + if isinstance(children[-1], tuple) and children[-1][NODE_TAG] == tag: # The last child is the void element we just added. return raise ValueError( @@ -279,27 +323,37 @@ def handle_endtag(self, tag: str) -> None: ) element = self.stack.pop() - if element[ELT_TAG] != tag: + if element[NODE_TAG] != tag: raise ValueError( - f"Mismatched closing tag for <{element[ELT_TAG]}>." + f"Mismatched closing tag for <{element[NODE_TAG]}>." ) self.append_child(element) def handle_data(self, data: str) -> None: - self.append_child(data) + print("Handling data:", data) + text = (KIND_TEXT, "", {}, [], data) + self.append_child(text) - def append_child(self, child: "ElementTuple | str") -> None: - self.stack[-1][ELT_CHILDREN].append(child) + def append_child(self, child: NodeTuple) -> None: + self.stack[-1][NODE_CHILDREN].append(child) - def get_root(self) -> ElementTuple: + def get_root(self) -> NodeTuple: if len(self.stack) != 1: raise ValueError("Invalid HTML structure: unclosed tags remain.") root = self.stack[0] - if len(root[ELT_CHILDREN]) == 1 and isinstance(root[ELT_CHILDREN][0], tuple): - return t.cast(ElementTuple, root[ELT_CHILDREN][0]) + assert root[NODE_KIND] == KIND_FRAGMENT + print(root) + + if len(root[NODE_CHILDREN]) == 1: + print("Single root element detected.") + print(root) + return root[NODE_CHILDREN][0] + + if len(root[NODE_CHILDREN]) == 0: + return (KIND_TEXT, "", {}, [], "") return root @@ -364,15 +418,19 @@ def _format_interpolation(interp: Interpolation) -> object: return _format(interp.value, interp.format_spec, interp.conversion) -def html(template: Template) -> Element: +def html(template: Template) -> Node: """Create an HTML element from a string.""" + # TODO: pick a better prefix that is less likely to collide + _prefix = "ts-bk-" count: int = 0 callables: dict[t.Callable, str] = {} bookkeep: dict[str, Interpolation] = {} parser = ElementParser() + print("HERE I AM") for part in template: if isinstance(part, str): + print(f"FEEDING STRING: '{part}'") parser.feed(part) elif hasattr(part.value, "__html__"): # Parse the HTML, which is presumed safe @@ -396,4 +454,4 @@ def html(template: Template) -> Element: parser.feed(key) parser.close() root = parser.get_root() - return _element_from_tuple(root, bookkeep) + return _node_from_tuple(root, bookkeep) diff --git a/html_tstring/html_test.py b/html_tstring/html_test.py index d2b0b23..b691aea 100644 --- a/html_tstring/html_test.py +++ b/html_tstring/html_test.py @@ -2,8 +2,8 @@ import pytest -from .element import Element from .html import SafeHTML, clsx, html +from .nodes import Element, Fragment, Text # -------------------------------------------------------------------------- # clsx tests @@ -87,36 +87,34 @@ def test_clsx_kitchen_sink(): def test_parse_empty(): - element = html(t"") - assert element == Element() - assert element.render() == "" + node = html(t"") + assert node == Text("") + assert str(node) == "" def test_parse_text(): - element = html(t"Hello, world!") - assert element == Element("", {}, ("Hello, world!",)) - assert element.render() == "Hello, world!" + node = html(t"Hello, world!") + assert node == Text("Hello, world!") + assert str(node) == "Hello, world!" def test_parse_void_element(): - element = html(t"
") - assert element == Element("br") - assert element.render() == "
" + node = html(t"
") + assert node == Element("br") + assert str(node) == "
" def test_parse_void_element_self_closed(): - element = html(t"
") - assert element == Element("br") - assert element.render() == "
" + node = html(t"
") + assert node == Element("br") + assert str(node) == "
" def test_parse_chain_of_void_elements(): # Make sure our handling of CPython issue #69445 is reasonable. - element = html(t"



") - assert element == Element( - "", - {}, - ( + node = html(t"



") + assert node == Fragment( + children=( Element("br"), Element("hr"), Element("img", attrs={"src": "image.png"}), @@ -124,35 +122,35 @@ def test_parse_chain_of_void_elements(): Element("hr"), ), ) - assert element.render() == '



' + assert str(node) == '



' def test_parse_element_with_text(): - element = html(t"

Hello, world!

") - assert element == Element("p", children=("Hello, world!",)) - assert element.render() == "

Hello, world!

" + node = html(t"

Hello, world!

") + assert node == Element("p", children=(Text("Hello, world!"),)) + assert str(node) == "

Hello, world!

" def test_parse_element_with_attributes(): - element = html(t'Link') - assert element == Element( + node = html(t'Link') + assert node == Element( "a", attrs={"href": "https://example.com", "target": "_blank"}, - children=("Link",), + children=(Text("Link"),), ) - assert element.render() == 'Link' + assert str(node) == 'Link' def test_parse_nested_elements(): - element = html(t"

Hello

World

") - assert element == Element( + node = html(t"

Hello

World

") + assert node == Element( "div", children=( - Element("p", children=("Hello",)), - Element("p", children=("World",)), + Element("p", children=(Text("Hello"),)), + Element("p", children=(Text("World"),)), ), ) - assert element.render() == "

Hello

World

" + assert str(node) == "

Hello

World

" # -------------------------------------------------------------------------- @@ -162,16 +160,18 @@ def test_parse_nested_elements(): def text_interpolated_text_content(): name = "Alice" - element = html(t"

Hello, {name}!

") - assert element == Element("p", children=("Hello, ", "Alice", "!")) - assert element.render() == "

Hello, Alice!

" + node = html(t"

Hello, {name}!

") + assert node == Element("p", children=(Text("Hello, "), Text("Alice"), Text("!"))) + assert str(node) == "

Hello, Alice!

" def test_escaping_of_interpolated_text_content(): name = "" - element = html(t"

Hello, {name}!

") - assert element == Element("p", children=("Hello, ", "", "!")) - assert element.render() == "

Hello, <Alice & Bob>!

" + node = html(t"

Hello, {name}!

") + assert node == Element( + "p", children=(Text("Hello, "), Text(""), Text("!")) + ) + assert str(node) == "

Hello, <Alice & Bob>!

" class Convertible: @@ -186,14 +186,13 @@ def test_conversions(): c = Convertible() assert f"{c!s}" == "string" assert f"{c!r}" == "repr" - element = html(t"
  • {c!s}
  • {c!r}
  • {'😊'!a}
  • ") - print(element.render()) - assert element == Element( + node = html(t"
  • {c!s}
  • {c!r}
  • {'😊'!a}
  • ") + assert node == Element( "", children=( - Element("li", children=("string",)), - Element("li", children=("repr",)), - Element("li", children=("'\\U0001f60a'",)), + Element("li", children=(Text("string"),)), + Element("li", children=(Text("repr"),)), + Element("li", children=(Text("'\\U0001f60a'"),)), ), ) @@ -205,11 +204,11 @@ def test_conversions(): def test_raw_html_injection_with_helper(): raw_content = SafeHTML("I am bold") - element = html(t"
    {raw_content}
    ") - assert element == Element( - "div", children=(Element("strong", children=("I am bold",)),) + node = html(t"
    {raw_content}
    ") + assert node == Element( + "div", children=(Element("strong", children=(Text("I am bold"),)),) ) - assert element.render() == "
    I am bold
    " + assert str(node) == "
    I am bold
    " def test_raw_html_injection_with_dunder_html_protocol(): @@ -222,20 +221,26 @@ def __html__(self): return f"{self._text}" content = SafeContent("emphasized") - element = html(t"

    Here is some {content}.

    ") - assert element == Element( - "p", children=("Here is some ", Element("em", children=("emphasized",)), ".") + node = html(t"

    Here is some {content}.

    ") + assert node == Element( + "p", children=(Text("Here is some "), Text(content), Text(".")) ) - assert element.render() == "

    Here is some emphasized.

    " + assert str(node) == "

    Here is some emphasized.

    " def test_raw_html_injection_with_format_spec(): raw_content = "underlined" - element = html(t"

    This is {raw_content:safe} text.

    ") - assert element == Element( - "p", children=("This is ", Element("u", children=("underlined",)), " text.") + node = html(t"

    This is {raw_content:safe} text.

    ") + # TODO XXX: this is wrong; raw_content should be wrapped in Text + assert node == Element( + "p", + children=( + Text("This is "), + Element("u", children=(Text("underlined"),)), + Text(" text."), + ), ) - assert element.render() == "

    This is underlined text.

    " + assert str(node) == "

    This is underlined text.

    " # -------------------------------------------------------------------------- @@ -247,33 +252,35 @@ def test_conditional_rendering_with_if_else(): is_logged_in = True user_profile = t"Welcome, User!" login_prompt = t"Please log in" - element = html(t"
    {user_profile if is_logged_in else login_prompt}
    ") + node = html(t"
    {user_profile if is_logged_in else login_prompt}
    ") - assert element == Element( - "div", children=(Element("span", children=("Welcome, User!",)),) + assert node == Element( + "div", children=(Element("span", children=(Text("Welcome, User!"),)),) ) - assert element.render() == "
    Welcome, User!
    " + assert str(node) == "
    Welcome, User!
    " is_logged_in = False - element = html(t"
    {user_profile if is_logged_in else login_prompt}
    ") - assert element.render() == '
    Please log in
    ' + node = html(t"
    {user_profile if is_logged_in else login_prompt}
    ") + assert str(node) == '
    Please log in
    ' def test_conditional_rendering_with_and(): show_warning = True warning_message = t'
    Warning!
    ' - element = html(t"
    {show_warning and warning_message}
    ") + node = html(t"
    {show_warning and warning_message}
    ") - assert element == Element( + assert node == Element( "main", - children=(Element("div", attrs={"class": "warning"}, children=("Warning!",)),), + children=( + Element("div", attrs={"class": "warning"}, children=(Text("Warning!"),)), + ), ) - assert element.render() == '
    Warning!
    ' + assert str(node) == '
    Warning!
    ' show_warning = False - element = html(t"
    {show_warning and warning_message}
    ") + node = html(t"
    {show_warning and warning_message}
    ") # Assuming False renders nothing - assert element.render() == "
    " + assert str(node) == "
    " # -------------------------------------------------------------------------- @@ -283,36 +290,39 @@ def test_conditional_rendering_with_and(): def test_interpolated_template_content(): child = t"Child" - element = html(t"
    {child}
    ") - assert element == Element("div", children=(html(child),)) - assert element.render() == "
    Child
    " + node = html(t"
    {child}
    ") + assert node == Element("div", children=(html(child),)) + assert str(node) == "
    Child
    " def test_interpolated_element_content(): child = html(t"Child") - element = html(t"
    {child}
    ") - assert element == Element("div", children=(child,)) - assert element.render() == "
    Child
    " + node = html(t"
    {child}
    ") + assert node == Element("div", children=(child,)) + assert str(node) == "
    Child
    " def test_interpolated_nonstring_content(): number = 42 - element = html(t"

    The answer is {number}.

    ") - assert element == Element("p", children=("The answer is ", "42", ".")) - assert element.render() == "

    The answer is 42.

    " + node = html(t"

    The answer is {number}.

    ") + assert node == Element( + "p", children=(Text("The answer is "), Text("42"), Text(".")) + ) + assert str(node) == "

    The answer is 42.

    " def test_list_items(): items = ["Apple", "Banana", "Cherry"] - element = html(t"
      {[t'
    • {item}
    • ' for item in items]}
    ") - assert element.tag == "ul" - assert len(element.attrs) == 0 - assert element.children == ( - Element("li", children=("Apple",)), - Element("li", children=("Banana",)), - Element("li", children=("Cherry",)), + node = html(t"
      {[t'
    • {item}
    • ' for item in items]}
    ") + assert node == Element( + "ul", + children=( + Element("li", children=(Text("Apple"),)), + Element("li", children=(Text("Banana"),)), + Element("li", children=(Text("Cherry"),)), + ), ) - assert element.render() == "
    • Apple
    • Banana
    • Cherry
    " + assert str(node) == "
    • Apple
    • Banana
    • Cherry
    " def test_nested_list_items(): @@ -321,20 +331,20 @@ def test_nested_list_items(): inner = ["apple", "banana", "cherry"] inner_items = [t"
  • {item}
  • " for item in inner] outer_items = [t"
  • {category}
      {inner_items}
  • " for category in outer] - element = html(t"
      {outer_items}
    ") - assert element == Element( + node = html(t"
      {outer_items}
    ") + assert node == Element( "ul", children=( Element( "li", children=( - "fruit", + Text("fruit"), Element( "ul", children=( - Element("li", children=("apple",)), - Element("li", children=("banana",)), - Element("li", children=("cherry",)), + Element("li", children=(Text("apple"),)), + Element("li", children=(Text("banana"),)), + Element("li", children=(Text("cherry"),)), ), ), ), @@ -342,13 +352,13 @@ def test_nested_list_items(): Element( "li", children=( - "more fruit", + Text("more fruit"), Element( "ul", children=( - Element("li", children=("apple",)), - Element("li", children=("banana",)), - Element("li", children=("cherry",)), + Element("li", children=(Text("apple"),)), + Element("li", children=(Text("banana"),)), + Element("li", children=(Text("cherry"),)), ), ), ), @@ -356,7 +366,7 @@ def test_nested_list_items(): ), ) assert ( - element.render() + str(node) == "
    • fruit
      • apple
      • banana
      • cherry
    • more fruit
      • apple
      • banana
      • cherry
    " ) @@ -368,73 +378,73 @@ def test_nested_list_items(): def test_interpolated_attribute_value(): url = "https://example.com/" - element = html(t'Link') - assert element == Element( - "a", attrs={"href": "https://example.com/"}, children=("Link",) + node = html(t'Link') + assert node == Element( + "a", attrs={"href": "https://example.com/"}, children=(Text("Link"),) ) - assert element.render() == 'Link' + assert str(node) == 'Link' def test_escaping_of_interpolated_attribute_value(): url = 'https://example.com/?q="test"&lang=en' - element = html(t'Link') - assert element == Element( + node = html(t'Link') + assert node == Element( "a", attrs={"href": 'https://example.com/?q="test"&lang=en'}, - children=("Link",), + children=(Text("Link"),), ) assert ( - element.render() + str(node) == 'Link' ) def test_interpolated_unquoted_attribute_value(): id = "roquefort" - element = html(t"
    Cheese
    ") - assert element == Element("div", attrs={"id": "roquefort"}, children=("Cheese",)) - assert element.render() == '
    Cheese
    ' + node = html(t"
    Cheese
    ") + assert node == Element("div", attrs={"id": "roquefort"}, children=(Text("Cheese"),)) + assert str(node) == '
    Cheese
    ' def test_interpolated_attribute_value_true(): disabled = True - element = html(t"") - assert element == Element( - "button", attrs={"disabled": None}, children=("Click me",) + node = html(t"") + assert node == Element( + "button", attrs={"disabled": None}, children=(Text("Click me"),) ) - assert element.render() == "" + assert str(node) == "" def test_interpolated_attribute_value_falsy(): disabled = False crumpled = None - element = html(t"") - assert element == Element("button", attrs={}, children=("Click me",)) - assert element.render() == "" + node = html(t"") + assert node == Element("button", attrs={}, children=(Text("Click me"),)) + assert str(node) == "" def test_interpolated_attribute_spread_dict(): attrs = {"href": "https://example.com/", "target": "_blank"} - element = html(t"Link") - assert element == Element( + node = html(t"Link") + assert node == Element( "a", attrs={"href": "https://example.com/", "target": "_blank"}, - children=("Link",), + children=(Text("Link"),), ) - assert element.render() == 'Link' + assert str(node) == 'Link' def test_interpolated_mixed_attribute_values_and_spread_dict(): attrs = {"href": "https://example.com/", "id": "link1"} target = "_blank" - element = html(t'Link') - assert element == Element( + node = html(t'Link') + assert node == Element( "a", attrs={"href": "https://example.com/", "id": "link1", "target": "_blank"}, - children=("Link",), + children=(Text("Link"),), ) assert ( - element.render() + str(node) == 'Link' ) @@ -442,77 +452,72 @@ def test_interpolated_mixed_attribute_values_and_spread_dict(): def test_multiple_attribute_spread_dicts(): attrs1 = {"href": "https://example.com/", "id": "overwrtten"} attrs2 = {"target": "_blank", "id": "link1"} - element = html(t"Link") - assert element == Element( + node = html(t"Link") + assert node == Element( "a", attrs={"href": "https://example.com/", "id": "link1", "target": "_blank"}, - children=("Link",), + children=(Text("Link"),), ) assert ( - element.render() + str(node) == 'Link' ) def test_interpolated_class_attribute(): classes = ["btn", "btn-primary", False and "disabled", None, {"active": True}] - element = html(t'') - assert element == Element( - "button", attrs={"class": "btn btn-primary active"}, children=("Click me",) - ) - assert ( - element.render() == '' + node = html(t'') + assert node == Element( + "button", + attrs={"class": "btn btn-primary active"}, + children=(Text("Click me"),), ) + assert str(node) == '' def test_interpolated_attribute_spread_with_class_attribute(): attrs = {"id": "button1", "class": ["btn", "btn-primary"]} - element = html(t"") - assert element == Element( + node = html(t"") + assert node == Element( "button", attrs={"id": "button1", "class": "btn btn-primary"}, - children=("Click me",), - ) - assert ( - element.render() - == '' + children=(Text("Click me"),), ) + assert str(node) == '' def test_interpolated_data_attributes(): data = {"user-id": 123, "role": "admin"} - element = html(t"
    User Info
    ") - assert element == Element( + node = html(t"
    User Info
    ") + assert node == Element( "div", attrs={"data-user-id": "123", "data-role": "admin"}, - children=("User Info",), - ) - assert ( - element.render() == '
    User Info
    ' + children=(Text("User Info"),), ) + assert str(node) == '
    User Info
    ' def test_interpolated_aria_attributes(): aria = {"label": "Close", "hidden": True} - element = html(t"") - assert element == Element( - "button", attrs={"aria-label": "Close", "aria-hidden": "True"}, children=("X",) - ) - assert ( - element.render() == '' + node = html(t"") + assert node == Element( + "button", + attrs={"aria-label": "Close", "aria-hidden": "True"}, + children=(Text("X"),), ) + assert str(node) == '' def test_interpolated_style_attribute(): styles = {"color": "red", "font-weight": "bold", "font-size": "16px"} - element = html(t"

    Warning!

    ") - assert element == Element( + node = html(t"

    Warning!

    ") + assert node == Element( "p", attrs={"style": "color: red; font-weight: bold; font-size: 16px"}, - children=("Warning!",), + children=(Text("Warning!"),), ) assert ( - element.render() + str(node) == '

    Warning!

    ' ) @@ -534,10 +539,10 @@ def TemplateComponent( def test_interpolated_template_component(): - element = html( + node = html( t'<{TemplateComponent} first=1 second={99} third="comp1" class="my-comp">Hello, Component!' ) - assert element == Element( + assert node == Element( "div", attrs={ "id": "comp1", @@ -545,10 +550,10 @@ def test_interpolated_template_component(): "data-second": "99", "class": "my-comp", }, - children=("Component: ", "Hello, Component!"), + children=(Text("Component: "), Text("Hello, Component!")), ) assert ( - element.render() + str(node) == '
    Component: Hello, Component!
    ' ) @@ -565,19 +570,17 @@ def ColumnsComponent() -> Template: def test_fragment_from_component(): # This test assumes that if a component returns a template that parses # into multiple root elements, they are treated as a fragment. - element = html(t"<{ColumnsComponent} />
    ") - assert element == Element( + node = html(t"<{ColumnsComponent} />
    ") + assert node == Element( "table", children=( Element( "tr", children=( - Element("td", children=("Column 1",)), - Element("td", children=("Column 2",)), + Element("td", children=(Text("Column 1"),)), + Element("td", children=(Text("Column 2"),)), ), ), ), ) - assert ( - element.render() == "
    Column 1Column 2
    " - ) + assert str(node) == "
    Column 1Column 2
    " diff --git a/html_tstring/nodes.py b/html_tstring/nodes.py new file mode 100644 index 0000000..5571af1 --- /dev/null +++ b/html_tstring/nodes.py @@ -0,0 +1,117 @@ +import typing as t +from dataclasses import dataclass, field +from functools import cached_property +from html import escape + +# See https://developer.mozilla.org/en-US/docs/Glossary/Void_element +VOID_ELEMENTS = frozenset( + [ + "area", + "base", + "br", + "col", + "embed", + "hr", + "img", + "input", + "link", + "meta", + "param", + "source", + "track", + "wbr", + ] +) + + +# TODO: add a pretty-printer for nodes for debugging +# TODO: consider how significant whitespace is handled from t-string to nodes + + +@t.runtime_checkable +class HasHTMLDunder(t.Protocol): + def __html__(self) -> str: ... + + +type HTMLDunder = t.Callable[[], str] + + +@dataclass(frozen=True) +class Node(HasHTMLDunder): + def __html__(self) -> str: + """Return the HTML representation of the node.""" + # By default, just return the string representation + return str(self) + + +@dataclass(frozen=True) +class Text(Node): + # Django's `SafeString` and Markupsafe/Jinja2's `Markup` both inherit + # from `str`, but that is not a requirement for the `__html__` dunder. + text: str | HasHTMLDunder + + @cached_property + def _cached_str(self) -> str: + if isinstance(self.text, HasHTMLDunder): + return self.text.__html__() + return escape(self.text, quote=False) + + def __str__(self) -> str: + return self._cached_str + + +@dataclass(frozen=True) +class Fragment(Node): + children: t.Sequence[Node] = field(default_factory=tuple) + + def __str__(self) -> str: + return "".join(str(child) for child in self.children) + + +@dataclass(frozen=True) +class Comment(Node): + text: str + + def __str__(self) -> str: + return f"" + + +@dataclass(frozen=True) +class DocumentType(Node): + text: str = "html" + + def __str__(self) -> str: + return f"" + + +@dataclass(frozen=True) +class Element(Node): + tag: str + attrs: t.Mapping[str, str | None] = field(default_factory=dict) + children: t.Sequence[Node] = field(default_factory=tuple) + + def __post_init__(self): + """Ensure all preconditions are met.""" + if not self.tag: + raise ValueError("Element tag cannot be empty.") + + # Void elements cannot have children + if self.is_void and self.children: + raise ValueError(f"Void element <{self.tag}> cannot have children.") + + @property + def is_void(self) -> bool: + return self.tag in VOID_ELEMENTS + + def __str__(self) -> str: + # TODO: CONSIDER: should values in attrs support the __html__ dunder? + attrs_str = "".join( + f" {key}" if value is None else f' {key}="{escape(value, quote=True)}"' + for key, value in self.attrs.items() + ) + if self.is_void: + return f"<{self.tag}{attrs_str} />" + if not self.children: + return f"<{self.tag}{attrs_str}>" + children_str = "".join(str(child) for child in self.children) + return f"<{self.tag}{attrs_str}>{children_str}" diff --git a/html_tstring/nodes_test.py b/html_tstring/nodes_test.py new file mode 100644 index 0000000..4f04778 --- /dev/null +++ b/html_tstring/nodes_test.py @@ -0,0 +1,208 @@ +import pytest + +from .nodes import Comment, DocumentType, Element, Fragment, Text + + +def test_comment(): + comment = Comment("This is a comment") + assert str(comment) == "" + + +def test_comment_empty(): + comment = Comment("") + assert str(comment) == "" + + +def test_comment_special_chars(): + comment = Comment("Special chars: <>&\"'") + assert str(comment) == "" + + +def test_doctype_default(): + doctype = DocumentType() + assert str(doctype) == "" + + +def test_doctype_custom(): + doctype = DocumentType("xml") + assert str(doctype) == "" + + +def test_text(): + text = Text("Hello, world!") + assert str(text) == "Hello, world!" + + +def test_text_escaping(): + text = Text("") + assert str(text) == "<script>alert('XSS')</script>" + + +def test_text_safe(): + class CustomHTML(str): + def __html__(self) -> str: + return "Bold Text" + + text = Text(CustomHTML()) + assert str(text) == "Bold Text" + + +def test_fragment_empty(): + fragment = Fragment() + assert str(fragment) == "" + + +def test_fragment_with_text(): + fragment = Fragment(children=[Text("test")]) + assert str(fragment) == "test" + + +def test_fragment_with_multiple_texts(): + fragment = Fragment(children=[Text("Hello"), Text(" "), Text("World")]) + assert str(fragment) == "Hello World" + + +def test_element_no_children(): + div = Element("div") + assert not div.is_void + assert str(div) == "
    " + + +def test_void_element_no_children(): + br = Element("br") + assert br.is_void + assert str(br) == "
    " + + +def test_void_element_with_attributes(): + br = Element("br", attrs={"class": "line-break", "hidden": None}) + assert str(br) == '' + + +def test_void_element_with_children(): + with pytest.raises(ValueError): + _ = Element("br", children=[Text("should not be here")]) + + +def test_standard_element_with_attributes(): + div = Element( + "div", + attrs={"id": "main", "data-role": "container", "hidden": None}, + ) + assert str(div) == '' + + +def test_standard_element_with_text_child(): + div = Element("div", children=[Text("Hello, world!")]) + assert str(div) == "
    Hello, world!
    " + + +def test_standard_element_with_element_children(): + div = Element( + "div", + children=[ + Element("h1", children=[Text("Title")]), + Element("p", children=[Text("This is a paragraph.")]), + ], + ) + assert str(div) == "

    Title

    This is a paragraph.

    " + + +def test_element_with_fragment_with_children(): + div = Element( + "div", + children=[ + Fragment( + children=[ + Element("div", children=[Text("wow")]), + Text("inside fragment"), + ] + ) + ], + ) + assert str(div) == "
    wow
    inside fragment
    " + + +def test_standard_element_with_mixed_children(): + div = Element( + "div", + children=[ + Text("Intro text."), + Element("h1", children=[Text("Title")]), + Text("Some more text."), + Element("hr"), + Element("p", children=[Text("This is a paragraph.")]), + ], + ) + assert str(div) == ( + "
    Intro text.

    Title

    Some more text.

    This is a paragraph.

    " + ) + + +def test_complex_tree(): + html = Fragment( + children=[ + DocumentType(), + Element( + "html", + children=[ + Element( + "head", + children=[ + Element("title", children=[Text("Test Page")]), + Element("meta", attrs={"charset": "UTF-8"}), + ], + ), + Element( + "body", + attrs={"class": "main-body"}, + children=[ + Element("h1", children=[Text("Welcome to the Test Page")]), + Element( + "p", + children=[ + Text("This is a sample paragraph with "), + Element("strong", children=[Text("bold text")]), + Text(" and "), + Element("em", children=[Text("italic text")]), + Text("."), + ], + ), + Element("br"), + Element( + "ul", + children=[ + Element("li", children=[Text("Item 1")]), + Element("li", children=[Text("Item 2")]), + Element("li", children=[Text("Item 3")]), + ], + ), + ], + ), + ], + ), + ] + ) + assert str(html) == ( + "Test Page" + '' + "

    Welcome to the Test Page

    " + "

    This is a sample paragraph with bold text and " + "italic text.


    • Item 1
    • Item 2
    • " + "
    • Item 3
    " + ) + + +def test_dunder_html_method(): + div = Element("div", children=[Text("Hello")]) + assert div.__html__() == str(div) + + +def test_escaping_of_text_content(): + div = Element("div", children=[Text("")]) + assert str(div) == "
    <script>alert('XSS')</script>
    " + + +def test_escaping_of_attribute_values(): + div = Element("div", attrs={"class": '">XSS<'}) + assert str(div) == '
    ' diff --git a/uv.lock b/uv.lock index b620fe9..049ed6b 100644 --- a/uv.lock +++ b/uv.lock @@ -81,20 +81,20 @@ wheels = [ [[package]] name = "pyright" -version = "1.1.404" +version = "1.1.405" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nodeenv" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/6e/026be64c43af681d5632722acd100b06d3d39f383ec382ff50a71a6d5bce/pyright-1.1.404.tar.gz", hash = "sha256:455e881a558ca6be9ecca0b30ce08aa78343ecc031d37a198ffa9a7a1abeb63e", size = 4065679, upload-time = "2025-08-20T18:46:14.029Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/6c/ba4bbee22e76af700ea593a1d8701e3225080956753bee9750dcc25e2649/pyright-1.1.405.tar.gz", hash = "sha256:5c2a30e1037af27eb463a1cc0b9f6d65fec48478ccf092c1ac28385a15c55763", size = 4068319, upload-time = "2025-09-04T03:37:06.776Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/30/89aa7f7d7a875bbb9a577d4b1dc5a3e404e3d2ae2657354808e905e358e0/pyright-1.1.404-py3-none-any.whl", hash = "sha256:c7b7ff1fdb7219c643079e4c3e7d4125f0dafcc19d253b47e898d130ea426419", size = 5902951, upload-time = "2025-08-20T18:46:12.096Z" }, + { url = "https://files.pythonhosted.org/packages/d5/1a/524f832e1ff1962a22a1accc775ca7b143ba2e9f5924bb6749dce566784a/pyright-1.1.405-py3-none-any.whl", hash = "sha256:a2cb13700b5508ce8e5d4546034cb7ea4aedb60215c6c33f56cec7f53996035a", size = 5905038, upload-time = "2025-09-04T03:37:04.913Z" }, ] [[package]] name = "pytest" -version = "8.4.1" +version = "8.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -103,9 +103,9 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, ] [[package]] @@ -122,28 +122,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.12.11" +version = "0.12.12" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/55/16ab6a7d88d93001e1ae4c34cbdcfb376652d761799459ff27c1dc20f6fa/ruff-0.12.11.tar.gz", hash = "sha256:c6b09ae8426a65bbee5425b9d0b82796dbb07cb1af045743c79bfb163001165d", size = 5347103, upload-time = "2025-08-28T13:59:08.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/f0/e0965dd709b8cabe6356811c0ee8c096806bb57d20b5019eb4e48a117410/ruff-0.12.12.tar.gz", hash = "sha256:b86cd3415dbe31b3b46a71c598f4c4b2f550346d1ccf6326b347cc0c8fd063d6", size = 5359915, upload-time = "2025-09-04T16:50:18.273Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/a2/3b3573e474de39a7a475f3fbaf36a25600bfeb238e1a90392799163b64a0/ruff-0.12.11-py3-none-linux_armv6l.whl", hash = "sha256:93fce71e1cac3a8bf9200e63a38ac5c078f3b6baebffb74ba5274fb2ab276065", size = 11979885, upload-time = "2025-08-28T13:58:26.654Z" }, - { url = "https://files.pythonhosted.org/packages/76/e4/235ad6d1785a2012d3ded2350fd9bc5c5af8c6f56820e696b0118dfe7d24/ruff-0.12.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8e33ac7b28c772440afa80cebb972ffd823621ded90404f29e5ab6d1e2d4b93", size = 12742364, upload-time = "2025-08-28T13:58:30.256Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0d/15b72c5fe6b1e402a543aa9d8960e0a7e19dfb079f5b0b424db48b7febab/ruff-0.12.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d69fb9d4937aa19adb2e9f058bc4fbfe986c2040acb1a4a9747734834eaa0bfd", size = 11920111, upload-time = "2025-08-28T13:58:33.677Z" }, - { url = "https://files.pythonhosted.org/packages/3e/c0/f66339d7893798ad3e17fa5a1e587d6fd9806f7c1c062b63f8b09dda6702/ruff-0.12.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:411954eca8464595077a93e580e2918d0a01a19317af0a72132283e28ae21bee", size = 12160060, upload-time = "2025-08-28T13:58:35.74Z" }, - { url = "https://files.pythonhosted.org/packages/03/69/9870368326db26f20c946205fb2d0008988aea552dbaec35fbacbb46efaa/ruff-0.12.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a2c0a2e1a450f387bf2c6237c727dd22191ae8c00e448e0672d624b2bbd7fb0", size = 11799848, upload-time = "2025-08-28T13:58:38.051Z" }, - { url = "https://files.pythonhosted.org/packages/25/8c/dd2c7f990e9b3a8a55eee09d4e675027d31727ce33cdb29eab32d025bdc9/ruff-0.12.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ca4c3a7f937725fd2413c0e884b5248a19369ab9bdd850b5781348ba283f644", size = 13536288, upload-time = "2025-08-28T13:58:40.046Z" }, - { url = "https://files.pythonhosted.org/packages/7a/30/d5496fa09aba59b5e01ea76775a4c8897b13055884f56f1c35a4194c2297/ruff-0.12.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4d1df0098124006f6a66ecf3581a7f7e754c4df7644b2e6704cd7ca80ff95211", size = 14490633, upload-time = "2025-08-28T13:58:42.285Z" }, - { url = "https://files.pythonhosted.org/packages/9b/2f/81f998180ad53445d403c386549d6946d0748e536d58fce5b5e173511183/ruff-0.12.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a8dd5f230efc99a24ace3b77e3555d3fbc0343aeed3fc84c8d89e75ab2ff793", size = 13888430, upload-time = "2025-08-28T13:58:44.641Z" }, - { url = "https://files.pythonhosted.org/packages/87/71/23a0d1d5892a377478c61dbbcffe82a3476b050f38b5162171942a029ef3/ruff-0.12.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dc75533039d0ed04cd33fb8ca9ac9620b99672fe7ff1533b6402206901c34ee", size = 12913133, upload-time = "2025-08-28T13:58:47.039Z" }, - { url = "https://files.pythonhosted.org/packages/80/22/3c6cef96627f89b344c933781ed38329bfb87737aa438f15da95907cbfd5/ruff-0.12.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fc58f9266d62c6eccc75261a665f26b4ef64840887fc6cbc552ce5b29f96cc8", size = 13169082, upload-time = "2025-08-28T13:58:49.157Z" }, - { url = "https://files.pythonhosted.org/packages/05/b5/68b3ff96160d8b49e8dd10785ff3186be18fd650d356036a3770386e6c7f/ruff-0.12.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5a0113bd6eafd545146440225fe60b4e9489f59eb5f5f107acd715ba5f0b3d2f", size = 13139490, upload-time = "2025-08-28T13:58:51.593Z" }, - { url = "https://files.pythonhosted.org/packages/59/b9/050a3278ecd558f74f7ee016fbdf10591d50119df8d5f5da45a22c6afafc/ruff-0.12.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0d737b4059d66295c3ea5720e6efc152623bb83fde5444209b69cd33a53e2000", size = 11958928, upload-time = "2025-08-28T13:58:53.943Z" }, - { url = "https://files.pythonhosted.org/packages/f9/bc/93be37347db854806904a43b0493af8d6873472dfb4b4b8cbb27786eb651/ruff-0.12.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:916fc5defee32dbc1fc1650b576a8fed68f5e8256e2180d4d9855aea43d6aab2", size = 11764513, upload-time = "2025-08-28T13:58:55.976Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a1/1471751e2015a81fd8e166cd311456c11df74c7e8769d4aabfbc7584c7ac/ruff-0.12.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c984f07d7adb42d3ded5be894fb4007f30f82c87559438b4879fe7aa08c62b39", size = 12745154, upload-time = "2025-08-28T13:58:58.16Z" }, - { url = "https://files.pythonhosted.org/packages/68/ab/2542b14890d0f4872dd81b7b2a6aed3ac1786fae1ce9b17e11e6df9e31e3/ruff-0.12.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e07fbb89f2e9249f219d88331c833860489b49cdf4b032b8e4432e9b13e8a4b9", size = 13227653, upload-time = "2025-08-28T13:59:00.276Z" }, - { url = "https://files.pythonhosted.org/packages/22/16/2fbfc61047dbfd009c58a28369a693a1484ad15441723be1cd7fe69bb679/ruff-0.12.11-py3-none-win32.whl", hash = "sha256:c792e8f597c9c756e9bcd4d87cf407a00b60af77078c96f7b6366ea2ce9ba9d3", size = 11944270, upload-time = "2025-08-28T13:59:02.347Z" }, - { url = "https://files.pythonhosted.org/packages/08/a5/34276984705bfe069cd383101c45077ee029c3fe3b28225bf67aa35f0647/ruff-0.12.11-py3-none-win_amd64.whl", hash = "sha256:a3283325960307915b6deb3576b96919ee89432ebd9c48771ca12ee8afe4a0fd", size = 13046600, upload-time = "2025-08-28T13:59:04.751Z" }, - { url = "https://files.pythonhosted.org/packages/84/a8/001d4a7c2b37623a3fd7463208267fb906df40ff31db496157549cfd6e72/ruff-0.12.11-py3-none-win_arm64.whl", hash = "sha256:bae4d6e6a2676f8fb0f98b74594a048bae1b944aab17e9f5d504062303c6dbea", size = 12135290, upload-time = "2025-08-28T13:59:06.933Z" }, + { url = "https://files.pythonhosted.org/packages/09/79/8d3d687224d88367b51c7974cec1040c4b015772bfbeffac95face14c04a/ruff-0.12.12-py3-none-linux_armv6l.whl", hash = "sha256:de1c4b916d98ab289818e55ce481e2cacfaad7710b01d1f990c497edf217dafc", size = 12116602, upload-time = "2025-09-04T16:49:18.892Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c3/6e599657fe192462f94861a09aae935b869aea8a1da07f47d6eae471397c/ruff-0.12.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7acd6045e87fac75a0b0cdedacf9ab3e1ad9d929d149785903cff9bb69ad9727", size = 12868393, upload-time = "2025-09-04T16:49:23.043Z" }, + { url = "https://files.pythonhosted.org/packages/e8/d2/9e3e40d399abc95336b1843f52fc0daaceb672d0e3c9290a28ff1a96f79d/ruff-0.12.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:abf4073688d7d6da16611f2f126be86523a8ec4343d15d276c614bda8ec44edb", size = 12036967, upload-time = "2025-09-04T16:49:26.04Z" }, + { url = "https://files.pythonhosted.org/packages/e9/03/6816b2ed08836be272e87107d905f0908be5b4a40c14bfc91043e76631b8/ruff-0.12.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:968e77094b1d7a576992ac078557d1439df678a34c6fe02fd979f973af167577", size = 12276038, upload-time = "2025-09-04T16:49:29.056Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d5/707b92a61310edf358a389477eabd8af68f375c0ef858194be97ca5b6069/ruff-0.12.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42a67d16e5b1ffc6d21c5f67851e0e769517fb57a8ebad1d0781b30888aa704e", size = 11901110, upload-time = "2025-09-04T16:49:32.07Z" }, + { url = "https://files.pythonhosted.org/packages/9d/3d/f8b1038f4b9822e26ec3d5b49cf2bc313e3c1564cceb4c1a42820bf74853/ruff-0.12.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b216ec0a0674e4b1214dcc998a5088e54eaf39417327b19ffefba1c4a1e4971e", size = 13668352, upload-time = "2025-09-04T16:49:35.148Z" }, + { url = "https://files.pythonhosted.org/packages/98/0e/91421368ae6c4f3765dd41a150f760c5f725516028a6be30e58255e3c668/ruff-0.12.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:59f909c0fdd8f1dcdbfed0b9569b8bf428cf144bec87d9de298dcd4723f5bee8", size = 14638365, upload-time = "2025-09-04T16:49:38.892Z" }, + { url = "https://files.pythonhosted.org/packages/74/5d/88f3f06a142f58ecc8ecb0c2fe0b82343e2a2b04dcd098809f717cf74b6c/ruff-0.12.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ac93d87047e765336f0c18eacad51dad0c1c33c9df7484c40f98e1d773876f5", size = 14060812, upload-time = "2025-09-04T16:49:42.732Z" }, + { url = "https://files.pythonhosted.org/packages/13/fc/8962e7ddd2e81863d5c92400820f650b86f97ff919c59836fbc4c1a6d84c/ruff-0.12.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:01543c137fd3650d322922e8b14cc133b8ea734617c4891c5a9fccf4bfc9aa92", size = 13050208, upload-time = "2025-09-04T16:49:46.434Z" }, + { url = "https://files.pythonhosted.org/packages/53/06/8deb52d48a9a624fd37390555d9589e719eac568c020b27e96eed671f25f/ruff-0.12.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2afc2fa864197634e549d87fb1e7b6feb01df0a80fd510d6489e1ce8c0b1cc45", size = 13311444, upload-time = "2025-09-04T16:49:49.931Z" }, + { url = "https://files.pythonhosted.org/packages/2a/81/de5a29af7eb8f341f8140867ffb93f82e4fde7256dadee79016ac87c2716/ruff-0.12.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0c0945246f5ad776cb8925e36af2438e66188d2b57d9cf2eed2c382c58b371e5", size = 13279474, upload-time = "2025-09-04T16:49:53.465Z" }, + { url = "https://files.pythonhosted.org/packages/7f/14/d9577fdeaf791737ada1b4f5c6b59c21c3326f3f683229096cccd7674e0c/ruff-0.12.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a0fbafe8c58e37aae28b84a80ba1817f2ea552e9450156018a478bf1fa80f4e4", size = 12070204, upload-time = "2025-09-04T16:49:56.882Z" }, + { url = "https://files.pythonhosted.org/packages/77/04/a910078284b47fad54506dc0af13839c418ff704e341c176f64e1127e461/ruff-0.12.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b9c456fb2fc8e1282affa932c9e40f5ec31ec9cbb66751a316bd131273b57c23", size = 11880347, upload-time = "2025-09-04T16:49:59.729Z" }, + { url = "https://files.pythonhosted.org/packages/df/58/30185fcb0e89f05e7ea82e5817b47798f7fa7179863f9d9ba6fd4fe1b098/ruff-0.12.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f12856123b0ad0147d90b3961f5c90e7427f9acd4b40050705499c98983f489", size = 12891844, upload-time = "2025-09-04T16:50:02.591Z" }, + { url = "https://files.pythonhosted.org/packages/21/9c/28a8dacce4855e6703dcb8cdf6c1705d0b23dd01d60150786cd55aa93b16/ruff-0.12.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:26a1b5a2bf7dd2c47e3b46d077cd9c0fc3b93e6c6cc9ed750bd312ae9dc302ee", size = 13360687, upload-time = "2025-09-04T16:50:05.8Z" }, + { url = "https://files.pythonhosted.org/packages/c8/fa/05b6428a008e60f79546c943e54068316f32ec8ab5c4f73e4563934fbdc7/ruff-0.12.12-py3-none-win32.whl", hash = "sha256:173be2bfc142af07a01e3a759aba6f7791aa47acf3604f610b1c36db888df7b1", size = 12052870, upload-time = "2025-09-04T16:50:09.121Z" }, + { url = "https://files.pythonhosted.org/packages/85/60/d1e335417804df452589271818749d061b22772b87efda88354cf35cdb7a/ruff-0.12.12-py3-none-win_amd64.whl", hash = "sha256:e99620bf01884e5f38611934c09dd194eb665b0109104acae3ba6102b600fd0d", size = 13178016, upload-time = "2025-09-04T16:50:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/28/7e/61c42657f6e4614a4258f1c3b0c5b93adc4d1f8575f5229d1906b483099b/ruff-0.12.12-py3-none-win_arm64.whl", hash = "sha256:2a8199cab4ce4d72d158319b63370abf60991495fb733db96cd923a34c52d093", size = 12256762, upload-time = "2025-09-04T16:50:15.737Z" }, ] [[package]] From 5c04d906bc56dbde5aebe240502258c22c737150 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 4 Sep 2025 15:38:40 -0700 Subject: [PATCH 2/7] WIP: clean up the HTML parsing stuff. --- html_tstring/html.py | 80 +---------------- html_tstring/nodes.py | 35 ++++++-- html_tstring/nodes_test.py | 15 ++++ html_tstring/parser.py | 117 +++++++++++++++++++++++++ html_tstring/parser_test.py | 165 ++++++++++++++++++++++++++++++++++++ 5 files changed, 326 insertions(+), 86 deletions(-) create mode 100644 html_tstring/parser.py create mode 100644 html_tstring/parser_test.py diff --git a/html_tstring/html.py b/html_tstring/html.py index 0769c70..186a2f0 100644 --- a/html_tstring/html.py +++ b/html_tstring/html.py @@ -1,9 +1,7 @@ import typing as t -from html.parser import HTMLParser from string.templatelib import Interpolation, Template from .nodes import ( - VOID_ELEMENTS, Comment, DocumentType, Element, @@ -240,7 +238,10 @@ def _node_or_nodes_from_tuple( text = node[NODE_TEXT] assert text is not None if text in bookkeep: + as_interpolation = bookkeep[str(text)] + print("HERE IS THE INTERPOLATION: ", as_interpolation) bk_value = _format_interpolation(bookkeep[str(text)].value) + print("VALUE OF bk_value: ", bk_value) yield Text(str(bk_value)) else: yield Text(text) @@ -283,81 +284,6 @@ def _node_from_tuple(node: NodeTuple, bookkeep: dict[str, Interpolation]) -> Nod return Fragment(children=tuple(nodes)) -class ElementParser(HTMLParser): - stack: list[NodeTuple] - - def __init__(self): - super().__init__() - self.stack = [(KIND_FRAGMENT, "", {}, [], None)] - - def handle_starttag( - self, tag: str, attrs: t.Sequence[tuple[str, str | None]] - ) -> None: - element = (KIND_ELEMENT, tag, dict(attrs), [], None) - self.stack.append(element) - - # Unfortunately, Python's built-in HTMLParser has inconsistent behavior - # with void elements. In particular, it calls handle_endtag() for them - # only if they explicitly self-close (e.g.,
    ). But in the HTML - # spec itself, *there is no distinction* between
    and
    . - # So we need to handle this case ourselves. - # - # See https://github.com/python/cpython/issues/69445 - if tag in VOID_ELEMENTS: - # Always call handle_endtag for void elements. If it happens - # to be self-closed in the input, handle_endtag() will effectively - # be called twice. We ignore the second call there. - self.handle_endtag(tag) - - def handle_endtag(self, tag: str) -> None: - if len(self.stack) == 1: - # Special case to handle void elements that are not self-closed aka - # cpython #69445. - if tag in VOID_ELEMENTS: - children = self.stack[0][NODE_CHILDREN] - if isinstance(children[-1], tuple) and children[-1][NODE_TAG] == tag: - # The last child is the void element we just added. - return - raise ValueError( - f"Unexpected closing tag with no matching opening tag." - ) - - element = self.stack.pop() - if element[NODE_TAG] != tag: - raise ValueError( - f"Mismatched closing tag for <{element[NODE_TAG]}>." - ) - - self.append_child(element) - - def handle_data(self, data: str) -> None: - print("Handling data:", data) - text = (KIND_TEXT, "", {}, [], data) - self.append_child(text) - - def append_child(self, child: NodeTuple) -> None: - self.stack[-1][NODE_CHILDREN].append(child) - - def get_root(self) -> NodeTuple: - if len(self.stack) != 1: - raise ValueError("Invalid HTML structure: unclosed tags remain.") - - root = self.stack[0] - - assert root[NODE_KIND] == KIND_FRAGMENT - print(root) - - if len(root[NODE_CHILDREN]) == 1: - print("Single root element detected.") - print(root) - return root[NODE_CHILDREN][0] - - if len(root[NODE_CHILDREN]) == 0: - return (KIND_TEXT, "", {}, [], "") - - return root - - # -------------------------------------------------------------------------- # Safe HTML support # -------------------------------------------------------------------------- diff --git a/html_tstring/nodes.py b/html_tstring/nodes.py index 5571af1..cace2ab 100644 --- a/html_tstring/nodes.py +++ b/html_tstring/nodes.py @@ -24,6 +24,10 @@ ) +CDATA_CONTENT_ELEMENTS = frozenset(["script", "style"]) +RCDATA_CONTENT_ELEMENTS = frozenset(["textarea", "title"]) +CONTENT_ELEMENTS = CDATA_CONTENT_ELEMENTS | RCDATA_CONTENT_ELEMENTS + # TODO: add a pretty-printer for nodes for debugging # TODO: consider how significant whitespace is handled from t-string to nodes @@ -36,7 +40,7 @@ def __html__(self) -> str: ... type HTMLDunder = t.Callable[[], str] -@dataclass(frozen=True) +@dataclass class Node(HasHTMLDunder): def __html__(self) -> str: """Return the HTML representation of the node.""" @@ -44,7 +48,7 @@ def __html__(self) -> str: return str(self) -@dataclass(frozen=True) +@dataclass class Text(Node): # Django's `SafeString` and Markupsafe/Jinja2's `Markup` both inherit # from `str`, but that is not a requirement for the `__html__` dunder. @@ -56,19 +60,25 @@ def _cached_str(self) -> str: return self.text.__html__() return escape(self.text, quote=False) + def _as_unescaped(self) -> str: + """Return the text as-is, without escaping. For internal use only.""" + if isinstance(self.text, HasHTMLDunder): + return self.text.__html__() + return self.text + def __str__(self) -> str: return self._cached_str -@dataclass(frozen=True) +@dataclass class Fragment(Node): - children: t.Sequence[Node] = field(default_factory=tuple) + children: t.Sequence[Node] = field(default_factory=list) def __str__(self) -> str: return "".join(str(child) for child in self.children) -@dataclass(frozen=True) +@dataclass class Comment(Node): text: str @@ -76,7 +86,7 @@ def __str__(self) -> str: return f"" -@dataclass(frozen=True) +@dataclass class DocumentType(Node): text: str = "html" @@ -84,11 +94,11 @@ def __str__(self) -> str: return f"" -@dataclass(frozen=True) +@dataclass class Element(Node): tag: str attrs: t.Mapping[str, str | None] = field(default_factory=dict) - children: t.Sequence[Node] = field(default_factory=tuple) + children: t.Sequence[Node] = field(default_factory=list) def __post_init__(self): """Ensure all preconditions are met.""" @@ -113,5 +123,12 @@ def __str__(self) -> str: return f"<{self.tag}{attrs_str} />" if not self.children: return f"<{self.tag}{attrs_str}>" - children_str = "".join(str(child) for child in self.children) + if self.tag in CONTENT_ELEMENTS: + # Content elements should not escape their content + children_str = "".join( + child._as_unescaped() if isinstance(child, Text) else str(child) + for child in self.children + ) + else: + children_str = "".join(str(child) for child in self.children) return f"<{self.tag}{attrs_str}>{children_str}" diff --git a/html_tstring/nodes_test.py b/html_tstring/nodes_test.py index 4f04778..6499cd8 100644 --- a/html_tstring/nodes_test.py +++ b/html_tstring/nodes_test.py @@ -97,6 +97,21 @@ def test_standard_element_with_text_child(): assert str(div) == "
    Hello, world!
    " +def test_script_element_with_text_child(): + node = Element( + "script", + children=[Text("if (a < b && c > d) { alert('hello & friend'); }")], + ) + assert str(node) == ( + "" + ) + + +def test_title_element_with_text_child(): + node = Element("title", children=[Text("My & Awesome ")]) + assert str(node) == "My & Awesome <Site>" + + def test_standard_element_with_element_children(): div = Element( "div", diff --git a/html_tstring/parser.py b/html_tstring/parser.py new file mode 100644 index 0000000..de51efe --- /dev/null +++ b/html_tstring/parser.py @@ -0,0 +1,117 @@ +import typing as t +from html.parser import HTMLParser + +from .nodes import VOID_ELEMENTS, Comment, DocumentType, Element, Fragment, Node, Text + + +class NodeParser(HTMLParser): + root: Fragment + stack: list[Element] + + def __init__(self): + super().__init__() + self.root = Fragment(children=[]) + self.stack = [] + + def handle_starttag( + self, tag: str, attrs: t.Sequence[tuple[str, str | None]] + ) -> None: + element = Element(tag, attrs=dict(attrs), children=[]) + self.stack.append(element) + + # Unfortunately, Python's built-in HTMLParser has inconsistent behavior + # with void elements. In particular, it calls handle_endtag() for them + # only if they explicitly self-close (e.g.,
    ). But in the HTML + # spec itself, *there is no distinction* between
    and
    . + # So we need to handle this case ourselves. + # + # See https://github.com/python/cpython/issues/69445 + if tag in VOID_ELEMENTS: + # Always call handle_endtag for void elements. If it happens + # to be self-closed in the input, handle_endtag() will effectively + # be called twice. We ignore the second call there. + self.handle_endtag(tag) + + def handle_endtag(self, tag: str) -> None: + if tag in VOID_ELEMENTS: + # Special case: handle Python issue #69445 (see comment above). + open_element = self.get_open_element() + if open_element and open_element.tag == tag: + _ = self.stack.pop() + self.append_child(open_element) + return + most_recent_closed = self.get_most_recent_closed_element() + if most_recent_closed and most_recent_closed.tag == tag: + # Ignore this call; we've already closed it. + return + raise ValueError(f"Unexpected closing tag with no open element.") + + element = self.stack.pop() + if element.tag != tag: + raise ValueError(f"Mismatched closing tag for <{element.tag}>.") + + self.append_child(element) + + def handle_data(self, data: str) -> None: + text = Text(data) + self.append_child(text) + + def handle_comment(self, data: str) -> None: + comment = Comment(data) + self.append_child(comment) + + def handle_decl(self, decl: str) -> None: + if decl.upper().startswith("DOCTYPE"): + doctype_content = decl[7:].strip() + doctype = DocumentType(doctype_content) + self.append_child(doctype) + # For simplicity, we ignore other declarations. + pass + + def get_parent(self) -> Fragment | Element: + """Return the current parent node to which new children should be added.""" + return self.stack[-1] if self.stack else self.root + + def get_open_element(self) -> Element | None: + """Return the currently open Element, if any.""" + return self.stack[-1] if self.stack else None + + def get_most_recent_closed_element(self) -> Element | None: + """Return the most recently closed Element, if any.""" + parent = self.get_parent() + if parent.children and isinstance(parent.children[-1], Element): + return parent.children[-1] + return None + + def append_child(self, child: Node) -> None: + parent = self.get_parent() + # We *know* our parser is using lists for children, so this cast is safe. + t.cast(list[Node], parent.children).append(child) + + def close(self) -> None: + if self.stack: + raise ValueError("Invalid HTML structure: unclosed tags remain.") + super().close() + + def get_node(self) -> Node: + """Get the Node tree parsed from the input HTML.""" + assert not self.stack, "Did you forget to call close()?" + if len(self.root.children) > 1: + # The parse structure results in multiple root elements, so we + # return a Fragment to hold them all. + return self.root + elif len(self.root.children) == 1: + # The parse structure results in a single root element, so we + # return that element directly. This will be a non-Fragment Node. + return self.root.children[0] + else: + # Special case: the parse structure is empty; we treat + # this as an empty Text Node. + return Text("") + + +def parse_html(input_html: str) -> Node: + parser = NodeParser() + parser.feed(input_html) + parser.close() + return parser.get_node() diff --git a/html_tstring/parser_test.py b/html_tstring/parser_test.py new file mode 100644 index 0000000..191f0c2 --- /dev/null +++ b/html_tstring/parser_test.py @@ -0,0 +1,165 @@ +import pytest + +from .nodes import Comment, DocumentType, Element, Fragment, Text +from .parser import parse_html + + +def test_parse_empty(): + node = parse_html("") + assert node == Text("") + + +def test_parse_text(): + node = parse_html("Hello, world!") + assert node == Text("Hello, world!") + + +def test_parse_text_with_entities(): + node = parse_html("Panini's") + assert node == Text("Panini's") + + +def test_parse_void_element(): + node = parse_html("
    ") + assert node == Element("br") + + +def test_parse_void_element_self_closed(): + node = parse_html("
    ") + assert node == Element("br") + + +def test_parse_uppercase_void_element(): + node = parse_html("
    ") + assert node == Element("br") + + +def test_parse_standard_element_with_text(): + node = parse_html("
    Hello, world!
    ") + assert node == Element("div", children=[Text("Hello, world!")]) + + +def test_parse_nested_elements(): + node = parse_html("
    Nested content
    ") + assert node == Element( + "div", + children=[ + Element("span", children=[Text("Nested")]), + Text(" content"), + ], + ) + + +def test_parse_element_with_attributes(): + node = parse_html('Link') + assert node == Element( + "a", + attrs={"href": "https://example.com", "target": "_blank"}, + children=[Text("Link")], + ) + + +def test_parse_comment(): + node = parse_html("") + assert node == Comment(" This is a comment ") + + +def test_parse_doctype(): + node = parse_html("") + assert node == DocumentType("html") + + +def test_parse_multiple_voids(): + node = parse_html("






    ") + assert node == Fragment( + children=[ + Element("br"), + Element("hr"), + Element("hr"), + Element("hr"), + Element("br"), + Element("br"), + Element("br"), + ] + ) + + +def test_parse_mixed_content(): + node = parse_html( + '
    ' + "Hello,
    world !
    " + ) + assert node == Fragment( + children=[ + DocumentType("html"), + Comment(" Comment "), + Element( + "div", + attrs={"class": "container"}, + children=[ + Text("Hello, "), + Element("br", attrs={"class": "funky"}), + Text("world "), + Comment(" neato "), + Text("!"), + ], + ), + ] + ) + + +def test_parse_script_tag_content(): + node = parse_html("") + assert node == Element( + "script", + children=[Text("if (a < b && c > d) { alert('wow'); }")], + ) + assert str(node) == ("") + + +def test_parse_script_with_entities(): + # The ") + assert node == Element( + "script", + children=[Text("var x = 'a & b';")], + ) + assert str(node) == "" + + +def test_parse_textarea_tag_content(): + node = parse_html("") + assert node == Element( + "textarea", + children=[Text("if (a < b && c > d) { alert('wow'); }")], + ) + assert str(node) == "" + + +def test_parse_textarea_with_entities(): + # The ") + assert node == Element( + "textarea", + children=[Text("var x = 'a & b';")], + ) + assert str(node) == "" + + +def test_parse_title_unusual(): + node = parse_html("My & Awesome <Site>") + assert node == Element( + "title", + children=[Text("My & Awesome ")], + ) + assert str(node) == "My & Awesome <Site>" + + +def test_parse_mismatched_tags(): + with pytest.raises(ValueError): + _ = parse_html("
    Mismatched
    ") + + +def test_parse_unclosed_tag(): + with pytest.raises(ValueError): + _ = parse_html("
    Unclosed") From adc67296108561f41a6ce7ffc3bcf9afb9bb38e5 Mon Sep 17 00:00:00 2001 From: Dave Date: Thu, 4 Sep 2025 15:55:12 -0700 Subject: [PATCH 3/7] Move clsx -> separate classnames.py --- html_tstring/classnames.py | 46 +++++++++++ html_tstring/classnames_test.py | 74 +++++++++++++++++ html_tstring/{html.py => tag_processor.py} | 82 +------------------ .../{html_test.py => tag_processor_test.py} | 78 +----------------- 4 files changed, 123 insertions(+), 157 deletions(-) create mode 100644 html_tstring/classnames.py create mode 100644 html_tstring/classnames_test.py rename html_tstring/{html.py => tag_processor.py} (81%) rename html_tstring/{html_test.py => tag_processor_test.py} (90%) diff --git a/html_tstring/classnames.py b/html_tstring/classnames.py new file mode 100644 index 0000000..7819d51 --- /dev/null +++ b/html_tstring/classnames.py @@ -0,0 +1,46 @@ +def classnames(*args: object) -> str: + """ + Construct a space-separated class string from various inputs. + + Accepts strings, lists/tuples of strings, and dicts mapping class names to + boolean values. Ignores None and False values. + + Examples: + classnames("btn", "btn-primary") -> "btn btn-primary" + classnames("btn", {"btn-primary": True, "disabled": False}) -> "btn btn-primary" + classnames(["btn", "btn-primary"], {"disabled": True}) -> "btn btn-primary disabled" + classnames("btn", None, False, "active") -> "btn active" + + Args: + *args: Variable length argument list containing strings, lists/tuples, + or dicts. + + Returns: + A single string with class names separated by spaces. + """ + classes: list[str] = [] + # Use a queue to process arguments iteratively, preserving order. + queue = list(args) + + while queue: + arg = queue.pop(0) + + if not arg: # Handles None, False, empty strings/lists/dicts + continue + + if isinstance(arg, str): + classes.append(arg) + elif isinstance(arg, dict): + for key, value in arg.items(): + if value: + classes.append(key) + elif isinstance(arg, (list, tuple)): + # Add items to the front of the queue to process them next, in order. + queue[0:0] = arg + elif isinstance(arg, bool): + pass # Explicitly ignore booleans not in a dict + else: + raise ValueError(f"Invalid class argument type: {type(arg).__name__}") + + # Filter out empty strings and join the result. + return " ".join(stripped for c in classes if (stripped := c.strip())) diff --git a/html_tstring/classnames_test.py b/html_tstring/classnames_test.py new file mode 100644 index 0000000..c21fdb9 --- /dev/null +++ b/html_tstring/classnames_test.py @@ -0,0 +1,74 @@ +import pytest + +from .classnames import classnames + + +def test_classnames_empty(): + assert classnames() == "" + + +def test_classnames_strings(): + assert classnames("btn", "btn-primary") == "btn btn-primary" + + +def test_classnames_strings_strip(): + assert classnames(" btn ", " btn-primary ") == "btn btn-primary" + + +def test_cslx_empty_strings(): + assert classnames("", "btn", "", "btn-primary", "") == "btn btn-primary" + + +def test_classnames_lists_and_tuples(): + assert ( + classnames(["btn", "btn-primary"], ("active", "disabled")) + == "btn btn-primary active disabled" + ) + + +def test_classnames_dicts(): + assert ( + classnames( + "btn", + {"btn-primary": True, "disabled": False, "active": True, "shown": "yes"}, + ) + == "btn btn-primary active shown" + ) + + +def test_classnames_mixed_inputs(): + assert ( + classnames( + "btn", + ["btn-primary", "active"], + {"disabled": True, "hidden": False}, + ("extra",), + ) + == "btn btn-primary active disabled extra" + ) + + +def test_classnames_ignores_none_and_false(): + assert ( + classnames("btn", None, False, "active", {"hidden": None, "visible": True}) + == "btn active visible" + ) + + +def test_classnames_raises_type_error_on_invalid_input(): + with pytest.raises(ValueError): + classnames(123) + + with pytest.raises(ValueError): + classnames(["btn", 456]) + + +def test_classnames_kitchen_sink(): + assert ( + classnames( + "foo", + [1 and "bar", {"baz": False, "bat": None}, ["hello", ["world"]]], + "cya", + ) + == "foo bar hello world cya" + ) diff --git a/html_tstring/html.py b/html_tstring/tag_processor.py similarity index 81% rename from html_tstring/html.py rename to html_tstring/tag_processor.py index 186a2f0..3c1ae91 100644 --- a/html_tstring/html.py +++ b/html_tstring/tag_processor.py @@ -1,94 +1,16 @@ import typing as t from string.templatelib import Interpolation, Template +from .classnames import classnames from .nodes import ( Comment, DocumentType, Element, Fragment, - HasHTMLDunder, Node, Text, ) -# For performance, a mutable tuple is used while parsing. -KIND_FRAGMENT = 0 -KIND_ELEMENT = 1 -KIND_TEXT = 2 -KIND_COMMENT = 3 -KIND_DOCTYPE = 4 - -type NodeTuple = tuple[ - int, str, dict[str, str | None], list["NodeTuple"], str | HasHTMLDunder | None -] -NODE_KIND = 0 -NODE_TAG = 1 -NODE_ATTRS = 2 -NODE_CHILDREN = 3 -NODE_TEXT = 4 - - -# TODO this is being put together super rapidly and so far it's a mess. -# Once I have a sense of how the features should shake out, and a set of -# test cases I believe in, I will refactor this entirely. -Dave - - -def clsx(*args: object) -> str: - """ - Construct a space-separated class string from various inputs. - - Accepts strings, lists/tuples of strings, and dicts mapping class names to - boolean values. Ignores None and False values. - - Examples: - clsx("btn", "btn-primary") -> "btn btn-primary" - clsx("btn", {"btn-primary": True, "disabled": False}) -> "btn btn-primary" - clsx(["btn", "btn-primary"], {"disabled": True}) -> "btn btn-primary disabled" - clsx("btn", None, False, "active") -> "btn active" - - Args: - *args: Variable length argument list containing strings, lists/tuples, - or dicts. - - Returns: - A single string with class names separated by spaces. - """ - classes: list[str] = [] - - for arg in args: - if isinstance(arg, str): - classes.append(arg) - elif isinstance(arg, (list, tuple)): - classes.append(clsx(*arg)) - elif isinstance(arg, dict): - for key, value in arg.items(): - if bool(value): - classes.append(key) - elif arg is None or isinstance(arg, bool): - continue - else: - raise ValueError(f"Invalid class argument type: {type(arg).__name__}") - - return " ".join(stripped for c in classes if (stripped := c.strip())) - - -def _clsx_single(arg: object) -> str: - """Helper to process a single argument to clsx().""" - if isinstance(arg, str): - return arg.strip() - elif isinstance(arg, (list, tuple)): - return clsx(*arg) - elif isinstance(arg, dict): - classes = [key for key, value in arg.items() if bool(value)] - return " ".join(classes) - elif arg is None or isinstance(arg, bool): - return "" - else: - raise ValueError(f"Invalid class argument type: {type(arg).__name__}") - - -# TODO document, clean up, and individually unit test all helper functions here. - def _attrs( attrs: dict[str, str | None], bookkeep: dict[str, Interpolation] @@ -103,7 +25,7 @@ def _process_attr_key(key: str, value: object) -> dict[str, str | None]: # we might want to map a single key in the input template to multiple # keys in the output element. So the return type here is a dict. if key == "class": - return {key: _clsx_single(value)} + return {key: classnames(value)} elif key == "data": if isinstance(value, dict): return {f"data-{k}": str(v) for k, v in value.items()} diff --git a/html_tstring/html_test.py b/html_tstring/tag_processor_test.py similarity index 90% rename from html_tstring/html_test.py rename to html_tstring/tag_processor_test.py index b691aea..23dca92 100644 --- a/html_tstring/html_test.py +++ b/html_tstring/tag_processor_test.py @@ -2,84 +2,8 @@ import pytest -from .html import SafeHTML, clsx, html from .nodes import Element, Fragment, Text - -# -------------------------------------------------------------------------- -# clsx tests -# -------------------------------------------------------------------------- - - -def test_clsx_empty(): - assert clsx() == "" - - -def test_clsx_strings(): - assert clsx("btn", "btn-primary") == "btn btn-primary" - - -def test_clsx_strings_strip(): - assert clsx(" btn ", " btn-primary ") == "btn btn-primary" - - -def test_cslx_empty_strings(): - assert clsx("", "btn", "", "btn-primary", "") == "btn btn-primary" - - -def test_clsx_lists_and_tuples(): - assert ( - clsx(["btn", "btn-primary"], ("active", "disabled")) - == "btn btn-primary active disabled" - ) - - -def test_clsx_dicts(): - assert ( - clsx( - "btn", - {"btn-primary": True, "disabled": False, "active": True, "shown": "yes"}, - ) - == "btn btn-primary active shown" - ) - - -def test_clsx_mixed_inputs(): - assert ( - clsx( - "btn", - ["btn-primary", "active"], - {"disabled": True, "hidden": False}, - ("extra",), - ) - == "btn btn-primary active disabled extra" - ) - - -def test_clsx_ignores_none_and_false(): - assert ( - clsx("btn", None, False, "active", {"hidden": None, "visible": True}) - == "btn active visible" - ) - - -def test_clsx_raises_type_error_on_invalid_input(): - with pytest.raises(ValueError): - clsx(123) - - with pytest.raises(ValueError): - clsx(["btn", 456]) - - -def test_clsx_kitchen_sink(): - assert ( - clsx( - "foo", - [1 and "bar", {"baz": False, "bat": None}, ["hello", ["world"]]], - "cya", - ) - == "foo bar hello world cya" - ) - +from .tag_processor import SafeHTML, html # -------------------------------------------------------------------------- # Basic HTML parsing tests From 11c6f0c907331dcf4dac5fc58b87c315c9036b28 Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 5 Sep 2025 11:23:25 -0700 Subject: [PATCH 4/7] Clean up format_interpolation() and convert() --- html_tstring/__init__.py | 14 +++ .../{tag_processor.py => processor.py} | 100 +++++++----------- ...ag_processor_test.py => processor_test.py} | 2 +- html_tstring/utils.py | 88 +++++++++++++++ html_tstring/utils_test.py | 93 ++++++++++++++++ 5 files changed, 235 insertions(+), 62 deletions(-) rename html_tstring/{tag_processor.py => processor.py} (77%) rename html_tstring/{tag_processor_test.py => processor_test.py} (99%) create mode 100644 html_tstring/utils.py create mode 100644 html_tstring/utils_test.py diff --git a/html_tstring/__init__.py b/html_tstring/__init__.py index e69de29..28bfdac 100644 --- a/html_tstring/__init__.py +++ b/html_tstring/__init__.py @@ -0,0 +1,14 @@ +from .nodes import Comment, DocumentType, Element, Fragment, Text +from .processor import SafeHTML, html + +# TODO: don't use SafeHTML; adopt markupsafe + +__all__ = [ + "SafeHTML", + "html", + "Element", + "Text", + "Fragment", + "Comment", + "DocumentType", +] diff --git a/html_tstring/tag_processor.py b/html_tstring/processor.py similarity index 77% rename from html_tstring/tag_processor.py rename to html_tstring/processor.py index 3c1ae91..2a6bfce 100644 --- a/html_tstring/tag_processor.py +++ b/html_tstring/processor.py @@ -1,4 +1,7 @@ +import random +import string import typing as t +from functools import lru_cache from string.templatelib import Interpolation, Template from .classnames import classnames @@ -10,6 +13,7 @@ Node, Text, ) +from .parser import parse_html def _attrs( @@ -160,7 +164,7 @@ def _node_or_nodes_from_tuple( text = node[NODE_TEXT] assert text is not None if text in bookkeep: - as_interpolation = bookkeep[str(text)] + as_interpolation = bookkeep[text] print("HERE IS THE INTERPOLATION: ", as_interpolation) bk_value = _format_interpolation(bookkeep[str(text)].value) print("VALUE OF bk_value: ", bk_value) @@ -231,75 +235,49 @@ def __repr__(self) -> str: # and their contexts. Also, to cache parsed templates. -@t.overload -def _convert[T](value: T, conversion: None) -> T: ... +_PLACEHOLDER_PREFIX = f"t🐍-{''.join(random.choices(string.ascii_lowercase, k=4))}-" +_PP_LEN = len(_PLACEHOLDER_PREFIX) -@t.overload -def _convert(value: object, conversion: t.Literal["a", "r", "s"]) -> str: ... +def _placeholder(i: int) -> str: + """Generate a placeholder for the i-th interpolation.""" + return f"{_PLACEHOLDER_PREFIX}{i}" -def _convert[T](value: T, conversion: t.Literal["a", "r", "s"] | None) -> T | str: - print( - f"_convert: value={type(value)}, conversion={conversion}, as_str={str(value)}, as_repr={repr(value)}" - ) - if conversion == "a": - return ascii(value) - elif conversion == "r": - return repr(value) - elif conversion == "s": - return str(value) - else: - return value +def _placholder_index(s: str) -> int: + """Extract the index from a placeholder string.""" + return int(s[_PP_LEN:]) + + +def _instrument(strings: t.Sequence[str]) -> str: + """ + Join the strings with placeholders in between where interpolations go. + This is used to prepare the template string for parsing, so that we can + later substitute the actual interpolated values into the parse tree. -def _format( - value: object, format_spec: str, conversion: t.Literal["a", "r", "s"] | None -) -> object: - converted = _convert(value, conversion) - if format_spec and format_spec != "safe": - return format(converted, format_spec) - return converted + The placeholders are chosen to be unlikely to collide with typical HTML + content. + """ + count = len(strings) + def _placeholder_or_final(i: int, s: str) -> str: + """Return the string with a placeholder if not the last one.""" + # There are always count-1 placeholders between count strings. + return f"{s}{_placeholder(i)}" if i < count - 1 else s -def _format_interpolation(interp: Interpolation) -> object: - return _format(interp.value, interp.format_spec, interp.conversion) + return "".join(_placeholder_or_final(i, s) for i, s in enumerate(strings)) + + +@lru_cache() +def _instrument_and_parse(strings: tuple[str, ...]) -> Node: + instrumented = _instrument(strings) + return parse_html(instrumented) def html(template: Template) -> Node: """Create an HTML element from a string.""" - # TODO: pick a better prefix that is less likely to collide - _prefix = "ts-bk-" - count: int = 0 - callables: dict[t.Callable, str] = {} - bookkeep: dict[str, Interpolation] = {} - - parser = ElementParser() - print("HERE I AM") - for part in template: - if isinstance(part, str): - print(f"FEEDING STRING: '{part}'") - parser.feed(part) - elif hasattr(part.value, "__html__"): - # Parse the HTML, which is presumed safe - parser.feed(part.value.__html__()) - # TODO XXX: better handling for format_spec AND for conversion - elif part.format_spec == "safe": - # Parse the HTML, which is presumed safe - # TODO CONSIDER: what should we do if conversion is set? - parser.feed(str(part.value)) - else: - # TODO: CONSIDER: how to choose a key that won't collide with - # your typical t-string content? - key = f"ts-bk-{count}" - # TODO: CONSIDER: do we want to broaden this key reuse to - # non-callables too? Or is it not worth the complexity? - if callable(part.value): - key = callables.get(part.value, key) - callables[part.value] = key - bookkeep[key] = part - count += 1 - parser.feed(key) - parser.close() - root = parser.get_root() - return _node_from_tuple(root, bookkeep) + # Parse the HTML, returning a tree of nodes with placeholders + # where interpolations go. + placeholder_node = _instrument_and_parse(template.strings) + return _substitute_interpolations(placeholder_node, template.interpolations) diff --git a/html_tstring/tag_processor_test.py b/html_tstring/processor_test.py similarity index 99% rename from html_tstring/tag_processor_test.py rename to html_tstring/processor_test.py index 23dca92..58d5e7e 100644 --- a/html_tstring/tag_processor_test.py +++ b/html_tstring/processor_test.py @@ -3,7 +3,7 @@ import pytest from .nodes import Element, Fragment, Text -from .tag_processor import SafeHTML, html +from .processor import SafeHTML, html # -------------------------------------------------------------------------- # Basic HTML parsing tests diff --git a/html_tstring/utils.py b/html_tstring/utils.py new file mode 100644 index 0000000..d1c25b4 --- /dev/null +++ b/html_tstring/utils.py @@ -0,0 +1,88 @@ +import typing as t +from string.templatelib import Interpolation + + +@t.overload +def convert[T](value: T, conversion: None) -> T: ... + + +@t.overload +def convert(value: object, conversion: t.Literal["a", "r", "s"]) -> str: ... + + +def convert[T](value: T, conversion: t.Literal["a", "r", "s"] | None) -> T | str: + """ + Convert a value according to the given conversion specifier. + + In the future, something like this should probably ship with Python itself. + """ + if conversion == "a": + return ascii(value) + elif conversion == "r": + return repr(value) + elif conversion == "s": + return str(value) + else: + return value + + +type FormatMatcher = t.Callable[[str], bool] +"""A predicate function that returns True if a given format specifier matches its criteria.""" + +type CustomFormatter = t.Callable[[object, str], str] +"""A function that takes a value and a format specifier and returns a formatted string.""" + +type MatcherAndFormatter = tuple[str | FormatMatcher, CustomFormatter] +""" +A pair of a matcher and its corresponding formatter. + +The matcher is used to determine if the formatter should be applied to a given +format specifier. If the matcher is a string, it must exactly match the format +specifier. If it is a FormatMatcher, it is called with the format specifier and +should return True if the formatter should be used. +""" + + +def _matcher_matches(matcher: str | FormatMatcher, format_spec: str) -> bool: + """Check if a matcher matches a given format specifier.""" + return matcher == format_spec if isinstance(matcher, str) else matcher(format_spec) + + +def _format_interpolation( + value: object, + format_spec: str, + conversion: t.Literal["a", "r", "s"] | None, + *, + formatters: t.Sequence[MatcherAndFormatter], +) -> object: + converted = convert(value, conversion) + if format_spec: + for matcher, formatter in formatters: + if _matcher_matches(matcher, format_spec): + return formatter(converted, format_spec) + return format(converted, format_spec) + return converted + + +def format_interpolation( + interpolation: Interpolation, + *, + formatters: t.Sequence[MatcherAndFormatter] = tuple(), +) -> object: + """ + Format an Interpolation's value according to its format spec and conversion. + + PEP 750 allows t-string processing code to decide whether, and how, to + interpret format specifiers. This function takes an optional sequence of + (matcher, formatter) pairs. If a matcher returns True for the given format + spec, the corresponding formatter is used to format the value. If no + matchers match, the default formatting behavior is used. + + Conversions are always applied before formatting. + """ + return _format_interpolation( + interpolation.value, + interpolation.format_spec, + interpolation.conversion, + formatters=formatters, + ) diff --git a/html_tstring/utils_test.py b/html_tstring/utils_test.py new file mode 100644 index 0000000..f00de23 --- /dev/null +++ b/html_tstring/utils_test.py @@ -0,0 +1,93 @@ +from string.templatelib import Interpolation + +from .utils import convert, format_interpolation + + +class Convertible: + def __str__(self) -> str: + return "Convertible str" + + def __repr__(self) -> str: + return "Convertible repr" + + +def test_convert_none(): + value = Convertible() + assert convert(value, None) is value + + +def test_convert_a(): + value = Convertible() + assert convert(value, "a") == "Convertible repr" + assert convert("Café", "a") == "'Caf\\xe9'" + + +def test_convert_r(): + value = Convertible() + assert convert(value, "r") == "Convertible repr" + + +def test_convert_s(): + value = Convertible() + assert convert(value, "s") == "Convertible str" + + +def test_format_interpolation_no_formatting(): + value = Convertible() + interp = Interpolation(value, expression="", conversion=None, format_spec="") + assert format_interpolation(interp) is value + + +def test_format_interpolation_a(): + value = Convertible() + interp = Interpolation(value, expression="", conversion="a", format_spec="") + assert format_interpolation(interp) == "Convertible repr" + + +def test_format_interpolation_r(): + value = Convertible() + interp = Interpolation(value, expression="", conversion="r", format_spec="") + assert format_interpolation(interp) == "Convertible repr" + + +def test_format_interpolation_s(): + value = Convertible() + interp = Interpolation(value, expression="", conversion="s", format_spec="") + assert format_interpolation(interp) == "Convertible str" + + +def test_format_interpolation_default_formatting(): + value = 42 + interp = Interpolation(value, expression="", conversion=None, format_spec="5d") + assert format_interpolation(interp) == " 42" + + +def test_format_interpolation_custom_formatter_match_exact(): + value = 42 + interp = Interpolation(value, expression="", conversion=None, format_spec="custom") + + def formatter(val: object, spec: str) -> str: + return f"formatted-{val}-{spec}" + + assert ( + format_interpolation(interp, formatters=[("custom", formatter)]) + == "formatted-42-custom" + ) + + +def test_format_interpolation_custom_formatter_match_predicate(): + value = 42 + interp = Interpolation( + value, expression="", conversion=None, format_spec="custom123" + ) + + def matcher(spec: str) -> bool: + return spec.startswith("custom") + + def formatter(val: object, spec: str) -> str: + return f"formatted-{val}-{spec}" + + assert ( + format_interpolation(interp, formatters=[(matcher, formatter)]) + == "formatted-42-custom123" + ) From 5f586abc4a98df8cae22d8c254fb1f0c1ffbcbdd Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 5 Sep 2025 11:27:21 -0700 Subject: [PATCH 5/7] Take a hard dependency on MarkupSafe --- pyproject.toml | 4 +++- uv.lock | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e892d24..0e6a31a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,9 @@ version = "0.1.0" description = "Tools to manipulate and render HTML using Python 3.14's t-strings." readme = "README.md" requires-python = ">=3.14" -dependencies = [] +dependencies = [ + "markupsafe>=3.0.2", +] authors = [{ name = "Dave Peck", email = "davepeck@davepeck.org" }] license = { text = "MIT" } classifiers = [ diff --git a/uv.lock b/uv.lock index 049ed6b..dd1c547 100644 --- a/uv.lock +++ b/uv.lock @@ -15,6 +15,9 @@ wheels = [ name = "html-tstring" version = "0.1.0" source = { editable = "." } +dependencies = [ + { name = "markupsafe" }, +] [package.dev-dependencies] dev = [ @@ -25,6 +28,7 @@ dev = [ ] [package.metadata] +requires-dist = [{ name = "markupsafe", specifier = ">=3.0.2" }] [package.metadata.requires-dev] dev = [ @@ -43,6 +47,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537, upload-time = "2024-10-18T15:21:54.129Z" } + [[package]] name = "nodeenv" version = "1.9.1" From 49eb95ce8927775b787e762a75449516bb5310cf Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 5 Sep 2025 13:34:32 -0700 Subject: [PATCH 6/7] Getting closer with clean impl. --- html_tstring/__init__.py | 17 +- html_tstring/nodes.py | 33 ++-- html_tstring/parser.py | 2 +- html_tstring/processor.py | 289 ++++++++------------------------- html_tstring/processor_test.py | 148 +++++++++-------- 5 files changed, 172 insertions(+), 317 deletions(-) diff --git a/html_tstring/__init__.py b/html_tstring/__init__.py index 28bfdac..c2533a0 100644 --- a/html_tstring/__init__.py +++ b/html_tstring/__init__.py @@ -1,14 +1,17 @@ +from markupsafe import Markup, escape + from .nodes import Comment, DocumentType, Element, Fragment, Text -from .processor import SafeHTML, html +from .processor import html -# TODO: don't use SafeHTML; adopt markupsafe +# We consider `Markup` and `escape` to be part of this module's public API __all__ = [ - "SafeHTML", - "html", - "Element", - "Text", - "Fragment", "Comment", "DocumentType", + "Element", + "escape", + "Fragment", + "html", + "Markup", + "Text", ] diff --git a/html_tstring/nodes.py b/html_tstring/nodes.py index cace2ab..52502e2 100644 --- a/html_tstring/nodes.py +++ b/html_tstring/nodes.py @@ -32,7 +32,6 @@ # TODO: consider how significant whitespace is handled from t-string to nodes -@t.runtime_checkable class HasHTMLDunder(t.Protocol): def __html__(self) -> str: ... @@ -40,15 +39,15 @@ def __html__(self) -> str: ... type HTMLDunder = t.Callable[[], str] -@dataclass -class Node(HasHTMLDunder): +@dataclass(slots=True) +class Node: def __html__(self) -> str: """Return the HTML representation of the node.""" # By default, just return the string representation return str(self) -@dataclass +@dataclass(slots=False) class Text(Node): # Django's `SafeString` and Markupsafe/Jinja2's `Markup` both inherit # from `str`, but that is not a requirement for the `__html__` dunder. @@ -56,29 +55,29 @@ class Text(Node): @cached_property def _cached_str(self) -> str: - if isinstance(self.text, HasHTMLDunder): - return self.text.__html__() - return escape(self.text, quote=False) + if hasattr(self.text, "__html__"): + return t.cast(HasHTMLDunder, self.text).__html__() + return escape(t.cast(str, self.text), quote=False) def _as_unescaped(self) -> str: """Return the text as-is, without escaping. For internal use only.""" - if isinstance(self.text, HasHTMLDunder): - return self.text.__html__() - return self.text + if hasattr(self.text, "__html__"): + return t.cast(HasHTMLDunder, self.text).__html__() + return t.cast(str, self.text) def __str__(self) -> str: return self._cached_str -@dataclass +@dataclass(slots=True) class Fragment(Node): - children: t.Sequence[Node] = field(default_factory=list) + children: list[Node] = field(default_factory=list) def __str__(self) -> str: return "".join(str(child) for child in self.children) -@dataclass +@dataclass(slots=True) class Comment(Node): text: str @@ -86,7 +85,7 @@ def __str__(self) -> str: return f"" -@dataclass +@dataclass(slots=True) class DocumentType(Node): text: str = "html" @@ -94,11 +93,11 @@ def __str__(self) -> str: return f"" -@dataclass +@dataclass(slots=True) class Element(Node): tag: str - attrs: t.Mapping[str, str | None] = field(default_factory=dict) - children: t.Sequence[Node] = field(default_factory=list) + attrs: dict[str, str | None] = field(default_factory=dict) + children: list[Node] = field(default_factory=list) def __post_init__(self): """Ensure all preconditions are met.""" diff --git a/html_tstring/parser.py b/html_tstring/parser.py index de51efe..a57400a 100644 --- a/html_tstring/parser.py +++ b/html_tstring/parser.py @@ -86,7 +86,7 @@ def get_most_recent_closed_element(self) -> Element | None: def append_child(self, child: Node) -> None: parent = self.get_parent() # We *know* our parser is using lists for children, so this cast is safe. - t.cast(list[Node], parent.children).append(child) + parent.children.append(child) def close(self) -> None: if self.stack: diff --git a/html_tstring/processor.py b/html_tstring/processor.py index 2a6bfce..efc812d 100644 --- a/html_tstring/processor.py +++ b/html_tstring/processor.py @@ -4,237 +4,19 @@ from functools import lru_cache from string.templatelib import Interpolation, Template -from .classnames import classnames from .nodes import ( - Comment, - DocumentType, Element, Fragment, Node, Text, ) from .parser import parse_html - - -def _attrs( - attrs: dict[str, str | None], bookkeep: dict[str, Interpolation] -) -> dict[str, str | None]: - """Substitute any bookkeeping keys in attributes.""" - result: dict[str, str | None] = {} - - def _process_attr_key(key: str, value: object) -> dict[str, str | None]: - # TODO XXX clarify the contract here. Mostly, we're mapping a single - # key and value in the input template to zero or more key-value pairs - # in the output element. But in the case of `data` (maybe others?), - # we might want to map a single key in the input template to multiple - # keys in the output element. So the return type here is a dict. - if key == "class": - return {key: classnames(value)} - elif key == "data": - if isinstance(value, dict): - return {f"data-{k}": str(v) for k, v in value.items()} - else: - raise ValueError( - f"Invalid value for 'data' attribute: expected dict, got {type(value).__name__}" - ) - elif key == "aria": - if isinstance(value, dict): - return {f"aria-{k}": str(v) for k, v in value.items()} - else: - raise ValueError( - f"Invalid value for 'aria' attribute: expected dict, got {type(value).__name__}" - ) - elif key == "style": - if isinstance(value, dict): - return {key: "; ".join(f"{k}: {v}" for k, v in value.items())} - elif isinstance(value, str): - return {key: value} - else: - raise ValueError( - f"Invalid value for 'style' attribute: expected dict or str, got {type(value).__name__}" - ) - elif isinstance(value, str): - return {key: value} - elif value is None or value is False: - return {} - elif value is True: - return {key: None} - else: - # TODO: do we really want to allow this? - return {key: str(value)} - - # TODO: clean this up when I understand the full logic. It's a mess. - - for key, value in attrs.items(): - if value is not None: - if value in bookkeep: - bk_value = _format_interpolation(bookkeep[value]) - result.update(_process_attr_key(key, bk_value)) - else: - result[key] = value - else: - if key in bookkeep: - bk_value = _format_interpolation(bookkeep[key]) - if isinstance(bk_value, str): - result[bk_value] = None - elif isinstance(bk_value, dict): - for k, v in bk_value.items(): - result.update(_process_attr_key(k, v)) - else: - raise ValueError( - f"Invalid attribute key substitution: {bk_value!r}" - ) - else: - result[key] = None - - return result - - -def _children( - children: list[NodeTuple], bookkeep: dict[str, Interpolation] -) -> tuple[Node, ...]: - """Substitute any bookkeeping keys in children.""" - # TODO XXX: this satisfies the test cases but does not yet recurse. - result: list[Node] = [] - for child in children: - if isinstance(child, str): - if child in bookkeep: - bk_value = _format_interpolation(bookkeep[child]) - if isinstance(bk_value, (Element, Text)): - result.append(bk_value) - elif isinstance(bk_value, Template): - result.append(html(bk_value)) - elif isinstance(bk_value, (list, tuple)): - # TODO XXX this should recurse - for item in bk_value: - if isinstance(item, (Element, Text)): - result.append(item) - elif isinstance(item, Template): - result.append(html(item)) - elif item is False: - pass - else: - result.append(Text(str(item))) - elif bk_value is False: - pass - else: - # TODO: should I handle more types here? - result.append(Text(str(bk_value))) - elif isinstance(child, Fragment): - result.extend(child.children) - elif isinstance(child, Element): - result.append(child) - else: - result.append(Text(child)) - else: - elements = list(_node_or_nodes_from_tuple(child, bookkeep)) - result.extend(elements) - return tuple(result) - - -def _resolve_tag( - tag: str, - bookkeep: dict[str, Interpolation], - attrs: dict[str, str | None], - children: tuple[Node, ...], -) -> str | Node: - if tag in bookkeep: - bk_value = _format_interpolation(bookkeep[tag]) - if isinstance(bk_value, str): - return bk_value - elif callable(bk_value): - result = bk_value(*children, **attrs) - if isinstance(result, (Element, str)): - return result - elif isinstance(result, Template): - return html(result) - elif isinstance(result, str): - return result - else: - raise ValueError(f"Invalid tag callable result: {result!r}") - else: - raise ValueError(f"Invalid tag substitution: {bk_value!r}") - return tag - - -def _node_or_nodes_from_tuple( - node: NodeTuple, bookkeep: dict[str, Interpolation] -) -> t.Iterable[Node]: - if node[NODE_KIND] == KIND_TEXT: - text = node[NODE_TEXT] - assert text is not None - if text in bookkeep: - as_interpolation = bookkeep[text] - print("HERE IS THE INTERPOLATION: ", as_interpolation) - bk_value = _format_interpolation(bookkeep[str(text)].value) - print("VALUE OF bk_value: ", bk_value) - yield Text(str(bk_value)) - else: - yield Text(text) - return - elif node[NODE_KIND] == KIND_COMMENT: - text = node[NODE_TEXT] - # TODO: XXX handle __html__ here? - assert isinstance(text, str) - yield Comment(text) - return - elif node[NODE_KIND] == KIND_DOCTYPE: - text = node[NODE_TEXT] - # TODO: XXX handle __html__ here? - assert isinstance(text, str) or text is None - yield DocumentType(text or "html") - return - elif node[NODE_KIND] not in (KIND_ELEMENT, KIND_FRAGMENT): - raise ValueError(f"Invalid node kind: {node[NODE_KIND]!r}") - attrs = _attrs(node[NODE_ATTRS], bookkeep) - children = _children(node[NODE_CHILDREN], bookkeep) - tag_or_node = _resolve_tag(node[NODE_TAG], bookkeep, attrs, children) - if isinstance(tag_or_node, str): - if tag_or_node == "": - # Fragment - yield Fragment(children=children) - else: - yield Element(tag=tag_or_node, attrs=attrs, children=children) - elif isinstance(tag_or_node, Fragment): - yield from tag_or_node.children - else: - yield tag_or_node - - -def _node_from_tuple(node: NodeTuple, bookkeep: dict[str, Interpolation]) -> Node: - nodes = list(_node_or_nodes_from_tuple(node, bookkeep)) - print("HERE ARE THE NODES: ", nodes) - if len(nodes) == 1: - return nodes[0] - else: - return Fragment(children=tuple(nodes)) - +from .utils import format_interpolation # -------------------------------------------------------------------------- -# Safe HTML support +# Instrumentation, Parsing, and Caching # -------------------------------------------------------------------------- - -class SafeHTML: - """A wrapper to mark a string as safe for direct inclusion in HTML.""" - - def __init__(self, content: str): - self.content = content - - def __html__(self) -> str: - return self.content - - def __str__(self) -> str: - return self.content - - def __repr__(self) -> str: - return f"raw({self.content!r})" - - -# TODO: so much to do here, to handle different types of interpolations -# and their contexts. Also, to cache parsed templates. - - _PLACEHOLDER_PREFIX = f"t🐍-{''.join(random.choices(string.ascii_lowercase, k=4))}-" _PP_LEN = len(_PLACEHOLDER_PREFIX) @@ -261,6 +43,9 @@ def _instrument(strings: t.Sequence[str]) -> str: """ count = len(strings) + # TODO: special case callables() so that we use the same placeholder + # to open *and* close tags. + def _placeholder_or_final(i: int, s: str) -> str: """Return the string with a placeholder if not the last one.""" # There are always count-1 placeholders between count strings. @@ -271,13 +56,73 @@ def _placeholder_or_final(i: int, s: str) -> str: @lru_cache() def _instrument_and_parse(strings: tuple[str, ...]) -> Node: + """ + Instrument the strings and parse the resulting HTML. + + The result is cached to avoid re-parsing the same template multiple times. + """ instrumented = _instrument(strings) return parse_html(instrumented) +# -------------------------------------------------------------------------- +# Placeholder Substitution +# -------------------------------------------------------------------------- + + +def _substitute_attrs( + attrs: dict[str, str | None], interpolations: tuple[Interpolation, ...] +) -> dict[str, str | None]: + new_attrs: dict[str, str | None] = {} + for key, value in attrs.items(): + if key.startswith(_PLACEHOLDER_PREFIX): + index = _placholder_index(key) + interpolation = interpolations[index] + value = format_interpolation(interpolation) + if not isinstance(value, str): + raise ValueError( + f"Attribute interpolation must be a string, got: {value!r}" + ) + new_attrs[key] = value + else: + new_attrs[key] = value + return new_attrs + + +def _substitute_node(p_node: Node, interpolations: tuple[Interpolation, ...]) -> Node: + match p_node: + case Text(text) if str(text).startswith(_PLACEHOLDER_PREFIX): + index = _placholder_index(str(text)) + interpolation = interpolations[index] + value = format_interpolation(interpolation) + match value: + case str(): + return Text(value) + case Node(): + return value + case Template(): + return html(value) + case _: + raise ValueError(f"Invalid interpolation value: {value!r}") + case Element(tag=tag, attrs=attrs, children=children): + new_attrs = _substitute_attrs(attrs, interpolations) + new_children = [_substitute_node(c, interpolations) for c in children] + return Element(tag=tag, attrs=new_attrs, children=new_children) + case Fragment(children=children): + new_children = [_substitute_node(c, interpolations) for c in children] + return Fragment(children=new_children) + case _: + return p_node + + +# -------------------------------------------------------------------------- +# Public API +# -------------------------------------------------------------------------- + + def html(template: Template) -> Node: """Create an HTML element from a string.""" # Parse the HTML, returning a tree of nodes with placeholders # where interpolations go. - placeholder_node = _instrument_and_parse(template.strings) - return _substitute_interpolations(placeholder_node, template.interpolations) + p_node = _instrument_and_parse(template.strings) + return _substitute_node(p_node, template.interpolations) diff --git a/html_tstring/processor_test.py b/html_tstring/processor_test.py index 58d5e7e..7608e08 100644 --- a/html_tstring/processor_test.py +++ b/html_tstring/processor_test.py @@ -1,9 +1,10 @@ from string.templatelib import Template import pytest +from markupsafe import Markup from .nodes import Element, Fragment, Text -from .processor import SafeHTML, html +from .processor import html # -------------------------------------------------------------------------- # Basic HTML parsing tests @@ -38,20 +39,25 @@ def test_parse_chain_of_void_elements(): # Make sure our handling of CPython issue #69445 is reasonable. node = html(t"



    ") assert node == Fragment( - children=( + children=[ Element("br"), Element("hr"), Element("img", attrs={"src": "image.png"}), Element("br"), Element("hr"), - ), + ], ) assert str(node) == '



    ' def test_parse_element_with_text(): node = html(t"

    Hello, world!

    ") - assert node == Element("p", children=(Text("Hello, world!"),)) + assert node == Element( + "p", + children=[ + Text("Hello, world!"), + ], + ) assert str(node) == "

    Hello, world!

    " @@ -60,7 +66,9 @@ def test_parse_element_with_attributes(): assert node == Element( "a", attrs={"href": "https://example.com", "target": "_blank"}, - children=(Text("Link"),), + children=[ + Text("Link"), + ], ) assert str(node) == 'Link' @@ -69,10 +77,10 @@ def test_parse_nested_elements(): node = html(t"

    Hello

    World

    ") assert node == Element( "div", - children=( - Element("p", children=(Text("Hello"),)), - Element("p", children=(Text("World"),)), - ), + children=[ + Element("p", children=[Text("Hello")]), + Element("p", children=[Text("World")]), + ], ) assert str(node) == "

    Hello

    World

    " @@ -85,7 +93,7 @@ def test_parse_nested_elements(): def text_interpolated_text_content(): name = "Alice" node = html(t"

    Hello, {name}!

    ") - assert node == Element("p", children=(Text("Hello, "), Text("Alice"), Text("!"))) + assert node == Element("p", children=[Text("Hello, "), Text("Alice"), Text("!")]) assert str(node) == "

    Hello, Alice!

    " @@ -93,7 +101,7 @@ def test_escaping_of_interpolated_text_content(): name = "" node = html(t"

    Hello, {name}!

    ") assert node == Element( - "p", children=(Text("Hello, "), Text(""), Text("!")) + "p", children=[Text("Hello, "), Text(""), Text("!")] ) assert str(node) == "

    Hello, <Alice & Bob>!

    " @@ -113,11 +121,11 @@ def test_conversions(): node = html(t"
  • {c!s}
  • {c!r}
  • {'😊'!a}
  • ") assert node == Element( "", - children=( - Element("li", children=(Text("string"),)), - Element("li", children=(Text("repr"),)), - Element("li", children=(Text("'\\U0001f60a'"),)), - ), + children=[ + Element("li", children=[Text("string")]), + Element("li", children=[Text("repr")]), + Element("li", children=[Text("'\\U0001f60a'")]), + ], ) @@ -127,10 +135,10 @@ def test_conversions(): def test_raw_html_injection_with_helper(): - raw_content = SafeHTML("I am bold") + raw_content = Markup("I am bold") node = html(t"
    {raw_content}
    ") assert node == Element( - "div", children=(Element("strong", children=(Text("I am bold"),)),) + "div", children=[Element("strong", children=[Text("I am bold")])] ) assert str(node) == "
    I am bold
    " @@ -147,7 +155,7 @@ def __html__(self): content = SafeContent("emphasized") node = html(t"

    Here is some {content}.

    ") assert node == Element( - "p", children=(Text("Here is some "), Text(content), Text(".")) + "p", children=[Text("Here is some "), Text(content), Text(".")] ) assert str(node) == "

    Here is some emphasized.

    " @@ -158,11 +166,11 @@ def test_raw_html_injection_with_format_spec(): # TODO XXX: this is wrong; raw_content should be wrapped in Text assert node == Element( "p", - children=( + children=[ Text("This is "), - Element("u", children=(Text("underlined"),)), + Element("u", children=[Text("underlined")]), Text(" text."), - ), + ], ) assert str(node) == "

    This is underlined text.

    " @@ -179,7 +187,7 @@ def test_conditional_rendering_with_if_else(): node = html(t"
    {user_profile if is_logged_in else login_prompt}
    ") assert node == Element( - "div", children=(Element("span", children=(Text("Welcome, User!"),)),) + "div", children=[Element("span", children=[Text("Welcome, User!")])] ) assert str(node) == "
    Welcome, User!
    " @@ -195,9 +203,9 @@ def test_conditional_rendering_with_and(): assert node == Element( "main", - children=( - Element("div", attrs={"class": "warning"}, children=(Text("Warning!"),)), - ), + children=[ + Element("div", attrs={"class": "warning"}, children=[Text("Warning!")]), + ], ) assert str(node) == '
    Warning!
    ' @@ -215,14 +223,14 @@ def test_conditional_rendering_with_and(): def test_interpolated_template_content(): child = t"Child" node = html(t"
    {child}
    ") - assert node == Element("div", children=(html(child),)) + assert node == Element("div", children=[html(child)]) assert str(node) == "
    Child
    " def test_interpolated_element_content(): child = html(t"Child") node = html(t"
    {child}
    ") - assert node == Element("div", children=(child,)) + assert node == Element("div", children=[child]) assert str(node) == "
    Child
    " @@ -230,7 +238,7 @@ def test_interpolated_nonstring_content(): number = 42 node = html(t"

    The answer is {number}.

    ") assert node == Element( - "p", children=(Text("The answer is "), Text("42"), Text(".")) + "p", children=[Text("The answer is "), Text("42"), Text(".")] ) assert str(node) == "

    The answer is 42.

    " @@ -240,11 +248,11 @@ def test_list_items(): node = html(t"
      {[t'
    • {item}
    • ' for item in items]}
    ") assert node == Element( "ul", - children=( - Element("li", children=(Text("Apple"),)), - Element("li", children=(Text("Banana"),)), - Element("li", children=(Text("Cherry"),)), - ), + children=[ + Element("li", children=[Text("Apple")]), + Element("li", children=[Text("Banana")]), + Element("li", children=[Text("Cherry")]), + ], ) assert str(node) == "
    • Apple
    • Banana
    • Cherry
    " @@ -258,36 +266,36 @@ def test_nested_list_items(): node = html(t"
      {outer_items}
    ") assert node == Element( "ul", - children=( + children=[ Element( "li", - children=( + children=[ Text("fruit"), Element( "ul", - children=( - Element("li", children=(Text("apple"),)), - Element("li", children=(Text("banana"),)), - Element("li", children=(Text("cherry"),)), - ), + children=[ + Element("li", children=[Text("apple")]), + Element("li", children=[Text("banana")]), + Element("li", children=[Text("cherry")]), + ], ), - ), + ], ), Element( "li", - children=( + children=[ Text("more fruit"), Element( "ul", - children=( - Element("li", children=(Text("apple"),)), - Element("li", children=(Text("banana"),)), - Element("li", children=(Text("cherry"),)), - ), + children=[ + Element("li", children=[Text("apple")]), + Element("li", children=[Text("banana")]), + Element("li", children=[Text("cherry")]), + ], ), - ), + ], ), - ), + ], ) assert ( str(node) @@ -304,7 +312,7 @@ def test_interpolated_attribute_value(): url = "https://example.com/" node = html(t'Link') assert node == Element( - "a", attrs={"href": "https://example.com/"}, children=(Text("Link"),) + "a", attrs={"href": "https://example.com/"}, children=[Text("Link")] ) assert str(node) == 'Link' @@ -315,7 +323,7 @@ def test_escaping_of_interpolated_attribute_value(): assert node == Element( "a", attrs={"href": 'https://example.com/?q="test"&lang=en'}, - children=(Text("Link"),), + children=[Text("Link")], ) assert ( str(node) @@ -326,7 +334,7 @@ def test_escaping_of_interpolated_attribute_value(): def test_interpolated_unquoted_attribute_value(): id = "roquefort" node = html(t"
    Cheese
    ") - assert node == Element("div", attrs={"id": "roquefort"}, children=(Text("Cheese"),)) + assert node == Element("div", attrs={"id": "roquefort"}, children=[Text("Cheese")]) assert str(node) == '
    Cheese
    ' @@ -334,7 +342,7 @@ def test_interpolated_attribute_value_true(): disabled = True node = html(t"") assert node == Element( - "button", attrs={"disabled": None}, children=(Text("Click me"),) + "button", attrs={"disabled": None}, children=[Text("Click me")] ) assert str(node) == "" @@ -343,7 +351,7 @@ def test_interpolated_attribute_value_falsy(): disabled = False crumpled = None node = html(t"") - assert node == Element("button", attrs={}, children=(Text("Click me"),)) + assert node == Element("button", attrs={}, children=[Text("Click me")]) assert str(node) == "" @@ -353,7 +361,7 @@ def test_interpolated_attribute_spread_dict(): assert node == Element( "a", attrs={"href": "https://example.com/", "target": "_blank"}, - children=(Text("Link"),), + children=[Text("Link")], ) assert str(node) == 'Link' @@ -365,7 +373,7 @@ def test_interpolated_mixed_attribute_values_and_spread_dict(): assert node == Element( "a", attrs={"href": "https://example.com/", "id": "link1", "target": "_blank"}, - children=(Text("Link"),), + children=[Text("Link")], ) assert ( str(node) @@ -380,7 +388,7 @@ def test_multiple_attribute_spread_dicts(): assert node == Element( "a", attrs={"href": "https://example.com/", "id": "link1", "target": "_blank"}, - children=(Text("Link"),), + children=[Text("Link")], ) assert ( str(node) @@ -394,7 +402,7 @@ def test_interpolated_class_attribute(): assert node == Element( "button", attrs={"class": "btn btn-primary active"}, - children=(Text("Click me"),), + children=[Text("Click me")], ) assert str(node) == '' @@ -405,7 +413,7 @@ def test_interpolated_attribute_spread_with_class_attribute(): assert node == Element( "button", attrs={"id": "button1", "class": "btn btn-primary"}, - children=(Text("Click me"),), + children=[Text("Click me")], ) assert str(node) == '' @@ -416,7 +424,7 @@ def test_interpolated_data_attributes(): assert node == Element( "div", attrs={"data-user-id": "123", "data-role": "admin"}, - children=(Text("User Info"),), + children=[Text("User Info")], ) assert str(node) == '
    User Info
    ' @@ -427,7 +435,7 @@ def test_interpolated_aria_attributes(): assert node == Element( "button", attrs={"aria-label": "Close", "aria-hidden": "True"}, - children=(Text("X"),), + children=[Text("X")], ) assert str(node) == '' @@ -438,7 +446,7 @@ def test_interpolated_style_attribute(): assert node == Element( "p", attrs={"style": "color: red; font-weight: bold; font-size: 16px"}, - children=(Text("Warning!"),), + children=[Text("Warning!")], ) assert ( str(node) @@ -474,7 +482,7 @@ def test_interpolated_template_component(): "data-second": "99", "class": "my-comp", }, - children=(Text("Component: "), Text("Hello, Component!")), + children=[Text("Component: "), Text("Hello, Component!")], ) assert ( str(node) @@ -497,14 +505,14 @@ def test_fragment_from_component(): node = html(t"<{ColumnsComponent} />
    ") assert node == Element( "table", - children=( + children=[ Element( "tr", - children=( - Element("td", children=(Text("Column 1"),)), - Element("td", children=(Text("Column 2"),)), - ), + children=[ + Element("td", children=[Text("Column 1")]), + Element("td", children=[Text("Column 2")]), + ], ), - ), + ], ) assert str(node) == "
    Column 1Column 2
    " From 536a4981f13d65741cdd9a99de41e870ab5808ad Mon Sep 17 00:00:00 2001 From: Dave Date: Fri, 5 Sep 2025 16:43:41 -0700 Subject: [PATCH 7/7] This is starting to feel coherent. --- html_tstring/nodes.py | 11 +++-- html_tstring/parser.py | 15 ++++++ html_tstring/parser_test.py | 22 ++++++++- html_tstring/processor.py | 87 +++++++++++++++++++++++++++------- html_tstring/processor_test.py | 10 ++-- 5 files changed, 114 insertions(+), 31 deletions(-) diff --git a/html_tstring/nodes.py b/html_tstring/nodes.py index 52502e2..b0bd227 100644 --- a/html_tstring/nodes.py +++ b/html_tstring/nodes.py @@ -32,6 +32,7 @@ # TODO: consider how significant whitespace is handled from t-string to nodes +@t.runtime_checkable class HasHTMLDunder(t.Protocol): def __html__(self) -> str: ... @@ -55,15 +56,15 @@ class Text(Node): @cached_property def _cached_str(self) -> str: - if hasattr(self.text, "__html__"): - return t.cast(HasHTMLDunder, self.text).__html__() + if isinstance(self.text, HasHTMLDunder): + return self.text.__html__() return escape(t.cast(str, self.text), quote=False) def _as_unescaped(self) -> str: """Return the text as-is, without escaping. For internal use only.""" - if hasattr(self.text, "__html__"): - return t.cast(HasHTMLDunder, self.text).__html__() - return t.cast(str, self.text) + if isinstance(self.text, HasHTMLDunder): + return self.text.__html__() + return self.text def __str__(self) -> str: return self._cached_str diff --git a/html_tstring/parser.py b/html_tstring/parser.py index a57400a..e9f1719 100644 --- a/html_tstring/parser.py +++ b/html_tstring/parser.py @@ -111,7 +111,22 @@ def get_node(self) -> Node: def parse_html(input_html: str) -> Node: + """Parse an HTML string into a Node tree.""" parser = NodeParser() parser.feed(input_html) parser.close() return parser.get_node() + + +def parse_html_iter(input_html: t.Iterable[str]) -> Node: + """ + Parse a sequence of HTML string chunks into a Node tree. + + This is particularly useful if your sequence keeps separate text nodes + that you wish to preserve intact. + """ + parser = NodeParser() + for chunk in input_html: + parser.feed(chunk) + parser.close() + return parser.get_node() diff --git a/html_tstring/parser_test.py b/html_tstring/parser_test.py index 191f0c2..f778bc2 100644 --- a/html_tstring/parser_test.py +++ b/html_tstring/parser_test.py @@ -1,7 +1,7 @@ import pytest from .nodes import Comment, DocumentType, Element, Fragment, Text -from .parser import parse_html +from .parser import parse_html, parse_html_iter def test_parse_empty(): @@ -163,3 +163,23 @@ def test_parse_mismatched_tags(): def test_parse_unclosed_tag(): with pytest.raises(ValueError): _ = parse_html("
    Unclosed") + + +def test_parse_html_iter_preserves_chunks(): + chunks = [ + "
    ", + "Hello ", + "there, ", + "world", + "!
    ", + ] + node = parse_html_iter(chunks) + assert node == Element( + "div", + children=[ + Text("Hello "), + Text("there, "), + Element("span", children=[Text("world")]), + Text("!"), + ], + ) diff --git a/html_tstring/processor.py b/html_tstring/processor.py index efc812d..26611b1 100644 --- a/html_tstring/processor.py +++ b/html_tstring/processor.py @@ -1,17 +1,35 @@ import random import string import typing as t +from collections.abc import Iterable from functools import lru_cache from string.templatelib import Interpolation, Template -from .nodes import ( - Element, - Fragment, - Node, - Text, -) -from .parser import parse_html -from .utils import format_interpolation +from markupsafe import Markup + +from .nodes import Element, Fragment, HasHTMLDunder, Node, Text +from .parser import parse_html_iter +from .utils import format_interpolation as base_format_interpolation + +# -------------------------------------------------------------------------- +# Value formatting +# -------------------------------------------------------------------------- + + +def _format_safe(value: object, format_spec: str) -> str: + assert format_spec == "safe" + return Markup(value) + + +CUSTOM_FORMATTERS = (("safe", _format_safe),) + + +def format_interpolation(interpolation: Interpolation) -> object: + return base_format_interpolation( + interpolation, + formatters=CUSTOM_FORMATTERS, + ) + # -------------------------------------------------------------------------- # Instrumentation, Parsing, and Caching @@ -31,7 +49,7 @@ def _placholder_index(s: str) -> int: return int(s[_PP_LEN:]) -def _instrument(strings: t.Sequence[str]) -> str: +def _instrument(strings: tuple[str, ...]) -> t.Iterable[str]: """ Join the strings with placeholders in between where interpolations go. @@ -46,12 +64,11 @@ def _instrument(strings: t.Sequence[str]) -> str: # TODO: special case callables() so that we use the same placeholder # to open *and* close tags. - def _placeholder_or_final(i: int, s: str) -> str: - """Return the string with a placeholder if not the last one.""" + for i, s in enumerate(strings): + yield s # There are always count-1 placeholders between count strings. - return f"{s}{_placeholder(i)}" if i < count - 1 else s - - return "".join(_placeholder_or_final(i, s) for i, s in enumerate(strings)) + if i < count - 1: + yield _placeholder(i) @lru_cache() @@ -62,7 +79,7 @@ def _instrument_and_parse(strings: tuple[str, ...]) -> Node: The result is cached to avoid re-parsing the same template multiple times. """ instrumented = _instrument(strings) - return parse_html(instrumented) + return parse_html_iter(instrumented) # -------------------------------------------------------------------------- @@ -89,6 +106,33 @@ def _substitute_attrs( return new_attrs +def _substitute_and_flatten_children( + children: t.Iterable[Node], interpolations: tuple[Interpolation, ...] +) -> list[Node]: + """Substitute placeholders in a list of children and flatten any fragments.""" + new_children: list[Node] = [] + for child in children: + substituted = _substitute_node(child, interpolations) + if isinstance(substituted, Fragment): + # This can happen if an interpolation results in a Fragment, for + # instance if it is iterable. + new_children.extend(substituted.children) + else: + new_children.append(substituted) + return new_children + + +def _node_from_value(value: object) -> Node: + """Convert a value to a Node, if possible.""" + # This is a bit of a hack, but it lets us handle Markup and + # other objects that implement __html__ without special-casing them here. + # We use a Text node to wrap the value, then parse it back out. + # This is not the most efficient, but it is simple and works. + node = Text(_placeholder(0)) + interpolations = (Interpolation(value, "", None, ""),) + return _substitute_node(node, interpolations) + + def _substitute_node(p_node: Node, interpolations: tuple[Interpolation, ...]) -> Node: match p_node: case Text(text) if str(text).startswith(_PLACEHOLDER_PREFIX): @@ -102,14 +146,21 @@ def _substitute_node(p_node: Node, interpolations: tuple[Interpolation, ...]) -> return value case Template(): return html(value) + case HasHTMLDunder(): + return Text(value) + case False: + return Text("") + case Iterable(): + children = [_node_from_value(v) for v in value] + return Fragment(children=children) case _: - raise ValueError(f"Invalid interpolation value: {value!r}") + return Text(str(value)) case Element(tag=tag, attrs=attrs, children=children): new_attrs = _substitute_attrs(attrs, interpolations) - new_children = [_substitute_node(c, interpolations) for c in children] + new_children = _substitute_and_flatten_children(children, interpolations) return Element(tag=tag, attrs=new_attrs, children=new_children) case Fragment(children=children): - new_children = [_substitute_node(c, interpolations) for c in children] + new_children = _substitute_and_flatten_children(children, interpolations) return Fragment(children=new_children) case _: return p_node diff --git a/html_tstring/processor_test.py b/html_tstring/processor_test.py index 7608e08..5ea7e75 100644 --- a/html_tstring/processor_test.py +++ b/html_tstring/processor_test.py @@ -119,8 +119,7 @@ def test_conversions(): assert f"{c!s}" == "string" assert f"{c!r}" == "repr" node = html(t"
  • {c!s}
  • {c!r}
  • {'😊'!a}
  • ") - assert node == Element( - "", + assert node == Fragment( children=[ Element("li", children=[Text("string")]), Element("li", children=[Text("repr")]), @@ -137,9 +136,7 @@ def test_conversions(): def test_raw_html_injection_with_helper(): raw_content = Markup("I am bold") node = html(t"
    {raw_content}
    ") - assert node == Element( - "div", children=[Element("strong", children=[Text("I am bold")])] - ) + assert node == Element("div", children=[Text(text=raw_content)]) assert str(node) == "
    I am bold
    " @@ -163,12 +160,11 @@ def __html__(self): def test_raw_html_injection_with_format_spec(): raw_content = "underlined" node = html(t"

    This is {raw_content:safe} text.

    ") - # TODO XXX: this is wrong; raw_content should be wrapped in Text assert node == Element( "p", children=[ Text("This is "), - Element("u", children=[Text("underlined")]), + Text(Markup(raw_content)), Text(" text."), ], )