Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 43 additions & 32 deletions Sources/DocCHTML/MarkdownRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -675,58 +675,69 @@ package struct MarkdownRenderer<Provider: LinkProvider> {
// - An empty element like `<br />` or `<hr />` that's complete on its own.
// - An element with children like `<span style="color: red;">Something</span>` that needs to be created out of multiple markup elements.
//
// FIXME: See if this can be extracted into 2 private functions to make the code easier to read.
// Because it may take multiple markdown elements to create an HTML element, we pop elements rather than iterating
var elements = Array(container)
outer: while !elements.isEmpty {
let element = elements.removeFirst()

guard let start = element as? InlineHTML else {
var remainder = Array(container)[...]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you're using a slice here, I imagine this is quite efficient, even though we are mutating remainder.

while let element = remainder.popFirst() {
guard let openingHTML = element as? InlineHTML else {
// If the markup _isn't_ inline HTML we can simply visit it to transform it.
children.append(visit(element))
continue
}

// Otherwise, we need to determine how long this markdown element it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Otherwise, we need to determine how long this markdown element it.
// Otherwise, we need to determine how long this markdown element is.

var rawHTML = start.rawHTML
let rawHTML = openingHTML.rawHTML
// Simply skip any HTML/XML comments.
guard !rawHTML.hasPrefix("<!--") else {
// If it's a basic link, simply skip it.
continue
}

// Next, check if its empty element (for example `<br />` or `<hr />`) that's complete on its own.
if let parsed = try? XMLElement(xmlString: rawHTML) {

// On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML strings.
// To minimize the risk of this happening, don't try to parse the XML string as an empty HTML element unless it ends with "/>"
if rawHTML.hasSuffix("/>"), let parsed = try? XMLElement(xmlString: rawHTML) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you run into some crashes while running your new tests in Linux CI?

children.append(parsed)
continue
}

// This could be an HTML element with content or it could be invalid HTML.
// Don't modify `elements` until we know that we've parsed a valid HTML element.
var copy = elements

// Gradually check a longer and longer series of markup elements to see if they form a valid HTML element.
inner: while !copy.isEmpty, let next = copy.first as? any InlineMarkup {
_ = copy.removeFirst()

// Skip any HTML/XML comments _inside_ this HTML tag
if let html = next as? InlineHTML, html.rawHTML.hasPrefix("<!--") {
continue inner
}

rawHTML += next.format()
if let parsed = try? XMLElement(xmlString: rawHTML) {
children.append(parsed) // Include the valid HTML element in the output.
elements = copy // Skip over all the elements that were used to create that HTML element.
continue outer
}
// Lastly, check if this is the start of an HTML element that needs to be constructed out of more than one markup element
else if let parsed = _findMultiMarkupHTMLElement(in: &remainder, openingRawHTML: rawHTML) {
children.append(parsed)
}
// If we reached the end of the inline elements without parsing a valid HTML element, skip that first InlineHTML markup and continue from there.
continue
}

return children
}

private func _findMultiMarkupHTMLElement(in remainder: inout ArraySlice<any Markup>, openingRawHTML: String) -> XMLNode? {
// Don't modify `remainder` until we know that we've parsed a valid HTML element.
var copy = remainder

var rawHTML = openingRawHTML
let tagName = rawHTML.dropFirst(/* the opening "<" */).prefix(while: \.isLetter)
let expectedClosingTag = "</\(tagName)>"

// Only iterate as long the markup is _inline_ markup.
while let next = copy.first as? any InlineMarkup {
_ = copy.removeFirst()
let html = next as? InlineHTML

// Skip any HTML/XML comments _inside_ this HTML tag
if let html, html.rawHTML.hasPrefix("<!--") {
continue
}

// If this wasn't a comment, accumulate more raw HTML to try and parse
rawHTML += next.format()
// On non-Darwin platforms, `XMLElement(xmlString:)` sometimes crashes for certain invalid / incomplete XML strings.
// To minimize the risk of this happening, don't try to parse the XML string as an empty HTML element unless it ends with "/>"
if html?.rawHTML == expectedClosingTag, let parsed = try? XMLElement(xmlString: rawHTML) {
remainder = copy // Skip over all the elements that were used to create that HTML element.
return parsed // Include the valid HTML element in the output.
}
}
// If we reached the end of the _inline_ markup without parsing a valid HTML element, skip just that opening markup without updating `remainder`
return nil
}

// MARK: Directives

func visit(_: BlockDirective) -> XMLNode {
Expand Down
48 changes: 48 additions & 0 deletions Tests/DocCHTMLTests/MarkdownRendererTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,54 @@ struct MarkdownRendererTests {
)
}

@Test
func testParsesAndPreservesHTMLExceptComments() {
assert(
rendering: "This is a <!-- inline comment --><strong>formatted</strong> paragraph.",
matches: "<p>This is a <strong>formatted</strong> paragraph.</p>"
)

assert(
rendering: "This<br/> is a <em><!-- multi\n line\n comment-->formatted</em>paragraph.",
matches: "<p>This<br/> is a <em>formatted</em> paragraph.</p>"
)

assert(
rendering: "This is a <span style=\"color: red\"><!-- before -->custom formatted<!-- after --></span> paragraph.",
matches: "<p>This is a <span style=\"color: red\">custom formatted</span> paragraph.</p>"
)

// This markup doesn't properly close the `<strong>` tag (it uses an `</em>` tag.
// In this case we drop both tags but not their content in between. This matches what DocC does for inline HTML with regards to the Render JSON output.
assert(
rendering: "This is a <strong>custom formatted</em> paragraph.",
matches: "<p>This is a custom formatted paragraph.</p>"
)

// Any content _within_ HTML tags in the markdown isn't parsed as markdown content.
assert(
rendering: "This is a <span>custom **not** formatted</span> paragraph.",
matches: "<p>This is a <span>custom **not** formatted</span> paragraph.</p>"
)

assert(
rendering: """
<details>
<summary>Some summary<!-- comment in summary--></summary>
<!-- comment between elements -->
<p><!-- comment before -->Some longer<!-- comment between words --> description<!-- comment after --></p>
</details>
<!-- comment after block element -->
""",
matches: """
<details>
<summary>Some summary</summary>
<p>Some longer description</p>
</details>
"""
)
}

private func assert(
rendering markdownContent: String,
elementToReturn: LinkedElement? = nil,
Expand Down