Skip to content

Commit ff52b6b

Browse files
committed
Fine tune filtering.
1 parent f2c2425 commit ff52b6b

File tree

2 files changed

+141
-1
lines changed

2 files changed

+141
-1
lines changed

src/Filter/ProfileFilter.php

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@
77
use Djot\Exception\ProfileViolationException;
88
use Djot\LinkPolicy;
99
use Djot\Node\Block\BlockNode;
10+
use Djot\Node\Block\BlockQuote;
1011
use Djot\Node\Block\Paragraph;
12+
use Djot\Node\Block\Table;
13+
use Djot\Node\Block\TableRow;
1114
use Djot\Node\Document;
15+
use Djot\Node\Inline\FootnoteRef;
16+
use Djot\Node\Inline\HardBreak;
1217
use Djot\Node\Inline\Image;
1318
use Djot\Node\Inline\Link;
19+
use Djot\Node\Inline\Symbol;
1420
use Djot\Node\Inline\Text;
1521
use Djot\Node\Node;
1622
use Djot\Profile;
@@ -185,7 +191,7 @@ protected function convertToText(Node $node, Node $parent): void
185191
// For inline nodes, just replace with text
186192
if ($node instanceof BlockNode) {
187193
$paragraph = new Paragraph();
188-
$paragraph->appendChild(new Text($textContent));
194+
$this->appendTextWithBreaks($paragraph, $textContent);
189195
$parent->replaceChildNode($node, $paragraph);
190196
} else {
191197
// Inline node - replace with text
@@ -194,6 +200,25 @@ protected function convertToText(Node $node, Node $parent): void
194200
}
195201
}
196202

203+
/**
 * Split the content on "\n" and append the pieces to the given node.
 *
 * Every non-empty piece is appended as a Text child, and one HardBreak is
 * appended for each newline in the content. Empty pieces produce no Text
 * node, so consecutive newlines yield consecutive HardBreak nodes.
 *
 * @param Node $parent Node that receives the Text / HardBreak children
 * @param string $content Text to split on newline characters
 */
protected function appendTextWithBreaks(Node $parent, string $content): void
{
    foreach (explode("\n", $content) as $position => $segment) {
        // Each piece after the first is preceded by the newline that split it off
        if ($position > 0) {
            $parent->appendChild(new HardBreak());
        }

        if ($segment === '') {
            continue;
        }

        $parent->appendChild(new Text($segment));
    }
}
221+
197222
/**
198223
* Remove empty container nodes (list items, paragraphs with no content, empty lists)
199224
*/
@@ -264,6 +289,45 @@ protected function extractTextContent(Node $node): string
264289
return $text;
265290
}
266291

292+
// Special handling for tables - preserve row structure
293+
if ($node instanceof Table) {
294+
$rows = [];
295+
foreach ($node->getChildren() as $row) {
296+
if ($row instanceof TableRow) {
297+
$cells = [];
298+
foreach ($row->getChildren() as $cell) {
299+
$cells[] = $this->extractTextContent($cell);
300+
}
301+
$rows[] = implode(' ', $cells);
302+
}
303+
}
304+
305+
return implode("\n", $rows);
306+
}
307+
308+
// Special handling for blockquotes - preserve paragraph structure
309+
if ($node instanceof BlockQuote) {
310+
$paragraphs = [];
311+
foreach ($node->getChildren() as $child) {
312+
$text = $this->extractTextContent($child);
313+
if ($text !== '') {
314+
$paragraphs[] = $text;
315+
}
316+
}
317+
318+
return implode("\n", $paragraphs);
319+
}
320+
321+
// Special handling for symbols - use the symbol name
322+
if ($node instanceof Symbol) {
323+
return ':' . $node->getName() . ':';
324+
}
325+
326+
// Special handling for footnote references - use the label
327+
if ($node instanceof FootnoteRef) {
328+
return '[^' . $node->getLabel() . ']';
329+
}
330+
267331
if ($node instanceof Text) {
268332
return $node->getContent();
269333
}

tests/TestCase/ProfileTest.php

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,30 @@ public function testCodeBlockContentPreservedAsText(): void
802802
$this->assertStringContainsString('code block content', $html);
803803
}
804804

805+
public function testTableConvertedToStructuredText(): void
806+
{
807+
$profile = Profile::comment();
808+
$converter = new DjotConverter(profile: $profile);
809+
810+
$djot = <<<'DJOT'
811+
| Name | Type |
812+
|------|--------|
813+
| Djot | Markup |
814+
| PHP | Code |
815+
DJOT;
816+
817+
$html = $converter->convert($djot);
818+
819+
// Table tag should not appear
820+
$this->assertStringNotContainsString('<table>', $html);
821+
// Content should be preserved with row structure
822+
$this->assertStringContainsString('Name Type', $html);
823+
$this->assertStringContainsString('Djot Markup', $html);
824+
$this->assertStringContainsString('PHP Code', $html);
825+
// Rows should be on separate lines (converted to <br> tags)
826+
$this->assertStringContainsString('Name Type<br>', $html);
827+
}
828+
805829
// ==================== Whitespace Preservation Tests ====================
806830

807831
public function testFilteredBlocksDoNotRunTogether(): void
@@ -860,4 +884,56 @@ public function testNestedBlockContentPreservesWhitespace(): void
860884
// Should not run together
861885
$this->assertStringNotContainsString('First lineSecond line', $html);
862886
}
887+
888+
public function testSymbolConvertedToTextRepresentation(): void
889+
{
890+
$profile = Profile::minimal();
891+
$converter = new DjotConverter(profile: $profile);
892+
893+
$html = $converter->convert('I :heart: this!');
894+
895+
// Symbol tag should not appear
896+
$this->assertStringNotContainsString('<symbol', $html);
897+
// Symbol name should be preserved with colons
898+
$this->assertStringContainsString(':heart:', $html);
899+
}
900+
901+
public function testFootnoteRefConvertedToTextRepresentation(): void
902+
{
903+
$profile = Profile::minimal();
904+
$converter = new DjotConverter(profile: $profile);
905+
906+
$djot = <<<'DJOT'
907+
This has a footnote[^1].
908+
909+
[^1]: The footnote content.
910+
DJOT;
911+
912+
$html = $converter->convert($djot);
913+
914+
// Footnote elements should not appear
915+
$this->assertStringNotContainsString('<a href="#fn', $html);
916+
// Footnote reference should be converted to text
917+
$this->assertStringContainsString('[^1]', $html);
918+
}
919+
920+
public function testBlockquoteWithMultipleParagraphsPreservesLineBreaks(): void
921+
{
922+
$profile = Profile::minimal();
923+
$converter = new DjotConverter(profile: $profile);
924+
925+
$djot = <<<'DJOT'
926+
> First paragraph.
927+
>
928+
> Second paragraph.
929+
DJOT;
930+
931+
$html = $converter->convert($djot);
932+
933+
$this->assertStringNotContainsString('<blockquote>', $html);
934+
$this->assertStringContainsString('First paragraph.', $html);
935+
$this->assertStringContainsString('Second paragraph.', $html);
936+
// Paragraphs should be separated by line breaks
937+
$this->assertStringContainsString('<br>', $html);
938+
}
863939
}

0 commit comments

Comments
 (0)