77use Djot \Exception \ProfileViolationException ;
88use Djot \LinkPolicy ;
99use Djot \Node \Block \BlockNode ;
10+ use Djot \Node \Block \BlockQuote ;
1011use Djot \Node \Block \Paragraph ;
12+ use Djot \Node \Block \Table ;
13+ use Djot \Node \Block \TableRow ;
1114use Djot \Node \Document ;
15+ use Djot \Node \Inline \FootnoteRef ;
16+ use Djot \Node \Inline \HardBreak ;
1217use Djot \Node \Inline \Image ;
1318use Djot \Node \Inline \Link ;
19+ use Djot \Node \Inline \Symbol ;
1420use Djot \Node \Inline \Text ;
1521use Djot \Node \Node ;
1622use Djot \Profile ;
@@ -185,7 +191,7 @@ protected function convertToText(Node $node, Node $parent): void
185191 // For inline nodes, just replace with text
186192 if ($ node instanceof BlockNode) {
187193 $ paragraph = new Paragraph ();
188- $ paragraph -> appendChild ( new Text ( $ textContent) );
194+ $ this -> appendTextWithBreaks ( $ paragraph , $ textContent );
189195 $ parent ->replaceChildNode ($ node , $ paragraph );
190196 } else {
191197 // Inline node - replace with text
@@ -194,6 +200,25 @@ protected function convertToText(Node $node, Node $parent): void
194200 }
195201 }
196202
/**
 * Append text content to a node, emitting a HardBreak node for every newline.
 *
 * The content is split on "\n". Each non-empty segment is appended as a Text
 * node; empty segments add no Text node. A HardBreak is appended after every
 * segment except the final one, so N newlines always yield N HardBreak nodes.
 *
 * @param Node $parent Node that receives the new children
 * @param string $content Text to append, possibly containing newlines
 *
 * @return void
 */
protected function appendTextWithBreaks(Node $parent, string $content): void
{
    $segments = explode("\n", $content);

    // explode() always returns at least one element, so there is always a
    // final segment; it is handled separately because no break follows it.
    $finalSegment = array_pop($segments);

    foreach ($segments as $segment) {
        if ($segment !== '') {
            $parent->appendChild(new Text($segment));
        }
        $parent->appendChild(new HardBreak());
    }

    if ($finalSegment !== '') {
        $parent->appendChild(new Text($finalSegment));
    }
}
221+
197222 /**
198223 * Remove empty container nodes (list items, paragraphs with no content, empty lists)
199224 */
@@ -264,6 +289,45 @@ protected function extractTextContent(Node $node): string
264289 return $ text ;
265290 }
266291
292+ // Special handling for tables - preserve row structure
293+ if ($ node instanceof Table) {
294+ $ rows = [];
295+ foreach ($ node ->getChildren () as $ row ) {
296+ if ($ row instanceof TableRow) {
297+ $ cells = [];
298+ foreach ($ row ->getChildren () as $ cell ) {
299+ $ cells [] = $ this ->extractTextContent ($ cell );
300+ }
301+ $ rows [] = implode (' ' , $ cells );
302+ }
303+ }
304+
305+ return implode ("\n" , $ rows );
306+ }
307+
308+ // Special handling for blockquotes - preserve paragraph structure
309+ if ($ node instanceof BlockQuote) {
310+ $ paragraphs = [];
311+ foreach ($ node ->getChildren () as $ child ) {
312+ $ text = $ this ->extractTextContent ($ child );
313+ if ($ text !== '' ) {
314+ $ paragraphs [] = $ text ;
315+ }
316+ }
317+
318+ return implode ("\n" , $ paragraphs );
319+ }
320+
321+ // Special handling for symbols - use the symbol name
322+ if ($ node instanceof Symbol) {
323+ return ': ' . $ node ->getName () . ': ' ;
324+ }
325+
326+ // Special handling for footnote references - use the label
327+ if ($ node instanceof FootnoteRef) {
328+ return '[^ ' . $ node ->getLabel () . '] ' ;
329+ }
330+
267331 if ($ node instanceof Text) {
268332 return $ node ->getContent ();
269333 }
0 commit comments