Skip to content

Commit 8e67e36

Browse files
dereuromark and claude authored
Add MarkdownToDjot converter for source-to-source transformation (#4)
Converts common Markdown syntax to Djot equivalents:

- **bold** / __bold__ → *bold*
- *italic* → _italic_
- ***bold italic*** → *_bold italic_*
- ~~strikethrough~~ → {-strikethrough-}
- ==highlight== → {=highlight=}
- ^superscript^ → {^superscript^}
- ~subscript~ → {~subscript~}

Preserves code blocks/spans and existing Djot braced syntax.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 2e58bc3 commit 8e67e36

File tree

2 files changed

+477
-0
lines changed

2 files changed

+477
-0
lines changed

src/Converter/MarkdownToDjot.php

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Djot\Converter;
6+
7+
use RuntimeException;
8+
9+
/**
 * Converts Markdown syntax to Djot syntax
 *
 * This performs a line-by-line, regex-based source-to-source transformation, not parsing.
 * It handles common Markdown inline patterns and converts them to their Djot equivalents,
 * leaving fenced code blocks, inline code spans and already-braced Djot markup untouched.
 */
class MarkdownToDjot
{
    /**
     * Matches Markdown emphasis written with single asterisks.
     *
     * The delimiters must not be adjacent to another asterisk and must hug their content
     * (no whitespace right after the opener or right before the closer), so a bullet marker
     * such as the one in "* item" is never mistaken for an emphasis opener.
     */
    private const EMPHASIS_PATTERN = '/(?<!\*)\*(?!\s)([^*]+)(?<!\s)\*(?!\*)/';

    /**
     * Convert Markdown text to Djot text
     *
     * The input is split on "\n". Lines inside fenced code blocks (``` or ~~~) are copied
     * verbatim; every other line goes through convertInlineFormatting().
     *
     * @param string $markdown Markdown source
     *
     * @return string Djot source with the same number of lines as the input
     */
    public function convert(string $markdown): string
    {
        $output = [];
        $fenceChar = '';
        $fenceLength = 0; // 0 means "not inside a fenced code block"

        foreach (explode("\n", $markdown) as $line) {
            if ($fenceLength > 0) {
                // A closing fence uses the opening character and is at least as long as the
                // opening fence; a shorter run is ordinary code-block content.
                $closing = '/^' . $fenceChar . '{' . $fenceLength . ',}\s*$/';
                if (preg_match($closing, $line) === 1) {
                    $fenceChar = '';
                    $fenceLength = 0;
                }
                $output[] = $line;

                continue;
            }

            // Opening fence: 3+ backticks or tildes at the start of the line. The info string
            // of a backtick fence cannot contain backticks, otherwise the line is inline code.
            if (preg_match('/^(?:(`{3,})[^`]*|(~{3,}).*)$/', $line, $matches) === 1) {
                $fence = $matches[1] !== '' ? $matches[1] : $matches[2];
                $fenceChar = $fence[0];
                $fenceLength = strlen($fence);
                $output[] = $line;

                continue;
            }

            $output[] = $this->convertInlineFormatting($line);
        }

        return implode("\n", $output);
    }

    /**
     * Convert inline Markdown formatting to Djot
     *
     * Inline code spans and existing Djot braced markup are swapped for NUL-delimited
     * placeholders first so that none of the later patterns can touch them. Converted strong
     * spans are parked behind placeholders as well, which keeps the single-asterisk emphasis
     * pattern from re-matching the Djot "*strong*" output.
     *
     * @param string $line A single line of Markdown (outside of any fenced code block)
     *
     * @return string The line with inline formatting converted to Djot
     */
    protected function convertInlineFormatting(string $line): string
    {
        $protected = [];
        $protect = static function (array $match) use (&$protected): string {
            $placeholder = "\x00PROTECTED" . count($protected) . "\x00";
            $protected[$placeholder] = $match[0];

            return $placeholder;
        };

        // Inline code spans: a run of N backticks closed by a run of exactly N backticks.
        $line = preg_replace_callback('/(?<!`)(`+)(?!`)(.+?)(?<!`)\1(?!`)/', $protect, $line) ?? $line;

        // Existing Djot syntax: {-text-}, {=text=}, {^text^}, {~text~}
        $line = preg_replace_callback('/\{[-=^~][^}]+[-=^~]\}/', $protect, $line) ?? $line;

        $strong = [];
        $stash = static function (string $djot) use (&$strong): string {
            $placeholder = "\x00STRONG" . count($strong) . "\x00";
            $strong[$placeholder] = $djot;

            return $placeholder;
        };

        // ___bold italic___ -> *_bold italic_*
        $line = preg_replace_callback('/___(.+?)___/', static function (array $match) use ($stash): string {
            return $stash('*_' . $match[1] . '_*');
        }, $line) ?? $line;

        // ***bold italic*** -> *_bold italic_* (3+ asterisks on both sides)
        $line = preg_replace_callback('/(\*{3,})(.+?)(\*{3,})/', static function (array $match) use ($stash): string {
            return $stash('*_' . $match[2] . '_*');
        }, $line) ?? $line;

        // **bold** -> *bold*, converting any *italic* nested inside to _italic_
        $line = preg_replace_callback('/\*\*(.+?)\*\*/', static function (array $match) use ($stash): string {
            $inner = preg_replace(self::EMPHASIS_PATTERN, '_$1_', $match[1]) ?? $match[1];

            return $stash('*' . $inner . '*');
        }, $line) ?? $line;

        // __bold__ -> *bold*
        $line = preg_replace_callback('/__(.+?)__/', static function (array $match) use ($stash): string {
            return $stash('*' . $match[1] . '*');
        }, $line) ?? $line;

        // *italic* -> _italic_ ; strong output is hidden behind placeholders at this point,
        // so every remaining single-asterisk pair is Markdown emphasis.
        $line = preg_replace(self::EMPHASIS_PATTERN, '_$1_', $line) ?? $line;

        // ~~strikethrough~~ -> {-strikethrough-}
        $line = preg_replace('/~~([^~]+)~~/', '{-$1-}', $line) ?? $line;

        // ==highlight== -> {=highlight=}
        $line = preg_replace('/==([^=]+)==/', '{=$1=}', $line) ?? $line;

        // ^superscript^ -> {^superscript^}; a caret right after "[" belongs to a footnote
        // reference such as [^1] and one right after "{" is already Djot.
        $line = preg_replace('/(?<![\[{])\^([^^]+)\^(?!\})/', '{^$1^}', $line) ?? $line;

        // ~subscript~ -> {~subscript~}; single tildes only
        $line = preg_replace('/(?<![~{])~([^~}]+)~(?![~}])/', '{~$1~}', $line) ?? $line;

        // A placeholder's stored text can only contain placeholders created before it, so
        // restoring newest-first guarantees that nested placeholders are expanded too.
        foreach (array_reverse($strong, true) as $placeholder => $content) {
            $line = str_replace($placeholder, $content, $line);
        }
        foreach (array_reverse($protected, true) as $placeholder => $content) {
            $line = str_replace($placeholder, $content, $line);
        }

        return $line;
    }

    /**
     * Convert a Markdown file to Djot
     *
     * @param string $inputPath Path of the Markdown file to read
     *
     * @throws \RuntimeException If file cannot be read
     *
     * @return string The converted Djot text
     */
    public function convertFile(string $inputPath): string
    {
        if (!is_file($inputPath)) {
            throw new RuntimeException("File not found: {$inputPath}");
        }

        $content = file_get_contents($inputPath);
        if ($content === false) {
            throw new RuntimeException("Failed to read file: {$inputPath}");
        }

        return $this->convert($content);
    }

    /**
     * Convert a Markdown file and save as Djot
     *
     * When no output path is given, a trailing ".md" (case-insensitive) on the input path is
     * replaced with ".djot"; any other input path gets ".djot" appended.
     *
     * @param string $inputPath Path of the Markdown file to read
     * @param string|null $outputPath Destination path, or null to derive it from $inputPath
     *
     * @throws \RuntimeException If file cannot be read or written
     */
    public function convertFileAndSave(string $inputPath, ?string $outputPath = null): void
    {
        $djot = $this->convertFile($inputPath);

        if ($outputPath === null) {
            $outputPath = preg_replace('/\.md$/i', '.djot', $inputPath) ?? $inputPath;
            if ($outputPath === $inputPath) {
                // No ".md" suffix to swap, so never overwrite the input file.
                $outputPath .= '.djot';
            }
        }

        if (file_put_contents($outputPath, $djot) === false) {
            throw new RuntimeException("Failed to write file: {$outputPath}");
        }
    }
}

0 commit comments

Comments
 (0)