Skip to content

Commit 29d5f62

Browse files
committed
Add EBNF parser
1 parent 3cb57fe commit 29d5f62

18 files changed

+792
-5
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Vojtechdobes\GrammarProcessing\Grammars;
4+
5+
use Generator;
6+
use Vojtechdobes\GrammarProcessing;
7+
8+
9+
/**
 * Interprets an `alternation` node into a vocabulary object.
 *
 * The node's value is read positionally: element 1 is the first alternative
 * and element 3 holds the repeated "separator + alternative" items, each of
 * which carries its alternative at index 2.
 */
final class AlternationNodeInterpretation implements GrammarProcessing\NodeInterpretation
{

    /**
     * Each `yield` hands one child node to the caller and receives back
     * whatever the caller sends into the generator for it.
     *
     * @return Generator the generator's return value is the lone alternative
     *                   when only one was collected, otherwise a `OneOf`
     *                   wrapping all of them in source order
     */
    public function interpret(GrammarProcessing\Node $node): Generator
    {
        $firstAlternative = yield $node->value[1];
        $alternatives = [$firstAlternative];

        $furtherItems = $node->value[3]->value;

        foreach ($furtherItems as $furtherItem) {
            $alternatives[] = yield $furtherItem->value[2];
        }

        // A single alternative needs no OneOf wrapper.
        return count($alternatives) === 1
            ? $alternatives[0]
            : new GrammarProcessing\Vocabulary\OneOf($alternatives);
    }

}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Vojtechdobes\GrammarProcessing\Grammars;
4+
5+
use Generator;
6+
use Vojtechdobes\GrammarProcessing;
7+
8+
9+
/**
 * Interprets a `concatenation` node into a vocabulary object.
 *
 * The node's value is read positionally: element 1 is the leading member and
 * element 3 holds the repeated "separator + member" items, each of which
 * carries its member at index 2.
 */
final class ConcatenationNodeInterpretation implements GrammarProcessing\NodeInterpretation
{

    /**
     * Each `yield` hands one child node to the caller and receives back
     * whatever the caller sends into the generator for it.
     *
     * @return Generator the generator's return value is the lone member when
     *                   only one was collected, otherwise a `Sequence`
     *                   wrapping all members in source order
     */
    public function interpret(GrammarProcessing\Node $node): Generator
    {
        $head = yield $node->value[1];
        $tail = [];

        foreach ($node->value[3]->value as $followingItem) {
            $tail[] = yield $followingItem->value[2];
        }

        // Nothing followed the head, so no Sequence wrapper is needed.
        if ($tail === []) {
            return $head;
        }

        return new GrammarProcessing\Vocabulary\Sequence([$head, ...$tail]);
    }

}
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Vojtechdobes\GrammarProcessing\Grammars;
4+
5+
use Vojtechdobes\GrammarProcessing;
6+
7+
8+
/**
 * Builds the grammar describing EBNF notation itself out of Vocabulary objects.
 *
 * Element order inside the sequences below is significant: the node
 * interpretations address children by position (for example index 1 for the
 * first member of `concatenation` / `alternation`, index 3 for the repeated
 * tail and index 2 for the member inside each repeated item).
 */
final class EbnfGrammarFactory
{

    /**
     * Assembles the lexical and syntactic symbols into a Grammar.
     *
     * `identifier`, `terminal`, `symbol` and `S` are passed as the syntax
     * token symbols; the list of ignored token symbols is empty.
     */
    public function createGrammar(): GrammarProcessing\Grammar
    {
        return new GrammarProcessing\Grammar(
            lexicalSymbols: $this->createLexicalSymbols(),
            syntaxTokenSymbols: [
                'identifier',
                'terminal',
                'symbol',
                'S',
            ],
            ignoredTokenSymbols: [],
            syntacticSymbols: $this->createSyntacticSymbols(),
        );
    }


    /**
     * Defines the lexical symbols: letter, digit, symbol, character,
     * identifier, terminal and S (a run of one or more whitespace characters).
     *
     * @return array<string, object> vocabulary objects keyed by symbol name
     */
    private function createLexicalSymbols(): array
    {
        return [
            'letter' => new GrammarProcessing\Vocabulary\Regexp('[a-zA-Z]'),
            'digit' => new GrammarProcessing\Vocabulary\Regexp('[0-9]'),
            // Punctuation accepted as a single `symbol`; the last entry is one backslash.
            'symbol' => new GrammarProcessing\Vocabulary\OneOf(array_map(
                static fn (string $char): GrammarProcessing\Vocabulary\Literal
                    => new GrammarProcessing\Vocabulary\Literal($char),
                [
                    '[', ']', '{', '}', '(', ')', '<', '>',
                    "'", '"', '=', '|', '.', ',', ';',
                    '-', '+', '*', '?', '\\',
                ],
            )),
            'character' => new GrammarProcessing\Vocabulary\OneOf([
                new GrammarProcessing\Vocabulary\Nonterminal('letter'),
                new GrammarProcessing\Vocabulary\Nonterminal('digit'),
                new GrammarProcessing\Vocabulary\Nonterminal('symbol'),
                new GrammarProcessing\Vocabulary\Literal('_'),
                new GrammarProcessing\Vocabulary\Literal(' '),
            ]),
            // identifier = letter , { letter | digit | "_" }
            'identifier' => new GrammarProcessing\Vocabulary\Sequence([
                new GrammarProcessing\Vocabulary\Nonterminal('letter'),
                new GrammarProcessing\Vocabulary\Repeat(
                    new GrammarProcessing\Vocabulary\OneOf([
                        new GrammarProcessing\Vocabulary\Nonterminal('letter'),
                        new GrammarProcessing\Vocabulary\Nonterminal('digit'),
                        new GrammarProcessing\Vocabulary\Literal('_'),
                    ]),
                    0,
                    null,
                ),
            ]),
            // A terminal is a non-empty run of characters in single or double quotes.
            'terminal' => new GrammarProcessing\Vocabulary\OneOf([
                $this->createQuotedTerminal("'"),
                $this->createQuotedTerminal('"'),
            ]),
            'S' => new GrammarProcessing\Vocabulary\Repeat(
                new GrammarProcessing\Vocabulary\OneOf([
                    new GrammarProcessing\Vocabulary\Literal(' '),
                    new GrammarProcessing\Vocabulary\Regexp('\n'),
                    new GrammarProcessing\Vocabulary\Regexp('\t'),
                    new GrammarProcessing\Vocabulary\Regexp('\r'),
                    new GrammarProcessing\Vocabulary\Regexp('\f'),
                    // Backspace. A bare `\b` outside a character class is PCRE's
                    // zero-width word-boundary assertion, so the character is
                    // named by its code instead.
                    // NOTE(review): assumes Regexp passes its argument to PCRE
                    // as a pattern fragment - confirm against Vocabulary\Regexp.
                    new GrammarProcessing\Vocabulary\Regexp('\x08'),
                ]),
                1,
                null,
            ),
        ];
    }


    /**
     * Defines the syntactic symbols: opt_S, terminator, term, factor,
     * concatenation, alternation, rhs, lhs, rule and grammar.
     *
     * @return array<string, object> vocabulary objects keyed by symbol name
     */
    private function createSyntacticSymbols(): array
    {
        return [
            'opt_S' => new GrammarProcessing\Vocabulary\Repeat(
                new GrammarProcessing\Vocabulary\Nonterminal('S'),
                0,
                null,
            ),
            'terminator' => new GrammarProcessing\Vocabulary\OneOf([
                new GrammarProcessing\Vocabulary\Literal(';'),
                new GrammarProcessing\Vocabulary\Literal('.'),
            ]),
            // term = "(" rhs ")" | "[" rhs "]" | "{" rhs "}" | terminal | identifier
            'term' => new GrammarProcessing\Vocabulary\OneOf([
                $this->createBracketedRhs('(', ')'),
                $this->createBracketedRhs('[', ']'),
                $this->createBracketedRhs('{', '}'),
                new GrammarProcessing\Vocabulary\Nonterminal('terminal'),
                new GrammarProcessing\Vocabulary\Nonterminal('identifier'),
            ]),
            // factor = term "?" | term "*" | term "+" | term "-" term | term
            'factor' => new GrammarProcessing\Vocabulary\OneOf([
                $this->createSuffixedTerm('?'),
                $this->createSuffixedTerm('*'),
                $this->createSuffixedTerm('+'),
                new GrammarProcessing\Vocabulary\Sequence([
                    new GrammarProcessing\Vocabulary\Nonterminal('term'),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                    new GrammarProcessing\Vocabulary\Literal('-'),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                    new GrammarProcessing\Vocabulary\Nonterminal('term'),
                ]),
                new GrammarProcessing\Vocabulary\Sequence([
                    new GrammarProcessing\Vocabulary\Nonterminal('term'),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                ]),
            ]),
            'concatenation' => $this->createSeparatedList('factor', ','),
            'alternation' => $this->createSeparatedList('concatenation', '|'),
            'rhs' => new GrammarProcessing\Vocabulary\Nonterminal('alternation'),
            'lhs' => new GrammarProcessing\Vocabulary\Nonterminal('identifier'),
            // rule = lhs "=" rhs terminator (optional whitespace between the parts)
            'rule' => new GrammarProcessing\Vocabulary\Sequence([
                new GrammarProcessing\Vocabulary\Nonterminal('lhs'),
                new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                new GrammarProcessing\Vocabulary\Literal('='),
                new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                new GrammarProcessing\Vocabulary\Nonterminal('rhs'),
                new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                new GrammarProcessing\Vocabulary\Nonterminal('terminator'),
            ]),
            // grammar = { rule } with optional whitespace around every rule
            'grammar' => new GrammarProcessing\Vocabulary\Repeat(
                new GrammarProcessing\Vocabulary\Sequence([
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                    new GrammarProcessing\Vocabulary\Nonterminal('rule'),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                ]),
                0,
                null,
            ),
        ];
    }


    /**
     * Builds: quote , ( character - quote ) repeated at least once , quote.
     */
    private function createQuotedTerminal(string $quote): GrammarProcessing\Vocabulary\Sequence
    {
        return new GrammarProcessing\Vocabulary\Sequence([
            new GrammarProcessing\Vocabulary\Literal($quote),
            new GrammarProcessing\Vocabulary\Repeat(
                new GrammarProcessing\Vocabulary\Subtract(
                    new GrammarProcessing\Vocabulary\Nonterminal('character'),
                    new GrammarProcessing\Vocabulary\Literal($quote),
                ),
                1,
                null,
            ),
            new GrammarProcessing\Vocabulary\Literal($quote),
        ]);
    }


    /**
     * Builds: open , opt_S , rhs , opt_S , close.
     */
    private function createBracketedRhs(string $open, string $close): GrammarProcessing\Vocabulary\Sequence
    {
        return new GrammarProcessing\Vocabulary\Sequence([
            new GrammarProcessing\Vocabulary\Literal($open),
            new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
            new GrammarProcessing\Vocabulary\Nonterminal('rhs'),
            new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
            new GrammarProcessing\Vocabulary\Literal($close),
        ]);
    }


    /**
     * Builds: term , opt_S , suffix.
     */
    private function createSuffixedTerm(string $suffix): GrammarProcessing\Vocabulary\Sequence
    {
        return new GrammarProcessing\Vocabulary\Sequence([
            new GrammarProcessing\Vocabulary\Nonterminal('term'),
            new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
            new GrammarProcessing\Vocabulary\Literal($suffix),
        ]);
    }


    /**
     * Builds: opt_S , member , opt_S , { separator , opt_S , member , opt_S }.
     *
     * The first member sits at index 1, the repetition at index 3, and the
     * member inside each repeated item at index 2.
     */
    private function createSeparatedList(string $member, string $separator): GrammarProcessing\Vocabulary\Sequence
    {
        return new GrammarProcessing\Vocabulary\Sequence([
            new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
            new GrammarProcessing\Vocabulary\Nonterminal($member),
            new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
            new GrammarProcessing\Vocabulary\Repeat(
                new GrammarProcessing\Vocabulary\Sequence([
                    new GrammarProcessing\Vocabulary\Literal($separator),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                    new GrammarProcessing\Vocabulary\Nonterminal($member),
                    new GrammarProcessing\Vocabulary\Nonterminal('opt_S'),
                ]),
                0,
                null,
            ),
        ]);
    }

}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Vojtechdobes\GrammarProcessing\Grammars;
4+
5+
use Vojtechdobes\GrammarProcessing;
6+
7+
8+
/**
 * Creates the Interpretation used to turn a parsed EBNF tree into a grammar.
 */
final class EbnfInterpretationFactory
{

    /**
     * Maps each interpreted symbol name to its node interpretation.
     *
     * The three symbol lists are forwarded unchanged to the interpretation
     * registered for the `grammar` symbol; no other entry receives them.
     *
     * @template TLexicalSymbol of string
     * @template TSyntaxTokenSymbol of TLexicalSymbol
     * @template TIgnoredTokenSymbol of TLexicalSymbol
     * @param non-empty-list<TLexicalSymbol> $lexicalSymbols
     * @param non-empty-list<TSyntaxTokenSymbol> $syntaxTokenSymbols
     * @param list<TIgnoredTokenSymbol> $ignoredTokenSymbols
     */
    public function createInterpretation(
        array $lexicalSymbols,
        array $syntaxTokenSymbols,
        array $ignoredTokenSymbols,
    ): GrammarProcessing\Interpretation
    {
        $interpretationsBySymbol = [
            'alternation' => new AlternationNodeInterpretation(),
            'concatenation' => new ConcatenationNodeInterpretation(),
            'factor' => new FactorNodeInterpretation(),
            'grammar' => new GrammarNodeInterpretation(
                lexicalSymbols: $lexicalSymbols,
                syntaxTokenSymbols: $syntaxTokenSymbols,
                ignoredTokenSymbols: $ignoredTokenSymbols,
            ),
            'identifier' => new IdentifierNodeInterpretation(),
            'lhs' => new LhsNodeInterpretation(),
            'rhs' => new RhsNodeInterpretation(),
            'rule' => new RuleNodeInterpretation(),
            'term' => new TermNodeInterpretation(),
            'terminal' => new TerminalNodeInterpretation(),
        ];

        return new GrammarProcessing\Interpretation($interpretationsBySymbol);
    }

}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<?php declare(strict_types=1);
2+
3+
namespace Vojtechdobes\GrammarProcessing\Grammars;
4+
5+
use Vojtechdobes\GrammarProcessing;
6+
7+
8+
/**
 * Turns EBNF input into a Grammar, starting from either source text or an
 * existing abstract syntax tree.
 */
final class EbnfParser
{

    /** Grammar produced by EbnfGrammarFactory; used to parse source text. */
    private readonly GrammarProcessing\Grammar $ebnfGrammar;

    /** Interpretation produced by EbnfInterpretationFactory; applied to parsed trees. */
    private readonly GrammarProcessing\Interpretation $ebnfInterpretation;



    /**
     * Prepares the EBNF grammar and the interpretation once, up front.
     *
     * The three symbol lists are handed straight to
     * EbnfInterpretationFactory::createInterpretation().
     *
     * @template TLexicalSymbol of string
     * @template TSyntaxTokenSymbol of TLexicalSymbol
     * @template TIgnoredTokenSymbol of TLexicalSymbol
     * @param non-empty-list<TLexicalSymbol> $lexicalSymbols
     * @param non-empty-list<TSyntaxTokenSymbol> $syntaxTokenSymbols
     * @param list<TIgnoredTokenSymbol> $ignoredTokenSymbols
     */
    public function __construct(
        array $lexicalSymbols,
        array $syntaxTokenSymbols,
        array $ignoredTokenSymbols,
    )
    {
        $grammarFactory = new EbnfGrammarFactory();
        $this->ebnfGrammar = $grammarFactory->createGrammar();

        $interpretationFactory = new EbnfInterpretationFactory();
        $this->ebnfInterpretation = $interpretationFactory->createInterpretation(
            lexicalSymbols: $lexicalSymbols,
            syntaxTokenSymbols: $syntaxTokenSymbols,
            ignoredTokenSymbols: $ignoredTokenSymbols,
        );
    }



    /**
     * Parses $source with the EBNF grammar, passing 'grammar' as the second
     * argument of parseSource(), and interprets the result.
     */
    public function parseGrammarFromSource(string $source): GrammarProcessing\Grammar
    {
        $parsed = $this->ebnfGrammar->parseSource($source, 'grammar');

        return $parsed->interpret($this->ebnfInterpretation);
    }



    /**
     * Interprets an already parsed tree with the stored interpretation.
     */
    public function parseGrammarFromAbstractSyntaxTree(
        GrammarProcessing\AbstractSyntaxTree $abstractSyntaxTree,
    ): GrammarProcessing\Grammar
    {
        $interpretation = $this->ebnfInterpretation;

        return $abstractSyntaxTree->interpret($interpretation);
    }

}

0 commit comments

Comments
 (0)