Commit cb7fe41

lvan100 authored and lianghuan committed
fix(parser): fix embedded type parsing
1 parent b6ce1b9 commit cb7fe41

10 files changed (+1153, -835 lines)


lib/tidl/TLexer.g4

Lines changed: 10 additions & 5 deletions
@@ -4,7 +4,6 @@
 lexer grammar TLexer;
 
 // Define additional channels for whitespace and comments.
-// This ensures they don’t interfere with parsing, but can still be preserved if needed.
 channels {WS_CHAN, SL_COMMENT_CHAN, ML_COMMENT_CHAN}
 
 // --------------------
@@ -47,7 +46,7 @@ RIGHT_BRACE : '}';
 EQUAL : '=';
 COMMA : ',';
 QUESTION : '?';
-AT : '@';
+SEMI : ';' ;
 
 // --------------------
 // String literal
@@ -88,20 +87,26 @@ fragment DIGIT : '0'..'9';
 fragment LETTER : 'A'..'Z' | 'a'..'z';
 fragment HEX_DIGIT : DIGIT | 'A'..'F' | 'a'..'f';
 
+// --------------------
+// Newline
+// --------------------
+NEWLINE
+    : '\r'? '\n'
+    ;
+
 // --------------------
 // Whitespace
-// Skipped by sending to WS_CHAN
 // --------------------
 WHITESPACE
-    : [ \t\r\n]+ -> channel(WS_CHAN)
+    : [ \t]+ -> channel(WS_CHAN)
     ;
 
 // --------------------
 // Single-line comments
 // Supports both // and # styles
 // --------------------
 SINGLE_LINE_COMMENT
-    : ('//' | '#') ~[\r\n]* ('\r'? '\n')? -> channel(SL_COMMENT_CHAN)
+    : ('//' | '#') ~[\r\n]* -> channel(SL_COMMENT_CHAN)
     ;
 
 // --------------------
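
Illustration (not part of the diff): newlines are no longer folded into WHITESPACE but emitted as a dedicated NEWLINE token on the default channel, and single-line comments no longer consume their trailing line break. A minimal Go sketch against the generated lexer makes the new token stream visible; the helper name is hypothetical and it assumes "fmt" plus the antlr Go runtime already imported alongside the generated code.

// dumpTokens is a hypothetical helper (same package as the generated TLexer).
// It prints the token stream for one line of TIDL source: the trailing comment
// is routed to SL_COMMENT_CHAN without consuming the line break, and the '\n'
// itself now shows up as a NEWLINE token on the default channel.
func dumpTokens(src string) {
    lexer := NewTLexer(antlr.NewInputStream(src))
    stream := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
    stream.Fill()
    for _, t := range stream.GetAllTokens() {
        fmt.Printf("type=%d channel=%d text=%q\n", t.GetTokenType(), t.GetChannel(), t.GetText())
    }
}

// dumpTokens("string? name = \"x\" // note\n")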

lib/tidl/TParser.g4

Lines changed: 42 additions & 39 deletions
@@ -7,13 +7,15 @@ options { tokenVocab = TLexer; }
 
 // --------------------
 // Document root
+// A document consists of zero or more definitions separated by terminators
+// and ends with EOF.
 // --------------------
 document
-    : definition* EOF
+    : ((definition terminator) | terminator)* EOF
     ;
 
 // --------------------
-// Definition types: const, enum, type, rpc
+// Top-level definitions: const, enum, type, oneof, rpc
 // --------------------
 definition
     : const_def | enum_def | type_def | oneof_def | rpc_def
@@ -34,39 +36,46 @@ const_type
 
 // --------------------
 // Enum definition
-// Example: enum A { A = 1 }
+// Example:
+// enum A {
+//     RED = 1
+//     GREEN = 2
+// }
 // --------------------
 enum_def
-    : KW_ENUM IDENTIFIER LEFT_BRACE enum_field* RIGHT_BRACE
+    : KW_ENUM IDENTIFIER LEFT_BRACE terminator? (enum_field terminator)* terminator? RIGHT_BRACE
     ;
 
-// Enum field
+// Enum field: name = integer
 enum_field
     : IDENTIFIER EQUAL INTEGER
     ;
 
 // --------------------
 // Type definition
-// Example:
+// Example 1:
 // type A<T> {
 //     B?
-//     string? field = "1" ( go.type="string" )
+//     string? field = "1" (go.type="string")
 // }
+// Example 2:
 // type Alias Map<string,User>
 // --------------------
 type_def
-    : KW_TYPE IDENTIFIER (LESS_THAN IDENTIFIER GREATER_THAN)? LEFT_BRACE type_field* RIGHT_BRACE
-    | KW_TYPE IDENTIFIER IDENTIFIER LESS_THAN generic_type GREATER_THAN
+    // Structured type with optional generic parameter
+    : KW_TYPE IDENTIFIER (LESS_THAN IDENTIFIER GREATER_THAN)? LEFT_BRACE terminator? (type_field terminator)* terminator? RIGHT_BRACE
+    // Type alias to a generic container
+    | KW_TYPE IDENTIFIER IDENTIFIER LESS_THAN value_type GREATER_THAN
     ;
 
 // A type field can be either an embedded type or a named typed field
 type_field
-    : common_type_field | embed_type_field
+    : embed_type_field | common_type_field
    ;
 
 // Embedded field: user-defined type (optionally nullable with '?')
 embed_type_field
-    : '@'user_type
+    : user_type
    ;
 
 // Common field: type + name + optional default value + optional annotations
@@ -83,14 +92,9 @@ common_field_type
     | TYPE_BINARY
     ;
 
-// Generic type
-generic_type
-    : base_type | user_type | container_type
-    ;
-
 // --------------------
 // Field annotations
-// Example: ( go.type="string", db.index=true )
+// Example: (go.type="string", db.index=true)
 // --------------------
 type_annotations
     : LEFT_PAREN annotation (COMMA annotation)* RIGHT_PAREN
@@ -100,17 +104,12 @@ type_annotations
 // OneOf definition
 // Example:
 // oneof Value {
-//    A? a
-//    B? b
+//     A? a
+//     B? b
 // }
 // --------------------
 oneof_def
-    : KW_ONEOF IDENTIFIER LEFT_BRACE oneof_field* RIGHT_BRACE
-    ;
-
-// OneOf fields must be normal named fields
-oneof_field
-    : common_type_field
+    : KW_ONEOF IDENTIFIER LEFT_BRACE terminator? (common_type_field terminator)* terminator? RIGHT_BRACE
     ;
 
 // --------------------
@@ -127,19 +126,18 @@ rpc_req
     : IDENTIFIER
     ;
 
-// RPC response type:
-// Either an identifier, a generic form (Type<T>), or a stream<T>
+// RPC response type: identifier, generic form (Type<T>), or stream<T>
 rpc_resp
     : IDENTIFIER
     | TYPE_STREAM LESS_THAN user_type GREATER_THAN
     ;
 
 // RPC annotations (inside { ... })
 rpc_annotations
-    : LEFT_BRACE annotation* RIGHT_BRACE
+    : LEFT_BRACE terminator? (annotation terminator)* terminator? RIGHT_BRACE
     ;
 
-// Annotation for type or RPC
+// Annotation key-value pair
 // Example: method="GET"
 annotation
     : IDENTIFIER (EQUAL const_value)?
@@ -153,21 +151,19 @@ base_type
     : (TYPE_BOOL | TYPE_INT | TYPE_FLOAT | TYPE_STRING) QUESTION?
     ;
 
-// User-defined type (identifier, optionally nullable with '?')
+// User-defined type, optionally nullable with '?'
 user_type
     : IDENTIFIER QUESTION?
     ;
 
 // --------------------
-// Container types
-// map<K,V> or list<T>
+// Container types: map<K,V> or list<T>
 // --------------------
 container_type
     : map_type | list_type
     ;
 
-// Map type
-// Example: map<string,int>
+// Map type: map<string,int>
 map_type
     : TYPE_MAP LESS_THAN key_type COMMA value_type GREATER_THAN
     ;
@@ -177,8 +173,7 @@ key_type
     : TYPE_STRING | TYPE_INT
     ;
 
-// List type
-// Example: list<User>
+// List type: list<User>
 list_type
     : TYPE_LIST LESS_THAN value_type GREATER_THAN
     ;
@@ -189,10 +184,18 @@ value_type
     ;
 
 // --------------------
-// Constant values
-// Can be literals (true, false, numbers, strings)
-// Or identifiers (e.g., enum constants)
+// Constant values: literals or identifiers (e.g., enum members)
 // --------------------
 const_value
     : KW_TRUE | KW_FALSE | INTEGER | FLOAT | STRING | IDENTIFIER
     ;
+
+// --------------------
+// Terminator
+// Terminator is used to separate statements or fields.
+// It allows either one or more newlines, or a semicolon.
+// This provides flexibility for both newline-based and semicolon-based syntax.
+// --------------------
+terminator
+    : (NEWLINE | SEMI)+
+    ;
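
Illustration (not part of the diff): the reworked rules drop the '@' prefix on embedded fields, route statement and field separation through the new terminator rule (newlines or semicolons), and point the alias form at value_type now that generic_type is gone. A minimal Go sketch of input the revised grammar is meant to accept, assuming the generated TLexer/TParser in this package; the helper name is illustrative.

// parseSample is a hypothetical helper: it feeds a small TIDL document through
// the generated lexer/parser to show the shapes the revised grammar accepts —
// an embedded type written bare (no '@' prefix), fields separated by newlines,
// and oneof fields separated by semicolons.
func parseSample() {
    const sample = `
type User {
    Base?
    string? name = "anon" (go.type="string")
}

oneof Value {
    A? a;
    B? b;
}
`
    lexer := NewTLexer(antlr.NewInputStream(sample))
    tokens := antlr.NewCommonTokenStream(lexer, antlr.TokenDefaultChannel)
    parser := NewTParser(tokens)

    // document : ((definition terminator) | terminator)* EOF
    tree := parser.Document()
    _ = tree // in real use, walk the tree with ParseTreeListener
}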

lib/tidl/parser.go

Lines changed: 100 additions & 10 deletions
@@ -248,7 +248,7 @@ func (l *ParseTreeListener) parseRedefinedType(ctx *Type_defContext, t *Type) {
     t.Redefined = &RedefinedType{
         Name: ctx.IDENTIFIER(1).GetText(),
     }
-    g := ctx.Generic_type()
+    g := ctx.Value_type()
     if g.Base_type() != nil {
         t.Redefined.GenericType = BaseType{
             Name: strings.TrimRight(g.Base_type().GetText(), "?"),
@@ -374,7 +374,7 @@ func (l *ParseTreeListener) ExitOneof_def(ctx *Oneof_defContext) {
 func (l *ParseTreeListener) parseOneOfType(ctx *Oneof_defContext, o *OneOf) {
 
     // Process all oneof fields
-    for _, f := range ctx.AllOneof_field() {
+    for _, f := range ctx.AllCommon_type_field() {
         typeField := TypeField{
             Position: Position{
                 Start: f.GetStart().GetLine(),
@@ -387,18 +387,18 @@ func (l *ParseTreeListener) parseOneOfType(ctx *Oneof_defContext, o *OneOf) {
         }
 
         // Regular field
-        typeField.FieldType = l.parseCommonFieldType(f.Common_type_field().Common_field_type())
-        typeField.Name = f.Common_type_field().IDENTIFIER().GetText()
+        typeField.FieldType = l.parseCommonFieldType(f.Common_field_type())
+        typeField.Name = f.IDENTIFIER().GetText()
 
         // Default value
-        if f.Common_type_field().Const_value() != nil {
-            s := f.Common_type_field().Const_value().GetText()
+        if f.Const_value() != nil {
+            s := f.Const_value().GetText()
             typeField.Default = &s
         }
 
         // Annotations
-        if f.Common_type_field().Type_annotations() != nil {
-            for _, aCtx := range f.Common_type_field().Type_annotations().AllAnnotation() {
+        if f.Type_annotations() != nil {
+            for _, aCtx := range f.Type_annotations().AllAnnotation() {
                 a := Annotation{
                     Key: aCtx.IDENTIFIER().GetText(),
                     Position: Position{
@@ -473,6 +473,96 @@ func (l *ParseTreeListener) ExitRpc_def(ctx *Rpc_defContext) {
     l.Document.RPCs = append(l.Document.RPCs, r)
 }
 
+// isTerminatorToken returns true if the token is a terminator token.
+func isTerminatorToken(t antlr.Token) bool {
+    return t.GetTokenType() == TLexerNEWLINE || t.GetTokenType() == TLexerSEMI
+
+}
+
+// previousTokenOnChannel returns the previous token on the specified channel.
+func (l *ParseTreeListener) previousTokenOnChannel(i int) int {
+    tokens := l.Tokens.GetAllTokens()
+    for i >= 0 && (isTerminatorToken(tokens[i]) || tokens[i].GetChannel() != antlr.LexerDefaultTokenChannel) {
+        i--
+    }
+    return i
+}
+
+// filterForChannel filters tokens for a specific channel.
+func (l *ParseTreeListener) filterForChannel(left, right, channel int) []antlr.Token {
+    tokens := l.Tokens.GetAllTokens()
+    hidden := make([]antlr.Token, 0)
+    for i := left; i < right+1; i++ {
+        t := tokens[i]
+        if channel == -1 {
+            if t.GetChannel() != antlr.LexerDefaultTokenChannel {
+                hidden = append(hidden, t)
+            }
+        } else if t.GetChannel() == channel {
+            hidden = append(hidden, t)
+        }
+    }
+    if len(hidden) == 0 {
+        return nil
+    }
+    return hidden
+}
+
+// GetHiddenTokensToLeft returns all hidden tokens to the left of a token.
+func (l *ParseTreeListener) GetHiddenTokensToLeft(tokenIndex, channel int) []antlr.Token {
+    tokens := l.Tokens.GetAllTokens()
+    if tokenIndex < 0 || tokenIndex >= len(tokens) {
+        panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(tokens)-1))
+    }
+
+    prevOnChannel := l.previousTokenOnChannel(tokenIndex - 1)
+    if prevOnChannel == tokenIndex-1 {
+        return nil
+    }
+
+    // If there are none on channel to the left and prevOnChannel == -1 then from = 0
+    from := prevOnChannel + 1
+    to := tokenIndex - 1
+    return l.filterForChannel(from, to, channel)
+}
+
+// nextTokenOnChannel returns the next token on the specified channel.
+func (l *ParseTreeListener) nextTokenOnChannel(i int) int {
+    tokens := l.Tokens.GetAllTokens()
+    if i >= len(tokens) {
+        return -1
+    }
+    token := tokens[i]
+    for isTerminatorToken(tokens[i]) || token.GetChannel() != antlr.LexerDefaultTokenChannel {
+        if token.GetTokenType() == antlr.TokenEOF {
+            return -1
+        }
+        i++
+        token = tokens[i]
+    }
+    return i
+}
+
+// GetHiddenTokensToRight returns all hidden tokens to the right of a token.
+func (l *ParseTreeListener) GetHiddenTokensToRight(tokenIndex, channel int) []antlr.Token {
+    tokens := l.Tokens.GetAllTokens()
+    if tokenIndex < 0 || tokenIndex >= len(tokens) {
+        panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(tokens)-1))
+    }
+
+    nextOnChannel := l.nextTokenOnChannel(tokenIndex + 1)
+    from := tokenIndex + 1
+
+    // If no onChannel to the right, then nextOnChannel == -1, so set 'to' to the last token
+    var to int
+    if nextOnChannel == -1 {
+        to = len(tokens) - 1
+    } else {
+        to = nextOnChannel
+    }
+    return l.filterForChannel(from, to, channel)
+}
+
 // topComment extracts comments immediately above a token.
 // It supports both single-line (//) and multi-line (/* */) comments.
 func (l *ParseTreeListener) topComment(token antlr.Token) []Comment {
@@ -482,7 +572,7 @@ func (l *ParseTreeListener) topComment(token antlr.Token) []Comment {
     )
 
     // Collect single-line comments
-    comments := l.Tokens.GetHiddenTokensToLeft(token.GetTokenIndex(), TLexerSL_COMMENT_CHAN)
+    comments := l.GetHiddenTokensToLeft(token.GetTokenIndex(), TLexerSL_COMMENT_CHAN)
     for _, c := range comments {
         if _, ok := l.Attached[c.GetLine()]; ok {
             continue
@@ -498,7 +588,7 @@ func (l *ParseTreeListener) topComment(token antlr.Token) []Comment {
     }
 
     // Collect multi-line comments
-    comments = l.Tokens.GetHiddenTokensToLeft(token.GetTokenIndex(), TLexerML_COMMENT_CHAN)
+    comments = l.GetHiddenTokensToLeft(token.GetTokenIndex(), TLexerML_COMMENT_CHAN)
     for _, c := range comments {
         if _, ok := l.Attached[c.GetLine()]; ok {
             continue
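
Illustration (not part of the diff): these listener-level helpers mirror BufferedTokenStream's hidden-token lookups but additionally skip NEWLINE/SEMI. That matters because NEWLINE now lives on the default channel, so the stream's own GetHiddenTokensToLeft stops at the newline that follows a comment line and finds nothing, which is why the topComment hunks above switch to the listener's variant. A hedged usage sketch; the caller name and trimming are illustrative, not part of this commit.

// attachTopComments is a hypothetical caller. For source like:
//
//     // user record          <- token on SL_COMMENT_CHAN
//                             <- NEWLINE on the default channel
//     type User { ... }
//
// l.Tokens.GetHiddenTokensToLeft(idx, TLexerSL_COMMENT_CHAN) returns nothing,
// because the NEWLINE right before KW_TYPE is on-channel and ends the search;
// the listener's version skips terminator tokens and still finds the comment.
func (l *ParseTreeListener) attachTopComments(start antlr.Token) []string {
    var out []string
    for _, c := range l.GetHiddenTokensToLeft(start.GetTokenIndex(), TLexerSL_COMMENT_CHAN) {
        out = append(out, strings.TrimSpace(strings.TrimLeft(c.GetText(), "/# ")))
    }
    return out
}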
