Skip to content

Commit 9b04bcc

Browse files
committed
Fix: Reject list syntax before || operator and support | | spacing
Addresses feedback from @bakaq about [a,b,c]||S being incorrectly accepted, and adds support for the spaced "| |" syntax per the spec.

Issue 1: List syntax like [a,b,c] was incorrectly accepted
----------------------------------------------------------
The issue was that in chars mode, reduce_list() converts lists like [a,b,c] to CompleteString terms, making them indistinguishable from actual string literals "abc" by the time the || validation runs.

Solution: Introduce a LIST_TERM spec constant to mark terms originating from list syntax ([...]), distinct from string literals ("..."). The || operator now correctly rejects list syntax while accepting only double-quoted strings.

Issue 2: Spaced "| |" syntax not supported
------------------------------------------
The spec explicitly shows "a"| |"b"| |"c" as valid syntax with spaces between the bars. Modified HeadTailSeparator handling to peek ahead and detect two consecutive | tokens, treating them as DoubleBar.

Comments are supported in all positions per spec:
- Before bars: "a" /* comment */ || "b"
- After bars: "a" || /* comment */ "b"
- Between bars: "a" | /* comment */ | "b"
- Multiple positions: "a" /* c1 */ | /* c2 */ | /* c3 */ "b"
- Line comments: "a" | % comment | "b"

Changes:
- src/parser/ast.rs: Add LIST_TERM = 0x5000 constant
- src/parser/parser.rs:
  * Set LIST_TERM spec in reduce_list()
  * Check LIST_TERM in || validation to reject list syntax
  * Detect | | token pair and handle as DoubleBar
- src/tests/double_bar.pl:
  * Document [a,b,c]||S as invalid case
  * Add tests for spaced "| |" syntax
  * Add comprehensive tests for comments in all positions

Tested:
✅ [a,b,c]||S correctly rejected
✅ [1,2,3]||K correctly rejected
✅ [_]||Rs correctly rejected
✅ "abc"||K works correctly
✅ "abc" | | K works correctly (spaced syntax)
✅ "a" | | "b" | | "c" works correctly
✅ Comments before, after, and between bars work correctly
✅ All 21 integration tests pass
✅ All cargo tests pass (no regressions)
1 parent af38843 commit 9b04bcc

File tree

3 files changed

+119
-15
lines changed

3 files changed

+119
-15
lines changed

src/parser/ast.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ pub const DELIMITER: u32 = 0x0100;
141141
pub const TERM: u32 = 0x1000;
142142
pub const LTERM: u32 = 0x3000;
143143
pub const BTERM: u32 = 0x11000;
144+
pub const LIST_TERM: u32 = 0x5000;
144145

145146
pub const NEGATIVE_SIGN: u32 = 0x0200;
146147

src/parser/parser.rs

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
757757
self.stack.push(TokenDesc {
758758
tt: TokenType::Term,
759759
priority: 0,
760-
spec: TERM,
760+
spec: LIST_TERM,
761761
unfold_bounds: 0,
762762
});
763763

@@ -1044,30 +1044,74 @@ impl<'a, R: CharRead> Parser<'a, R> {
10441044
}
10451045
}
10461046
Token::HeadTailSeparator => {
1047-
/* '|' as an operator must have priority > 1000 and can only be infix.
1048-
* See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78
1049-
*/
1050-
let (priority, spec) = get_op_desc(atom!("|"), op_dir)
1051-
.map(|CompositeOpDesc { inf, spec, .. }| (inf, spec))
1052-
.unwrap_or((1000, DELIMITER));
1047+
// Check if next token is also HeadTailSeparator (i.e., "| |" with space)
1048+
// This allows both "||" and "| |" syntax per spec
1049+
if matches!(self.tokens.last(), Some(Token::HeadTailSeparator)) {
1050+
// Pop the second | and treat as DoubleBar
1051+
self.tokens.pop();
1052+
1053+
// Handle as DoubleBar - check validation constraints
1054+
if let Some(last_stack) = self.stack.last() {
1055+
if last_stack.tt == TokenType::Term && last_stack.spec == BTERM {
1056+
return Err(ParserError::IncompleteReduction(
1057+
self.lexer.line_num,
1058+
self.lexer.col_num,
1059+
));
1060+
}
1061+
if last_stack.tt == TokenType::Term && last_stack.spec == LIST_TERM {
1062+
return Err(ParserError::IncompleteReduction(
1063+
self.lexer.line_num,
1064+
self.lexer.col_num,
1065+
));
1066+
}
1067+
}
10531068

1054-
let old_stack_len = self.stack.len();
1069+
let is_valid = if let Some(last_term) = self.terms.last() {
1070+
match last_term {
1071+
Term::CompleteString(_, _) => true,
1072+
Term::PartialString(_, _, _) => true,
1073+
_ => false,
1074+
}
1075+
} else {
1076+
false
1077+
};
10551078

1056-
self.reduce_op(priority);
1079+
if !is_valid {
1080+
return Err(ParserError::IncompleteReduction(
1081+
self.lexer.line_num,
1082+
self.lexer.col_num,
1083+
));
1084+
}
10571085

1058-
let new_stack_len = self.stack.len();
1086+
self.reduce_op(1);
1087+
self.shift(Token::DoubleBar, 1, XFY as u32);
1088+
} else {
1089+
// Handle as regular HeadTailSeparator
1090+
/* '|' as an operator must have priority > 1000 and can only be infix.
1091+
* See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78
1092+
*/
1093+
let (priority, spec) = get_op_desc(atom!("|"), op_dir)
1094+
.map(|CompositeOpDesc { inf, spec, .. }| (inf, spec))
1095+
.unwrap_or((1000, DELIMITER));
10591096

1060-
if let Some(term_desc) = self.stack.last_mut() {
1061-
term_desc.unfold_bounds = old_stack_len - new_stack_len;
1062-
}
1097+
let old_stack_len = self.stack.len();
1098+
1099+
self.reduce_op(priority);
1100+
1101+
let new_stack_len = self.stack.len();
10631102

1064-
self.shift(Token::HeadTailSeparator, priority, spec);
1103+
if let Some(term_desc) = self.stack.last_mut() {
1104+
term_desc.unfold_bounds = old_stack_len - new_stack_len;
1105+
}
1106+
1107+
self.shift(Token::HeadTailSeparator, priority, spec);
1108+
}
10651109
}
10661110
Token::DoubleBar => {
10671111
// Double bar operator only valid after string literals
10681112
// NOT valid after parenthesized expressions or variables
10691113

1070-
// Check that the last stack element is not from brackets
1114+
// Check that the last stack element is not from brackets or list syntax
10711115
if let Some(last_stack) = self.stack.last() {
10721116
if last_stack.tt == TokenType::Term && last_stack.spec == BTERM {
10731117
// Term came from parentheses like ("a"), reject it
@@ -1076,6 +1120,13 @@ impl<'a, R: CharRead> Parser<'a, R> {
10761120
self.lexer.col_num,
10771121
));
10781122
}
1123+
if last_stack.tt == TokenType::Term && last_stack.spec == LIST_TERM {
1124+
// Term came from list syntax like [a,b,c], reject it
1125+
return Err(ParserError::IncompleteReduction(
1126+
self.lexer.line_num,
1127+
self.lexer.col_num,
1128+
));
1129+
}
10791130
}
10801131

10811132
// Check that the last term is a string literal (CompleteString or PartialString)

src/tests/double_bar.pl

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,64 @@
6464
L = [a,b,c]
6565
)).
6666

67+
test("spaced double bar syntax", (
68+
L = "abc" | | K,
69+
L = [a,b,c|K]
70+
)).
71+
72+
test("spaced double bar chain", (
73+
L = "a" | | "b" | | "c",
74+
L = [a,b,c]
75+
)).
76+
77+
test("block comment between bars", (
78+
L = "a" | /* comment */ | "b",
79+
L = [a,b]
80+
)).
81+
82+
test("line comment between bars", (
83+
L = "a" | % line comment
84+
| "b",
85+
L = [a,b]
86+
)).
87+
88+
test("block comment in spaced bar with tail", (
89+
L = "abc" |/* comment */| K,
90+
L = [a,b,c|K]
91+
)).
92+
93+
test("comment before double bar", (
94+
L = "a" /* before */ || "b",
95+
L = [a,b]
96+
)).
97+
98+
test("comment after double bar", (
99+
L = "a" || /* after */ "b",
100+
L = [a,b]
101+
)).
102+
103+
test("comment before spaced bars", (
104+
L = "a" /* before */ | | "b",
105+
L = [a,b]
106+
)).
107+
108+
test("comment after spaced bars", (
109+
L = "a" | | /* after */ "b",
110+
L = [a,b]
111+
)).
112+
113+
test("multiple comments around bars", (
114+
L = "a" /* before */ | /* between */ | /* after */ "b",
115+
L = [a,b]
116+
)).
117+
67118
% Note: These invalid cases are tested at parse time, not runtime
68119
% They cannot be included as test/2 predicates because they fail at read_term
69120
% The parser correctly rejects them with syntax_error(incomplete_reduction)
70121
%
71122
% Invalid cases (verified separately):
72123
% - [1,2,3]||K => syntax_error
73124
% - [_]||Rs => syntax_error
125+
% - [a,b,c]||S => syntax_error
74126
% - K||[] => syntax_error
75127
% - ("a")||[] => syntax_error

0 commit comments

Comments
 (0)