Skip to content

Commit af5fb3a

Browse files
jjtoltonclaude
andcommitted
Fix #3172: Parser rejects invalid | and || syntax
Migrates parser fixes from combined-testing branch to properly reject: - (|) when | is declared as operator (ISO TC2 C.2) - [|], [|X], [a,|] - missing head/tail in list notation (ISO §6.3.5) - []||X, foo||X - non-string before || operator (WG17 2025) - Mismatched brackets like ([) or ({) Changes: - push_binary_op returns Result<(), ParserError> to propagate || errors - reduce_op returns Result<(), ParserError> for error propagation - reduce_term returns Result<bool, ParserError> - reduce_brackets returns Result<bool, ParserError> - compute_arity_in_brackets rejects HeadTailSeparator at even positions - compute_arity_in_list rejects DoubleBar tokens - reduce_list uses LIST_TERM spec for empty list [] to distinguish from empty string "" in codes mode - reduce_brackets checks for HeadTailSeparator with priority > 1000 and rejects mismatched OpenList/OpenCurly Test files added: - issue_3172_operator_bar: Tests (|), [|], [|X] patterns - issue_3172_doublebar: Tests || with non-string operands 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 4e1eb9e commit af5fb3a

File tree

13 files changed

+326
-36
lines changed

13 files changed

+326
-36
lines changed

src/parser/parser.rs

Lines changed: 63 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
350350
}
351351
}
352352

353-
fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) {
353+
fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) -> Result<(), ParserError> {
354354
if let Some(arg2) = self.terms.pop() {
355355
if let Some(name) = self.get_term_name(td) {
356356
if let Some(arg1) = self.terms.pop() {
@@ -375,8 +375,11 @@ impl<'a, R: CharRead> Parser<'a, R> {
375375
Self::replace_cons_tail(arg1, arg2)
376376
}
377377
_ => {
378-
// Should never reach here due to validation, but handle gracefully
379-
Term::Clause(Cell::default(), name, vec![arg1, arg2])
378+
return Err(ParserError::UnexpectedChar(
379+
'|',
380+
self.lexer.line_num,
381+
self.lexer.col_num,
382+
));
380383
}
381384
}
382385
} else {
@@ -393,6 +396,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
393396
}
394397
}
395398
}
399+
Ok(())
396400
}
397401

398402
fn push_unary_op(&mut self, td: TokenDesc, spec: Specifier, assoc: OpDeclSpec) {
@@ -487,18 +491,18 @@ impl<'a, R: CharRead> Parser<'a, R> {
487491
});
488492
}
489493

490-
fn reduce_op(&mut self, priority: usize) {
494+
fn reduce_op(&mut self, priority: usize) -> Result<(), ParserError> {
491495
loop {
492496
if let Some(desc1) = self.stack.pop() {
493497
if let Some(desc2) = self.stack.pop() {
494498
if let Some(desc3) = self.stack.pop() {
495499
if is_xfx!(desc2.spec) && affirm_xfx(priority, desc2, desc3, desc1)
496500
|| is_yfx!(desc2.spec) && affirm_yfx(priority, desc2, desc3, desc1)
497501
{
498-
self.push_binary_op(desc2, LTERM);
502+
self.push_binary_op(desc2, LTERM)?;
499503
continue;
500504
} else if is_xfy!(desc2.spec) && affirm_xfy(priority, desc2, desc3, desc1) {
501-
self.push_binary_op(desc2, TERM);
505+
self.push_binary_op(desc2, TERM)?;
502506
continue;
503507
} else {
504508
self.stack.push(desc3);
@@ -528,6 +532,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
528532

529533
break;
530534
}
535+
Ok(())
531536
}
532537

533538
fn compute_arity_in_brackets(&self) -> Option<usize> {
@@ -538,6 +543,8 @@ impl<'a, R: CharRead> Parser<'a, R> {
538543
// expect a term or non-comma operator.
539544
if let TokenType::Comma = desc.tt {
540545
return None;
546+
} else if desc.tt == TokenType::HeadTailSeparator {
547+
return None;
541548
} else if is_term!(desc.spec) || is_op!(desc.spec) || is_negate!(desc.spec) {
542549
arity += 1;
543550
} else {
@@ -559,16 +566,16 @@ impl<'a, R: CharRead> Parser<'a, R> {
559566
None
560567
}
561568

562-
fn reduce_term(&mut self) -> bool {
569+
fn reduce_term(&mut self) -> Result<bool, ParserError> {
563570
if self.stack.is_empty() {
564-
return false;
571+
return Ok(false);
565572
}
566573

567-
self.reduce_op(999);
574+
self.reduce_op(999)?;
568575

569576
let arity = match self.compute_arity_in_brackets() {
570577
Some(arity) => arity,
571-
None => return false,
578+
None => return Ok(false),
572579
};
573580

574581
if self.stack.len() > 2 * arity {
@@ -579,14 +586,14 @@ impl<'a, R: CharRead> Parser<'a, R> {
579586
&& !is_op!(self.stack[idx - 1].spec)
580587
&& !self.stack[idx - 1].tt.is_sep()
581588
{
582-
return false;
589+
return Ok(false);
583590
}
584591
} else {
585-
return false;
592+
return Ok(false);
586593
}
587594

588595
if self.terms.len() < 1 + arity {
589-
return false;
596+
return Ok(false);
590597
}
591598

592599
let stack_len = self.stack.len() - 2 * arity - 1;
@@ -627,7 +634,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
627634
}) = self.stack.last_mut()
628635
{
629636
if *spec == BTERM {
630-
return false;
637+
return Ok(false);
631638
}
632639

633640
*tt = TokenType::Term;
@@ -636,11 +643,11 @@ impl<'a, R: CharRead> Parser<'a, R> {
636643
*unfold_bounds = 0;
637644
}
638645

639-
return true;
646+
return Ok(true);
640647
}
641648
}
642649

643-
false
650+
Ok(false)
644651
}
645652

646653
pub fn reset(&mut self) {
@@ -697,6 +704,10 @@ impl<'a, R: CharRead> Parser<'a, R> {
697704
// expect a term or non-comma operator.
698705
if let TokenType::Comma = desc.tt {
699706
return None;
707+
} else if desc.tt == TokenType::HeadTailSeparator {
708+
// HeadTailSeparator at even position means pattern like [|] or [a,|]
709+
// ISO 6.3.5: items = arg, ht_sep, arg - requires arg before ht_sep
710+
return None;
700711
} else if is_term!(desc.spec) || is_op!(desc.spec) || is_negate!(desc.spec) {
701712
arity += 1;
702713
} else {
@@ -707,6 +718,8 @@ impl<'a, R: CharRead> Parser<'a, R> {
707718
continue;
708719
}
709720
return None;
721+
} else if desc.tt == TokenType::DoubleBar {
722+
return None;
710723
} else if desc.tt == TokenType::OpenList {
711724
return Some(arity);
712725
} else if desc.tt != TokenType::Comma {
@@ -724,7 +737,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
724737

725738
if let Some(ref mut td) = self.stack.last_mut() {
726739
if td.tt == TokenType::OpenList {
727-
td.spec = TERM;
740+
td.spec = LIST_TERM;
728741
td.tt = TokenType::Term;
729742
td.priority = 0;
730743

@@ -734,7 +747,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
734747
}
735748
}
736749

737-
self.reduce_op(1000);
750+
self.reduce_op(999)?;
738751

739752
let mut arity = match self.compute_arity_in_list() {
740753
Some(arity) => arity,
@@ -823,7 +836,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
823836
}
824837
}
825838

826-
self.reduce_op(1201);
839+
self.reduce_op(1201)?;
827840

828841
if self.stack.len() > 1 {
829842
if let Some(td) = self.stack.pop() {
@@ -859,19 +872,26 @@ impl<'a, R: CharRead> Parser<'a, R> {
859872
Ok(false)
860873
}
861874

862-
fn reduce_brackets(&mut self) -> bool {
875+
fn reduce_brackets(&mut self) -> Result<bool, ParserError> {
863876
if self.stack.is_empty() {
864-
return false;
877+
return Ok(false);
865878
}
866879

867-
self.reduce_op(1400);
880+
self.reduce_op(1400)?;
868881

869882
if self.stack.len() <= 1 {
870-
return false;
883+
return Ok(false);
871884
}
872885

873886
if let Some(TokenType::Open | TokenType::OpenCT) = self.stack.last().map(|token| token.tt) {
874-
return false;
887+
return Ok(false);
888+
}
889+
890+
if let Some(TokenType::OpenList | TokenType::OpenCurly) = self.stack.last().map(|token| token.tt) {
891+
return Err(ParserError::IncompleteReduction(
892+
self.lexer.line_num,
893+
self.lexer.col_num,
894+
));
875895
}
876896

877897
let idx = self.stack.len() - 2;
@@ -880,7 +900,14 @@ impl<'a, R: CharRead> Parser<'a, R> {
880900
match td.tt {
881901
TokenType::Open | TokenType::OpenCT => {
882902
if self.stack[idx].tt == TokenType::Comma {
883-
return false;
903+
return Ok(false);
904+
}
905+
906+
if self.stack[idx].tt == TokenType::HeadTailSeparator && self.stack[idx].priority > 1000 {
907+
return Err(ParserError::IncompleteReduction(
908+
self.lexer.line_num,
909+
self.lexer.col_num,
910+
));
884911
}
885912

886913
if let Some(atom) = self.stack[idx].tt.sep_to_atom() {
@@ -892,9 +919,9 @@ impl<'a, R: CharRead> Parser<'a, R> {
892919
self.stack[idx].tt = TokenType::Term;
893920
self.stack[idx].priority = 0;
894921

895-
true
922+
Ok(true)
896923
}
897-
_ => false,
924+
_ => Ok(false),
898925
}
899926
}
900927

@@ -913,15 +940,15 @@ impl<'a, R: CharRead> Parser<'a, R> {
913940
Token::OpenCT => {
914941
// can't be prefix, so either inf == 0
915942
// or post == 0.
916-
self.reduce_op(inf + post);
943+
self.reduce_op(inf + post)?;
917944
self.promote_atom_op(
918945
name,
919946
inf + post,
920947
spec & (XFX as u32 | XFY as u32 | YFX as u32 | YF as u32 | XF as u32),
921948
);
922949
}
923950
_ => {
924-
self.reduce_op(inf + post);
951+
self.reduce_op(inf + post)?;
925952

926953
if let Some(TokenDesc { spec: pspec, .. }) = self.stack.last().cloned() {
927954
// rterm.c: 412
@@ -948,7 +975,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
948975
}
949976
}
950977
} else {
951-
self.reduce_op(pre + inf + post); // only one non-zero priority among these.
978+
self.reduce_op(pre + inf + post)?; // only one non-zero priority among these.
952979
self.promote_atom_op(name, pre + inf + post, spec);
953980
}
954981

@@ -1046,7 +1073,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
10461073
Token::Open => self.shift(Token::Open, 1300, DELIMITER),
10471074
Token::OpenCT => self.shift(Token::OpenCT, 1300, DELIMITER),
10481075
Token::Close => {
1049-
if !self.reduce_term() && !self.reduce_brackets() {
1076+
if !self.reduce_term()? && !self.reduce_brackets()? {
10501077
return Err(ParserError::IncompleteReduction(
10511078
self.lexer.line_num,
10521079
self.lexer.col_num,
@@ -1111,7 +1138,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
11111138
));
11121139
}
11131140

1114-
self.reduce_op(1);
1141+
self.reduce_op(1)?;
11151142
self.shift(Token::DoubleBar, 1, XFY as u32);
11161143
} else {
11171144
// Handle as regular HeadTailSeparator
@@ -1124,7 +1151,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
11241151

11251152
let old_stack_len = self.stack.len();
11261153

1127-
self.reduce_op(priority);
1154+
self.reduce_op(priority)?;
11281155

11291156
let new_stack_len = self.stack.len();
11301157

@@ -1178,11 +1205,11 @@ impl<'a, R: CharRead> Parser<'a, R> {
11781205
));
11791206
}
11801207

1181-
self.reduce_op(1);
1208+
self.reduce_op(1)?;
11821209
self.shift(Token::DoubleBar, 1, XFY as u32);
11831210
}
11841211
Token::Comma => {
1185-
self.reduce_op(1000);
1212+
self.reduce_op(1000)?;
11861213
self.shift(Token::Comma, 1000, XFY as u32);
11871214
}
11881215
Token::End => match self.stack.last().map(|t| t.tt) {
@@ -1230,7 +1257,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
12301257
self.shift_token(token, op_dir)?;
12311258
}
12321259

1233-
self.reduce_op(1400);
1260+
self.reduce_op(1400)?;
12341261

12351262
if self.terms.len() > 1 || self.stack.len() > 1 {
12361263
return Err(ParserError::IncompleteReduction(
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
%% Test file for issue #3172: Mismatched bracket detection
2+
%%
3+
%% GitHub: https://github.com/mthom/scryer-prolog/pull/3172#issuecomment-3574685968
4+
%%
5+
%% Bug Report:
6+
%% - read_from_chars("([).", T) was returning T = '[' instead of syntax_error
7+
%% - read_from_chars("{([)}.", T) was returning T = {[} instead of syntax_error
8+
%%
9+
%% ISO/IEC 13211-1:1995 References:
10+
%% - Section 6.3: Brackets must be properly matched
11+
%% - ( must close with )
12+
%% - [ must close with ]
13+
%% - { must close with }
14+
15+
:- use_module(library(charsio)).
16+
17+
%% ============================================================================
18+
%% TESTS FOR MISMATCHED BRACKETS (should all throw syntax_error)
19+
%% ============================================================================
20+
21+
%% Test 1-12: Various mismatched bracket patterns
22+
test1 :- catch((read_from_chars("([).", _), fail), error(syntax_error(_), _), true).
23+
test2 :- catch((read_from_chars("({).", _), fail), error(syntax_error(_), _), true).
24+
test3 :- catch((read_from_chars("{([)}.", _), fail), error(syntax_error(_), _), true).
25+
test4 :- catch((read_from_chars("[(]).", _), fail), error(syntax_error(_), _), true).
26+
test5 :- catch((read_from_chars("{(}).", _), fail), error(syntax_error(_), _), true).
27+
test6 :- catch((read_from_chars("{[}.", _), fail), error(syntax_error(_), _), true).
28+
test7 :- catch((read_from_chars("[{].", _), fail), error(syntax_error(_), _), true).
29+
test8 :- catch((read_from_chars("((]).", _), fail), error(syntax_error(_), _), true).
30+
test9 :- catch((read_from_chars("[).", _), fail), error(syntax_error(_), _), true).
31+
test10 :- catch((read_from_chars("{).", _), fail), error(syntax_error(_), _), true).
32+
test11 :- catch((read_from_chars("(].", _), fail), error(syntax_error(_), _), true).
33+
test12 :- catch((read_from_chars("(}.", _), fail), error(syntax_error(_), _), true).
34+
35+
%% ============================================================================
36+
%% TESTS FOR VALID NESTED BRACKETS (should all succeed)
37+
%% ============================================================================
38+
39+
test13 :- read_from_chars("([]).", T), T = [].
40+
test14 :- read_from_chars("({}).", T), T = {}.
41+
test15 :- read_from_chars("{[]}.", T), T = '{}'([]).
42+
test17 :- read_from_chars("{([])}.", T), T = '{}'([]).
43+
test18 :- read_from_chars("[{}].", T), T = [{}].
44+
test19 :- read_from_chars("([a]).", T), T = [a].
45+
test20 :- read_from_chars("({a}).", T), T = '{}'(a).
46+
test21 :- read_from_chars("{[a,b]}.", T), T = '{}'([a,b]).
47+
test22 :- read_from_chars("[a,[b],c].", T), T = [a,[b],c].
48+
test23 :- read_from_chars("{a,{b},c}.", T), T = '{}'(','(a,','('{}'(b),c))).
49+
50+
%% Run all tests (silent success pattern)
51+
run :-
52+
test1, test2, test3, test4, test5, test6, test7, test8, test9, test10,
53+
test11, test12, test13, test14, test15, test17, test18, test19, test20,
54+
test21, test22, test23,
55+
halt.

tests/scryer/cli/issues/issue_3172_brackets.stderr

Whitespace-only changes.

tests/scryer/cli/issues/issue_3172_brackets.stdout

Whitespace-only changes.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# issue #3172 - mismatched bracket detection
2+
# Tests that ([), ({), {([)}, etc. throw syntax_error
3+
# ISO/IEC 13211-1:1995: Each bracket type must close with its matching delimiter
4+
args = ["-f", "--no-add-history", "-g", "run", "test.pl"]

0 commit comments

Comments
 (0)