diff --git a/src/parser/ast.rs b/src/parser/ast.rs index 9effb51eb..da6999f50 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -141,6 +141,7 @@ pub const DELIMITER: u32 = 0x0100; pub const TERM: u32 = 0x1000; pub const LTERM: u32 = 0x3000; pub const BTERM: u32 = 0x11000; +pub const LIST_TERM: u32 = 0x5000; pub const NEGATIVE_SIGN: u32 = 0x0200; diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs index bfc3fb8f7..ee7dfa95d 100644 --- a/src/parser/lexer.rs +++ b/src/parser/lexer.rs @@ -42,6 +42,7 @@ pub enum Token { OpenCurly, // '{' CloseCurly, // '}' HeadTailSeparator, // '|' + DoubleBar, // '||' Comma, // ',' End, } @@ -1035,6 +1036,11 @@ impl<'a, R: CharRead> Lexer<'a, R> { if c == '|' { self.skip_char(c); + let next = self.lookahead_char()?; + if next == '|' { + self.skip_char(next); + return Ok(Token::DoubleBar); + } return Ok(Token::HeadTailSeparator); } diff --git a/src/parser/parser.rs b/src/parser/parser.rs index 61e85c7ec..644bca4bb 100644 --- a/src/parser/parser.rs +++ b/src/parser/parser.rs @@ -22,6 +22,7 @@ enum TokenType { OpenList, // '[' OpenCurly, // '{' HeadTailSeparator, // '|' + DoubleBar, // '||' Comma, // ',' Close, CloseList, // ']' @@ -44,6 +45,7 @@ impl TokenType { matches!( self, TokenType::HeadTailSeparator + | TokenType::DoubleBar | TokenType::OpenCT | TokenType::Open | TokenType::Close @@ -315,6 +317,7 @@ impl<'a, R: CharRead> Parser<'a, R> { fn get_term_name(&mut self, td: TokenDesc) -> Option { match td.tt { TokenType::HeadTailSeparator => Some(atom!("|")), + TokenType::DoubleBar => Some(atom!("||")), TokenType::Comma => Some(atom!(",")), TokenType::Term => match self.terms.pop() { Some(Term::Literal(_, Literal::Atom(atom))) => Some(atom), @@ -328,11 +331,57 @@ impl<'a, R: CharRead> Parser<'a, R> { } } + // Helper function to replace the tail of a Cons list with a new tail + fn replace_cons_tail(cons: Term, new_tail: Term) -> Term { + match cons { + Term::Cons(cell, head, tail) => { + match *tail { + Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => { + // Found the empty list tail, replace it + Term::Cons(cell, head, Box::new(new_tail)) + } + _ => { + // Recurse on the tail + Term::Cons(cell, head, Box::new(Self::replace_cons_tail(*tail, new_tail))) + } + } + } + _ => cons, // Not a Cons, return as-is (shouldn't happen) + } + } + fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) { if let Some(arg2) = self.terms.pop() { if let Some(name) = self.get_term_name(td) { if let Some(arg1) = self.terms.pop() { - let term = Term::Clause(Cell::default(), name, vec![arg1, arg2]); + let term = if name == atom!("||") { + match arg1 { + Term::CompleteString(_, s) | Term::PartialString(_, s, _) => { + if s.is_empty() { + // Empty string collapses: ""||K => K + arg2 + } else { + // Create/extend partial string: "abc"||K => [a,b,c|K] + Term::PartialString(Cell::default(), s, Box::new(arg2)) + } + } + Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => { + // Empty string in codes mode: ""||K => K + arg2 + } + Term::Cons(_, _, _) => { + // Handle codes mode: "abc" becomes Term::Cons([97,98,99]) + // Replace the [] tail with arg2 + Self::replace_cons_tail(arg1, arg2) + } + _ => { + // Should never reach here due to validation, but handle gracefully + Term::Clause(Cell::default(), name, vec![arg1, arg2]) + } + } + } else { + Term::Clause(Cell::default(), name, vec![arg1, arg2]) + }; self.terms.push(term); self.stack.push(TokenDesc { @@ -422,6 +471,7 @@ impl<'a, R: CharRead> Parser<'a, R> { Token::Close => TokenType::Close, Token::OpenCT => TokenType::OpenCT, Token::HeadTailSeparator => TokenType::HeadTailSeparator, + Token::DoubleBar => TokenType::DoubleBar, Token::OpenList => TokenType::OpenList, Token::CloseList => TokenType::CloseList, Token::OpenCurly => TokenType::OpenCurly, @@ -657,6 +707,8 @@ impl<'a, R: CharRead> Parser<'a, R> { continue; } return None; + } else if desc.tt == TokenType::DoubleBar { + return None; } else if desc.tt == TokenType::OpenList { return Some(arity); } else if desc.tt != TokenType::Comma { @@ -674,7 +726,7 @@ impl<'a, R: CharRead> Parser<'a, R> { if let Some(ref mut td) = self.stack.last_mut() { if td.tt == TokenType::OpenList { - td.spec = TERM; + td.spec = LIST_TERM; td.tt = TokenType::Term; td.priority = 0; @@ -735,7 +787,7 @@ impl<'a, R: CharRead> Parser<'a, R> { self.stack.push(TokenDesc { tt: TokenType::Term, priority: 0, - spec: TERM, + spec: LIST_TERM, unfold_bounds: 0, }); @@ -1022,24 +1074,118 @@ impl<'a, R: CharRead> Parser<'a, R> { } } Token::HeadTailSeparator => { - /* '|' as an operator must have priority > 1000 and can only be infix. - * See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78 - */ - let (priority, spec) = get_op_desc(atom!("|"), op_dir) - .map(|CompositeOpDesc { inf, spec, .. }| (inf, spec)) - .unwrap_or((1000, DELIMITER)); + // Check if next token is also HeadTailSeparator (i.e., "| |" with space) + // This allows both "||" and "| |" syntax per spec + if matches!(self.tokens.last(), Some(Token::HeadTailSeparator)) { + // Pop the second | and treat as DoubleBar + self.tokens.pop(); + + // Handle as DoubleBar - check validation constraints + if let Some(last_stack) = self.stack.last() { + if last_stack.tt == TokenType::Term && last_stack.spec == BTERM { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + if last_stack.tt == TokenType::Term && last_stack.spec == LIST_TERM { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + } + + // Check that the last term is a string literal (CompleteString, PartialString, or Cons from codes mode) + // Must match the validation for compact || below + let is_valid = if let Some(last_term) = self.terms.last() { + match last_term { + Term::CompleteString(_, _) => true, + Term::PartialString(_, _, _) => true, + Term::Cons(_, _, _) => true, // Allows codes mode: "abc" becomes [97,98,99] + Term::Literal(_, Literal::Atom(atom)) if *atom == atom!("[]") => true, // Empty string in codes mode + _ => false, + } + } else { + false + }; - let old_stack_len = self.stack.len(); + if !is_valid { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + + self.reduce_op(1); + self.shift(Token::DoubleBar, 1, XFY as u32); + } else { + // Handle as regular HeadTailSeparator + /* '|' as an operator must have priority > 1000 and can only be infix. + * See: http://www.complang.tuwien.ac.at/ulrich/iso-prolog/dtc2#Res_A78 + */ + let (priority, spec) = get_op_desc(atom!("|"), op_dir) + .map(|CompositeOpDesc { inf, spec, .. }| (inf, spec)) + .unwrap_or((1000, DELIMITER)); + + let old_stack_len = self.stack.len(); + + self.reduce_op(priority); + + let new_stack_len = self.stack.len(); + + if let Some(term_desc) = self.stack.last_mut() { + term_desc.unfold_bounds = old_stack_len - new_stack_len; + } - self.reduce_op(priority); + self.shift(Token::HeadTailSeparator, priority, spec); + } + } + Token::DoubleBar => { + // Double bar operator only valid after string literals + // NOT valid after parenthesized expressions or variables + + // Check that the last stack element is not from brackets or list syntax + if let Some(last_stack) = self.stack.last() { + if last_stack.tt == TokenType::Term && last_stack.spec == BTERM { + // Term came from parentheses like ("a"), reject it + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + if last_stack.tt == TokenType::Term && last_stack.spec == LIST_TERM { + // Term came from list syntax like [a,b,c], reject it + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); + } + } - let new_stack_len = self.stack.len(); + // Check that the last term is a string literal (CompleteString, PartialString, or Cons from codes mode) + // NOT arbitrary lists like [1,2,3] or variables from list syntax + let is_valid = if let Some(last_term) = self.terms.last() { + match last_term { + Term::CompleteString(_, _) => true, + Term::PartialString(_, _, _) => true, + Term::Cons(_, _, _) => true, // Allows codes mode: "abc" becomes [97,98,99] + Term::Literal(_, Literal::Atom(atom)) if *atom == atom!("[]") => true, // Empty string in codes mode + _ => false, + } + } else { + false + }; - if let Some(term_desc) = self.stack.last_mut() { - term_desc.unfold_bounds = old_stack_len - new_stack_len; + if !is_valid { + return Err(ParserError::IncompleteReduction( + self.lexer.line_num, + self.lexer.col_num, + )); } - self.shift(Token::HeadTailSeparator, priority, spec); + self.reduce_op(1); + self.shift(Token::DoubleBar, 1, XFY as u32); } Token::Comma => { self.reduce_op(1000); @@ -1051,6 +1197,7 @@ impl<'a, R: CharRead> Parser<'a, R> { | Some(TokenType::OpenList) | Some(TokenType::OpenCurly) | Some(TokenType::HeadTailSeparator) + | Some(TokenType::DoubleBar) | Some(TokenType::Comma) => { return Err(ParserError::IncompleteReduction( self.lexer.line_num, diff --git a/src/tests/double_bar.pl b/src/tests/double_bar.pl new file mode 100644 index 000000000..f98b29f73 --- /dev/null +++ b/src/tests/double_bar.pl @@ -0,0 +1,172 @@ +:- module(double_bar_tests, []). + +:- use_module(test_framework). + +% Tests for the double bar || operator +% Spec: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/double_bar +% +% Abstract syntax (from spec): +% term = double quoted list, bar, bar, term ; +% Priority: 0, 0, 0 +% +% The LEFT side must be a double quoted list. +% The RIGHT side (tail) can be any term at priority 0, including: +% - Variables: "abc"||K +% - Strings (chained): "a"||"b"||"c" +% - Atoms: "hello"||world (valid per abstract syntax) +% - Numbers: "abc"||123 +% +% WG17 2025-06-02: Accepts option 1 (only after double quotes) + +test("basic double bar with variable tail", ( + L = "abc"||K, + L = [a,b,c|K] +)). + +test("double bar chain", ( + L = "a"||"b"||"c", + L = [a,b,c] +)). + +test("empty string double bar unifies with tail", ( + L = ""||K, + L == K +)). + +% Atom tail: valid per abstract syntax "term = dql, bar, bar, term" +% The right-hand term can be any term at priority 0, including atoms. +test("double bar with atom tail", ( + L = "hello"||world, + L = [h,e,l,l,o|world] +)). + +test("unification with double bar", ( + "abc"||X = [a,b,c,d,e], + X = [d,e] +)). + +test("empty string unification", ( + ""||Y = hello, + Y == hello +)). + +test("multiple chained empty strings", ( + L = ""||""||""||X, + L == X +)). + +test("mixed empty and non-empty strings", ( + L = ""||"hello"||""||world, + L = [h,e,l,l,o|world] +)). + +test("multi-line double bar with line comment", ( + L = "a"|| % multiple lines + "b"|| + "c", + L = [a,b,c] +)). + +test("multi-line double bar with block comment", ( + L = "a"||"b"|| /* with comments */ "c", + L = [a,b,c] +)). + +test("multi-line double bar complex", ( + L = "a"|| % first line + "b"|| /* second */ + "c", + L = [a,b,c] +)). + +test("spaced double bar syntax", ( + L = "abc" | | K, + L = [a,b,c|K] +)). + +test("spaced double bar chain", ( + L = "a" | | "b" | | "c", + L = [a,b,c] +)). + +test("block comment between bars", ( + L = "a" | /* comment */ | "b", + L = [a,b] +)). + +test("line comment between bars", ( + L = "a" | % line comment + | "b", + L = [a,b] +)). + +test("block comment in spaced bar with tail", ( + L = "abc" |/* comment */| K, + L = [a,b,c|K] +)). + +test("comment before double bar", ( + L = "a" /* before */ || "b", + L = [a,b] +)). + +test("comment after double bar", ( + L = "a" || /* after */ "b", + L = [a,b] +)). + +test("comment before spaced bars", ( + L = "a" /* before */ | | "b", + L = [a,b] +)). + +test("comment after spaced bars", ( + L = "a" | | /* after */ "b", + L = [a,b] +)). + +test("multiple comments around bars", ( + L = "a" /* before */ | /* between */ | /* after */ "b", + L = [a,b] +)). + +test("double bar chars mode empty at start of chain", ( + L = ""||"abc"||"de", + L = [a,b,c,d,e] +)). + +test("double bar chars mode empty in middle of chain", ( + L = "ab"||""||"cd", + L = [a,b,c,d] +)). + +test("double bar chars mode empty at end of chain", ( + L = "abc"||"de"||"", + L = [a,b,c,d,e] +)). + +test("double bar chars mode single character strings", ( + L = "x"||"y"||"z", + L = [x,y,z] +)). + +test("double bar chars mode unicode characters", ( + L = "α"||"β"||tail, + L = [α,β|tail] +)). + +test("double bar chars mode longer strings", ( + L = "hello"||"world", + L = [h,e,l,l,o,w,o,r,l,d] +)). + +test("double bar chars mode nested unification", ( + "a"||"b"||X = [a,b,c], + X = [c] +)). + +% Numeric tail: valid per abstract syntax (right-hand term can be any term) +test("double bar chars mode with numeric tail", ( + L = "abc"||123, + L = [a,b,c|123] +)). diff --git a/src/tests/double_bar_codes.pl b/src/tests/double_bar_codes.pl new file mode 100644 index 000000000..458f0be43 --- /dev/null +++ b/src/tests/double_bar_codes.pl @@ -0,0 +1,155 @@ +% Tests for the double bar || operator in codes mode +% Spec: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/double_bar +% +% Abstract syntax (from spec): +% term = double quoted list, bar, bar, term ; +% Priority: 0, 0, 0 +% +% The LEFT side must be a double quoted list. +% The RIGHT side (tail) can be any term at priority 0, including: +% - Variables: "abc"||K +% - Strings (chained): "a"||"b"||"c" +% - Atoms: "abc"||xyz (valid per abstract syntax) +% - Numbers: "abc"||123 +% +% WG17 2025-06-02: Accepts option 1 (only after double quotes) +% +% Note: Format helpers defined BEFORE set_prolog_flag so format strings +% are parsed as chars (format/2 requires char lists, not code lists). + +:- use_module(library(format)). + +run_test(Name, Goal) :- + copy_term(Goal, GoalCopy), + ( call(GoalCopy) -> + true + ; format("FAILED: ~q~n", [Name]), + fail + ). + +report_success :- format("All tests passed", []). +report_failure :- format("Some tests failed", []). + +:- set_prolog_flag(double_quotes, codes). + +all_tests :- + run_test(basic, ( + L = "abc"||K, + L = [97,98,99|K] + )), + run_test(empty_string, ( + L = ""||K, + L == K + )), + run_test(chain, ( + L = "a"||"b"||"c", + L = [97,98,99] + )), + run_test(unification, ( + "abc"||X = [97,98,99,100,101], + X = [100,101] + )), + run_test(mixed_empty, ( + L = ""||"hello"||""||world, + L = [104,101,108,108,111|world] + )), + run_test(atom_tail, ( + L = "abc"||xyz, + L = [97,98,99|xyz] + )), + run_test(multiline_line_comment, ( + L = "a"|| % multiple lines + "b"|| + "c", + L = [97,98,99] + )), + run_test(multiline_block_comment, ( + L = "a"||"b"|| /* with comments */ "c", + L = [97,98,99] + )), + run_test(multiline_complex, ( + L = "a"|| % first line + "b"|| /* second */ + "c", + L = [97,98,99] + )), + run_test(spaced_syntax, ( + L = "abc" | | K, + L = [97,98,99|K] + )), + run_test(spaced_chain, ( + L = "a" | | "b" | | "c", + L = [97,98,99] + )), + run_test(block_comment_between_bars, ( + L = "a" | /* comment */ | "b", + L = [97,98] + )), + run_test(line_comment_between_bars, ( + L = "a" | % line comment + | "b", + L = [97,98] + )), + run_test(block_comment_in_spaced_bar_with_tail, ( + L = "abc" |/* comment */| K, + L = [97,98,99|K] + )), + run_test(comment_before_double_bar, ( + L = "a" /* before */ || "b", + L = [97,98] + )), + run_test(comment_after_double_bar, ( + L = "a" || /* after */ "b", + L = [97,98] + )), + run_test(comment_before_spaced_bars, ( + L = "a" /* before */ | | "b", + L = [97,98] + )), + run_test(comment_after_spaced_bars, ( + L = "a" | | /* after */ "b", + L = [97,98] + )), + run_test(multiple_comments_around_bars, ( + L = "a" /* before */ | /* between */ | /* after */ "b", + L = [97,98] + )), + run_test(empty_at_start_of_chain, ( + L = ""||"abc"||"de", + L = [97,98,99,100,101] + )), + run_test(empty_in_middle_of_chain, ( + L = "ab"||""||"cd", + L = [97,98,99,100] + )), + run_test(empty_at_end_of_chain, ( + L = "abc"||"de"||"", + L = [97,98,99,100,101] + )), + run_test(single_character_strings, ( + L = "x"||"y"||"z", + L = [120,121,122] + )), + run_test(unicode_characters, ( + L = "α"||"β"||tail, + L = [945,946|tail] + )), + run_test(longer_strings, ( + L = "hello"||"world", + L = [104,101,108,108,111,119,111,114,108,100] + )), + run_test(nested_unification, ( + "a"||"b"||X = [97,98,99], + X = [99] + )), + run_test(numeric_tail, ( + L = "abc"||123, + L = [97,98,99|123] + )). + +main :- + ( all_tests -> + report_success + ; report_failure + ), + halt. diff --git a/tests-pl/double_bar_atom.pl b/tests-pl/double_bar_atom.pl new file mode 100644 index 000000000..41ce7d0b1 --- /dev/null +++ b/tests-pl/double_bar_atom.pl @@ -0,0 +1,2 @@ +% foo||X - atom before || is invalid +test :- X = foo||Y. diff --git a/tests-pl/double_bar_list1.pl b/tests-pl/double_bar_list1.pl new file mode 100644 index 000000000..46e607afc --- /dev/null +++ b/tests-pl/double_bar_list1.pl @@ -0,0 +1,2 @@ +% []||X - empty list before || is invalid (WG17 2025) +test :- X = []||Y. diff --git a/tests-pl/double_bar_list2.pl b/tests-pl/double_bar_list2.pl new file mode 100644 index 000000000..52472a933 --- /dev/null +++ b/tests-pl/double_bar_list2.pl @@ -0,0 +1,2 @@ +% [a,b]||X - non-empty list before || is invalid +test :- X = [a,b]||Y. diff --git a/tests-pl/double_bar_number.pl b/tests-pl/double_bar_number.pl new file mode 100644 index 000000000..9335a953e --- /dev/null +++ b/tests-pl/double_bar_number.pl @@ -0,0 +1,2 @@ +% 123||X - number before || is invalid +test :- X = 123||Y. diff --git a/tests-pl/double_bar_var.pl b/tests-pl/double_bar_var.pl new file mode 100644 index 000000000..74440819e --- /dev/null +++ b/tests-pl/double_bar_var.pl @@ -0,0 +1,2 @@ +% X||Y - variable before || is invalid +test :- Z = X||Y. diff --git a/tests/scryer/cli/src_tests/double_bar_codes.stderr b/tests/scryer/cli/src_tests/double_bar_codes.stderr new file mode 100644 index 000000000..e69de29bb diff --git a/tests/scryer/cli/src_tests/double_bar_codes.stdout b/tests/scryer/cli/src_tests/double_bar_codes.stdout new file mode 100644 index 000000000..4952cede6 --- /dev/null +++ b/tests/scryer/cli/src_tests/double_bar_codes.stdout @@ -0,0 +1 @@ +All tests passed \ No newline at end of file diff --git a/tests/scryer/cli/src_tests/double_bar_codes.toml b/tests/scryer/cli/src_tests/double_bar_codes.toml new file mode 100644 index 000000000..2f256ef73 --- /dev/null +++ b/tests/scryer/cli/src_tests/double_bar_codes.toml @@ -0,0 +1 @@ +args = ["-f", "--no-add-history", "src/tests/double_bar_codes.pl", "-g", "main"] diff --git a/tests/scryer/cli/src_tests/double_bar_syntax_errors.md b/tests/scryer/cli/src_tests/double_bar_syntax_errors.md new file mode 100644 index 000000000..6fade7e5c --- /dev/null +++ b/tests/scryer/cli/src_tests/double_bar_syntax_errors.md @@ -0,0 +1,33 @@ +## WG17 2025 Double Bar Syntax Error Tests +## Reference: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/double_bar +## The || operator is only valid after double-quoted strings + +```trycmd +$ scryer-prolog -f --no-add-history tests-pl/double_bar_list1.pl -g halt + error(syntax_error(incomplete_reduction),read_term/3:2). + +``` + +```trycmd +$ scryer-prolog -f --no-add-history tests-pl/double_bar_list2.pl -g halt + error(syntax_error(incomplete_reduction),read_term/3:2). + +``` + +```trycmd +$ scryer-prolog -f --no-add-history tests-pl/double_bar_var.pl -g halt + error(syntax_error(incomplete_reduction),read_term/3:2). + +``` + +```trycmd +$ scryer-prolog -f --no-add-history tests-pl/double_bar_atom.pl -g halt + error(syntax_error(incomplete_reduction),read_term/3:2). + +``` + +```trycmd +$ scryer-prolog -f --no-add-history tests-pl/double_bar_number.pl -g halt + error(syntax_error(incomplete_reduction),read_term/3:2). + +``` diff --git a/tests/scryer/cli/src_tests/double_bar_tests.stdout b/tests/scryer/cli/src_tests/double_bar_tests.stdout new file mode 100644 index 000000000..4952cede6 --- /dev/null +++ b/tests/scryer/cli/src_tests/double_bar_tests.stdout @@ -0,0 +1 @@ +All tests passed \ No newline at end of file diff --git a/tests/scryer/cli/src_tests/double_bar_tests.toml b/tests/scryer/cli/src_tests/double_bar_tests.toml new file mode 100644 index 000000000..e4aa4af94 --- /dev/null +++ b/tests/scryer/cli/src_tests/double_bar_tests.toml @@ -0,0 +1 @@ +args = ["-f", "--no-add-history", "src/tests/double_bar.pl", "-f", "-g", "use_module(library(double_bar_tests)), double_bar_tests:main_quiet(double_bar_tests)"]