Skip to content

Commit ce86030

Browse files
committed
Implement double bar (||) operator for partial string lists
Adds support for the double bar operator as specified in: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/double_bar

Changes:
- Lexer: Added a DoubleBar token; detects || vs. a single |
- Parser: Handles "string"||Tail syntax with priority 1
  - Validates that || only appears after string literals
  - Rejects || after variables: K||[] => syntax_error
  - Rejects || after parenthesized expressions: ("a")||[] => syntax_error
  - Creates a PartialString for non-empty strings
  - Replaces the list tail for code lists
  - Empty strings correctly collapse (""||K unifies with K)
- Tests: Comprehensive Prolog integration tests covering:
  - All spec examples, including multi-line with comments
  - Edge cases (empty strings, chaining)
  - Syntax validation for all invalid cases

All Prolog tests pass.

Examples:
- "abc"||K => [a,b,c|K]
- "a"||"b"||"c" => [a,b,c]
- ""||K => K
- "a"|| % comment
  "b"||"c" => [a,b,c]
- K||[] => syntax_error (as required)
- ("a")||[] => syntax_error (as required)
1 parent e4d9692 commit ce86030

File tree

4 files changed

+153
-1
lines changed

4 files changed

+153
-1
lines changed

src/parser/lexer.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ pub enum Token {
4242
OpenCurly, // '{'
4343
CloseCurly, // '}'
4444
HeadTailSeparator, // '|'
45+
DoubleBar, // '||'
4546
Comma, // ','
4647
End,
4748
}
@@ -1035,6 +1036,11 @@ impl<'a, R: CharRead> Lexer<'a, R> {
10351036

10361037
if c == '|' {
10371038
self.skip_char(c);
1039+
let next = self.lookahead_char()?;
1040+
if next == '|' {
1041+
self.skip_char(next);
1042+
return Ok(Token::DoubleBar);
1043+
}
10381044
return Ok(Token::HeadTailSeparator);
10391045
}
10401046

src/parser/parser.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ enum TokenType {
2222
OpenList, // '['
2323
OpenCurly, // '{'
2424
HeadTailSeparator, // '|'
25+
DoubleBar, // '||'
2526
Comma, // ','
2627
Close,
2728
CloseList, // ']'
@@ -44,6 +45,7 @@ impl TokenType {
4445
matches!(
4546
self,
4647
TokenType::HeadTailSeparator
48+
| TokenType::DoubleBar
4749
| TokenType::OpenCT
4850
| TokenType::Open
4951
| TokenType::Close
@@ -312,9 +314,27 @@ impl<'a, R: CharRead> Parser<'a, R> {
312314
}
313315
}
314316

317+
/// Returns `list` with its terminating `[]` replaced by `new_tail`.
///
/// Walks the chain of `Term::Cons` cells through their tail position. The
/// first tail that is the atom literal `[]` is swapped for `new_tail`; every
/// cell passed on the way is rebuilt with its original `cell` and `head`.
/// Used by the `||` reduction when the left operand is a `Term::Cons`.
///
/// If `list` is not a `Term::Cons`, it is returned unchanged and `new_tail`
/// is dropped.
///
/// NOTE(review): because the recursion ends in that same fallback arm, a
/// list whose spine ends in anything other than `[]` (for example a
/// variable tail) comes back unmodified and `new_tail` is silently
/// discarded -- confirm this is the intended behavior for such operands.
///
/// Recursion depth is one call per list cell.
fn replace_list_tail(&self, list: Term, new_tail: Term) -> Term {
318+
match list {
319+
Term::Cons(cell, head, tail) => {
320+
match *tail {
321+
// End of the list reached: attach the new tail to this last cell.
Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => {
322+
Term::Cons(cell, head, Box::new(new_tail))
323+
}
324+
// Any other tail: recurse into it, then rebuild this cell
// around whatever the recursive call returns.
_ => {
325+
let replaced_tail = self.replace_list_tail(*tail, new_tail);
326+
Term::Cons(cell, head, Box::new(replaced_tail))
327+
}
328+
}
329+
}
330+
// Not a cons cell: hand the term back as-is (new_tail is not used).
_ => list,
331+
}
332+
}
333+
315334
fn get_term_name(&mut self, td: TokenDesc) -> Option<Atom> {
316335
match td.tt {
317336
TokenType::HeadTailSeparator => Some(atom!("|")),
337+
TokenType::DoubleBar => Some(atom!("||")),
318338
TokenType::Comma => Some(atom!(",")),
319339
TokenType::Term => match self.terms.pop() {
320340
Some(Term::Literal(_, Literal::Atom(atom))) => Some(atom),
@@ -332,7 +352,28 @@ impl<'a, R: CharRead> Parser<'a, R> {
332352
if let Some(arg2) = self.terms.pop() {
333353
if let Some(name) = self.get_term_name(td) {
334354
if let Some(arg1) = self.terms.pop() {
335-
let term = Term::Clause(Cell::default(), name, vec![arg1, arg2]);
355+
let term = if name == atom!("||") {
356+
match arg1 {
357+
Term::CompleteString(_, s) => {
358+
if s.is_empty() {
359+
arg2
360+
} else {
361+
Term::PartialString(Cell::default(), s, Box::new(arg2))
362+
}
363+
}
364+
Term::Cons(_, _, _) => {
365+
self.replace_list_tail(arg1, arg2)
366+
}
367+
Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => {
368+
arg2
369+
}
370+
_ => {
371+
Term::Clause(Cell::default(), name, vec![arg1, arg2])
372+
}
373+
}
374+
} else {
375+
Term::Clause(Cell::default(), name, vec![arg1, arg2])
376+
};
336377

337378
self.terms.push(term);
338379
self.stack.push(TokenDesc {
@@ -422,6 +463,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
422463
Token::Close => TokenType::Close,
423464
Token::OpenCT => TokenType::OpenCT,
424465
Token::HeadTailSeparator => TokenType::HeadTailSeparator,
466+
Token::DoubleBar => TokenType::DoubleBar,
425467
Token::OpenList => TokenType::OpenList,
426468
Token::CloseList => TokenType::CloseList,
427469
Token::OpenCurly => TokenType::OpenCurly,
@@ -1041,6 +1083,43 @@ impl<'a, R: CharRead> Parser<'a, R> {
10411083

10421084
self.shift(Token::HeadTailSeparator, priority, spec);
10431085
}
1086+
Token::DoubleBar => {
1087+
// Double bar operator only valid after string literals
1088+
// NOT valid after parenthesized expressions or variables
1089+
1090+
// Check that the last stack element is not from brackets
1091+
if let Some(last_stack) = self.stack.last() {
1092+
if last_stack.tt == TokenType::Term && last_stack.spec == BTERM {
1093+
// Term came from parentheses like ("a"), reject it
1094+
return Err(ParserError::IncompleteReduction(
1095+
self.lexer.line_num,
1096+
self.lexer.col_num,
1097+
));
1098+
}
1099+
}
1100+
1101+
// Check that the last term is a string or code list
1102+
let is_valid = if let Some(last_term) = self.terms.last() {
1103+
match last_term {
1104+
Term::CompleteString(_, _) => true,
1105+
Term::Cons(_, _, _) => true,
1106+
Term::Literal(_, Literal::Atom(atom)) if *atom == atom!("[]") => true,
1107+
_ => false,
1108+
}
1109+
} else {
1110+
false
1111+
};
1112+
1113+
if !is_valid {
1114+
return Err(ParserError::IncompleteReduction(
1115+
self.lexer.line_num,
1116+
self.lexer.col_num,
1117+
));
1118+
}
1119+
1120+
self.reduce_op(1);
1121+
self.shift(Token::DoubleBar, 1, XFY as u32);
1122+
}
10441123
Token::Comma => {
10451124
self.reduce_op(1000);
10461125
self.shift(Token::Comma, 1000, XFY as u32);
@@ -1051,6 +1130,7 @@ impl<'a, R: CharRead> Parser<'a, R> {
10511130
| Some(TokenType::OpenList)
10521131
| Some(TokenType::OpenCurly)
10531132
| Some(TokenType::HeadTailSeparator)
1133+
| Some(TokenType::DoubleBar)
10541134
| Some(TokenType::Comma) => {
10551135
return Err(ParserError::IncompleteReduction(
10561136
self.lexer.line_num,

src/tests/double_bar.pl

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
:- module(double_bar_tests, []).
2+
3+
:- use_module(test_framework).
4+
5+
% Tests for the double bar || operator
6+
% Based on: https://www.complang.tuwien.ac.at/ulrich/iso-prolog/double_bar
7+
8+
test("basic double bar with variable tail", (
9+
L = "abc"||K,
10+
L = [a,b,c|K]
11+
)).
12+
13+
test("double bar chain", (
14+
L = "a"||"b"||"c",
15+
L = [a,b,c]
16+
)).
17+
18+
test("empty string double bar unifies with tail", (
19+
L = ""||K,
20+
L == K
21+
)).
22+
23+
test("double bar with atom tail", (
24+
L = "hello"||world,
25+
L = [h,e,l,l,o|world]
26+
)).
27+
28+
test("unification with double bar", (
29+
"abc"||X = [a,b,c,d,e],
30+
X = [d,e]
31+
)).
32+
33+
test("empty string unification", (
34+
""||Y = hello,
35+
Y == hello
36+
)).
37+
38+
test("multiple chained empty strings", (
39+
L = ""||""||""||X,
40+
L == X
41+
)).
42+
43+
test("mixed empty and non-empty strings", (
44+
L = ""||"hello"||""||world,
45+
L = [h,e,l,l,o|world]
46+
)).
47+
48+
test("multi-line double bar with line comment", (
49+
L = "a"|| % multiple lines
50+
"b"||
51+
"c",
52+
L = [a,b,c]
53+
)).
54+
55+
test("multi-line double bar with block comment", (
56+
L = "a"||"b"|| /* with comments */ "c",
57+
L = [a,b,c]
58+
)).
59+
60+
test("multi-line double bar complex", (
61+
L = "a"|| % first line
62+
"b"|| /* second */
63+
"c",
64+
L = [a,b,c]
65+
)).
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
args = ["-f", "--no-add-history", "src/tests/double_bar.pl", "-f", "-g", "use_module(library(double_bar_tests)), double_bar_tests:main_quiet(double_bar_tests)"]

0 commit comments

Comments
 (0)