Skip to content

Commit 4e1eb9e

Browse files
jjtoltonclaude
andcommitted
Add codes mode support for double bar operator
Implements support for the double bar (||) operator when double_quotes flag is set to codes, fixing issue #3142. The operator now correctly handles string literals in all three modes (chars, codes, atom). Changes: - Modified parser to accept Term::Cons and empty list literals for codes-mode strings before the || operator - Added replace_cons_tail helper to properly replace the tail of codes-mode lists - Extended push_binary_op to handle codes-mode string concatenation Tests: - Added 35 comprehensive tests (8 chars mode + 27 codes mode) - Full parity in comment handling, spacing, unicode, and edge cases - All 56 tests passing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 9b04bcc commit 4e1eb9e

File tree

2 files changed

+223
-2
lines changed

2 files changed

+223
-2
lines changed

src/parser/parser.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,25 @@ impl<'a, R: CharRead> Parser<'a, R> {
331331
}
332332
}
333333

334+
// Helper function to replace the tail of a Cons list with a new tail
335+
fn replace_cons_tail(cons: Term, new_tail: Term) -> Term {
336+
match cons {
337+
Term::Cons(cell, head, tail) => {
338+
match *tail {
339+
Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => {
340+
// Found the empty list tail, replace it
341+
Term::Cons(cell, head, Box::new(new_tail))
342+
}
343+
_ => {
344+
// Recurse on the tail
345+
Term::Cons(cell, head, Box::new(Self::replace_cons_tail(*tail, new_tail)))
346+
}
347+
}
348+
}
349+
_ => cons, // Not a Cons, return as-is (shouldn't happen)
350+
}
351+
}
352+
334353
fn push_binary_op(&mut self, td: TokenDesc, spec: Specifier) {
335354
if let Some(arg2) = self.terms.pop() {
336355
if let Some(name) = self.get_term_name(td) {
@@ -346,6 +365,15 @@ impl<'a, R: CharRead> Parser<'a, R> {
346365
Term::PartialString(Cell::default(), s, Box::new(arg2))
347366
}
348367
}
368+
Term::Literal(_, Literal::Atom(atom)) if atom == atom!("[]") => {
369+
// Empty string in codes mode: ""||K => K
370+
arg2
371+
}
372+
Term::Cons(_, _, _) => {
373+
// Handle codes mode: "abc" becomes Term::Cons([97,98,99])
374+
// Replace the [] tail with arg2
375+
Self::replace_cons_tail(arg1, arg2)
376+
}
349377
_ => {
350378
// Should never reach here due to validation, but handle gracefully
351379
Term::Clause(Cell::default(), name, vec![arg1, arg2])
@@ -1129,12 +1157,14 @@ impl<'a, R: CharRead> Parser<'a, R> {
11291157
}
11301158
}
11311159

1132-
// Check that the last term is a string literal (CompleteString or PartialString)
1133-
// NOT arbitrary lists like [1,2,3] or variables
1160+
// Check that the last term is a string literal (CompleteString, PartialString, or Cons from codes mode)
1161+
// NOT arbitrary lists like [1,2,3] or variables from list syntax
11341162
let is_valid = if let Some(last_term) = self.terms.last() {
11351163
match last_term {
11361164
Term::CompleteString(_, _) => true,
11371165
Term::PartialString(_, _, _) => true,
1166+
Term::Cons(_, _, _) => true, // Allows codes mode: "abc" becomes [97,98,99]
1167+
Term::Literal(_, Literal::Atom(atom)) if *atom == atom!("[]") => true, // Empty string in codes mode
11381168
_ => false,
11391169
}
11401170
} else {

src/tests/double_bar.pl

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,194 @@
125125
% - [a,b,c]||S => syntax_error
126126
% - K||[] => syntax_error
127127
% - ("a")||[] => syntax_error
128+
129+
130+
131+
test("double bar chars mode empty at start of chain", (
132+
L = ""||"abc"||"de",
133+
L = [a,b,c,d,e]
134+
)).
135+
136+
test("double bar chars mode empty in middle of chain", (
137+
L = "ab"||""||"cd",
138+
L = [a,b,c,d]
139+
)).
140+
141+
test("double bar chars mode empty at end of chain", (
142+
L = "abc"||"de"||"",
143+
L = [a,b,c,d,e]
144+
)).
145+
146+
test("double bar chars mode single character strings", (
147+
L = "x"||"y"||"z",
148+
L = [x,y,z]
149+
)).
150+
151+
test("double bar chars mode unicode characters", (
152+
L = "α"||"β"||tail,
153+
L = [α,β|tail]
154+
)).
155+
156+
test("double bar chars mode longer strings", (
157+
L = "hello"||"world",
158+
L = [h,e,l,l,o,w,o,r,l,d]
159+
)).
160+
161+
test("double bar chars mode nested unification", (
162+
"a"||"b"||X = [a,b,c],
163+
X = [c]
164+
)).
165+
166+
test("double bar chars mode with numeric tail", (
167+
L = "abc"||123,
168+
L = [a,b,c|123]
169+
)).
170+
% Tests for double bar with double_quotes set to codes
171+
% These must be in a separate section with the flag set at parse time
172+
173+
:- set_prolog_flag(double_quotes, codes).
174+
175+
test("double bar with codes mode basic", (
176+
L = "abc"||K,
177+
L = [97,98,99|K]
178+
)).
179+
180+
test("double bar with codes mode empty string", (
181+
L = ""||K,
182+
L == K
183+
)).
184+
185+
test("double bar with codes mode chain", (
186+
L = "a"||"b"||"c",
187+
L = [97,98,99]
188+
)).
189+
190+
test("double bar with codes mode unification", (
191+
"abc"||X = [97,98,99,100,101],
192+
X = [100,101]
193+
)).
194+
195+
test("double bar with codes mode mixed empty and non-empty", (
196+
L = ""||"hello"||""||world,
197+
L = [104,101,108,108,111|world]
198+
)).
199+
200+
test("double bar with codes mode with atom tail", (
201+
L = "abc"||xyz,
202+
L = [97,98,99|xyz]
203+
)).
204+
205+
206+
test("double bar with codes mode multi-line with line comment", (
207+
L = "a"|| % multiple lines
208+
"b"||
209+
"c",
210+
L = [97,98,99]
211+
)).
212+
213+
test("double bar with codes mode multi-line with block comment", (
214+
L = "a"||"b"|| /* with comments */ "c",
215+
L = [97,98,99]
216+
)).
217+
218+
test("double bar with codes mode multi-line complex", (
219+
L = "a"|| % first line
220+
"b"|| /* second */
221+
"c",
222+
L = [97,98,99]
223+
)).
224+
225+
test("double bar with codes mode spaced syntax", (
226+
L = "abc" | | K,
227+
L = [97,98,99|K]
228+
)).
229+
230+
test("double bar with codes mode spaced chain", (
231+
L = "a" | | "b" | | "c",
232+
L = [97,98,99]
233+
)).
234+
235+
test("double bar with codes mode block comment between bars", (
236+
L = "a" | /* comment */ | "b",
237+
L = [97,98]
238+
)).
239+
240+
test("double bar with codes mode line comment between bars", (
241+
L = "a" | % line comment
242+
| "b",
243+
L = [97,98]
244+
)).
245+
246+
test("double bar with codes mode block comment in spaced bar with tail", (
247+
L = "abc" |/* comment */| K,
248+
L = [97,98,99|K]
249+
)).
250+
251+
test("double bar with codes mode comment before double bar", (
252+
L = "a" /* before */ || "b",
253+
L = [97,98]
254+
)).
255+
256+
test("double bar with codes mode comment after double bar", (
257+
L = "a" || /* after */ "b",
258+
L = [97,98]
259+
)).
260+
261+
test("double bar with codes mode comment before spaced bars", (
262+
L = "a" /* before */ | | "b",
263+
L = [97,98]
264+
)).
265+
266+
test("double bar with codes mode comment after spaced bars", (
267+
L = "a" | | /* after */ "b",
268+
L = [97,98]
269+
)).
270+
271+
test("double bar with codes mode multiple comments around bars", (
272+
L = "a" /* before */ | /* between */ | /* after */ "b",
273+
L = [97,98]
274+
)).
275+
276+
test("double bar with codes mode empty at start of chain", (
277+
L = ""||"abc"||"de",
278+
L = [97,98,99,100,101]
279+
)).
280+
281+
test("double bar with codes mode empty in middle of chain", (
282+
L = "ab"||""||"cd",
283+
L = [97,98,99,100]
284+
)).
285+
286+
test("double bar with codes mode empty at end of chain", (
287+
L = "abc"||"de"||"",
288+
L = [97,98,99,100,101]
289+
)).
290+
291+
test("double bar with codes mode single character strings", (
292+
L = "x"||"y"||"z",
293+
L = [120,121,122]
294+
)).
295+
296+
test("double bar with codes mode unicode characters", (
297+
L = "α"||"β"||tail,
298+
L = [945,946|tail]
299+
)).
300+
301+
test("double bar with codes mode longer strings", (
302+
L = "hello"||"world",
303+
L = [104,101,108,108,111,119,111,114,108,100]
304+
)).
305+
306+
test("double bar with codes mode nested unification", (
307+
"a"||"b"||X = [97,98,99],
308+
X = [99]
309+
)).
310+
311+
312+
test("double bar with codes mode with numeric tail", (
313+
L = "abc"||123,
314+
L = [97,98,99|123]
315+
)).
316+
317+
318+
:- set_prolog_flag(double_quotes, chars).

0 commit comments

Comments
 (0)