Skip to content

Commit 2d0f491

Browse files
authored
Fix dot to match anything in ascii mode (#135)
fix dot to match anything in ascii mode
1 parent 77aa186 commit 2d0f491

File tree

6 files changed

+8
-25
lines changed

6 files changed

+8
-25
lines changed

src/regex.nim

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ This flag makes ascii mode ``(?-u)`` the default.
319319
:test:
320320
let flags = {regexArbitraryBytes}
321321
doAssert match("\xff", re2(r"\xff", flags))
322-
#doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags))
322+
doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags))
323323
324324
Beware of (un)expected behaviour when mixing UTF-8 characters.
325325
@@ -1581,6 +1581,7 @@ when isMainModule:
15811581
doAssert match("", re2"\w(?<=a)Ϊ")
15821582
doAssert match("Ϊb", re2"\w(?<=Ϊ)b")
15831583
doAssert match("弢Ⓐ", re2"\w(?<=弢)Ⓐ")
1584+
doAssert match("弢", re2"(?-u).+")
15841585
block: # Follows Nim re's behaviour
15851586
doAssert match("abc", re2"(?<=a)bc", m, start = 1)
15861587
doAssert(not match("abc", re2"(?<=x)bc", m, start = 1))

src/regex/exptransformation.nim

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,6 @@ func toAsciiKind(k: NodeKind): NodeKind =
118118
reNotDigitAscii
119119
of reNotWhiteSpace:
120120
reNotWhiteSpaceAscii
121-
of reAny:
122-
reAnyAscii
123-
of reAnyNL:
124-
reAnyNLAscii
125121
else:
126122
k
127123

src/regex/nfamacro.nim

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,8 @@ func genMatch(c: NimNode, n: Node): NimNode =
120120
quote do: not `whiteSpaceMatch`
121121
of reAny:
122122
quote do: `c` != '\L'.ord
123-
of reAnyAscii:
124-
quote do: `c` <= 128 and `c` != '\L'.ord
125123
of reAnyNL:
126124
quote do: true
127-
of reAnyNlAscii:
128-
quote do: `c` <= 128
129125
of reCharCI:
130126
let cp2Lit = newLit n.cp.swapCase().int32
131127
quote do: `c` == `cpLit` or `c` == `cp2Lit`

src/regex/nodematch.nim

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,6 @@ func isDigitAscii(r: Rune): bool {.inline.} =
9595
else:
9696
false
9797

98-
func isAnyAscii(r: Rune): bool {.inline.} =
99-
(r.int <= int8.high and
100-
r != lineBreakRune)
101-
10298
# todo: can not use unicodeplus due to
10399
# https://github.com/nim-lang/Nim/issues/7059
104100
func swapCase*(r: Rune): Rune =
@@ -160,10 +156,6 @@ func match*(n: Node, r: Rune): bool {.inline.} =
160156
not r.isWhiteSpaceAscii()
161157
of reNotUCC:
162158
r.unicodeCategory() notin n.cc
163-
of reAnyAscii:
164-
r.isAnyAscii()
165-
of reAnyNLAscii:
166-
r.isAnyAscii() or r == lineBreakRune
167159
else:
168160
assert n.kind == reChar
169161
n.cp == r

src/regex/types.nim

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ type
8484
reNotAlphaNumAscii, # \W ascii only
8585
reNotDigitAscii, # \D ascii only
8686
reNotWhiteSpaceAscii, # \S ascii only
87-
reAnyAscii, # . ascii only
88-
reAnyNlAscii, # . new-line ascii only
8987
reInSet, # [abc]
9088
reNotSet, # [^abc]
9189
reLookahead, # (?=...)
@@ -261,9 +259,7 @@ const
261259
reWhiteSpaceAscii,
262260
reNotAlphaNumAscii,
263261
reNotDigitAscii,
264-
reNotWhiteSpaceAscii,
265-
reAnyAscii,
266-
reAnyNLAscii}
262+
reNotWhiteSpaceAscii}
267263
repetitionKind* = {
268264
reZeroOrMore,
269265
reOneOrMore,
@@ -310,7 +306,7 @@ func `$`*(n: Node): string =
310306
of reNotDigit, reNotDigitAscii: r"\D"
311307
of reNotWhiteSpace, reNotWhiteSpaceAscii: r"\S"
312308
of reNotUCC: r"\PN"
313-
of reAny, reAnyNl, reAnyAscii, reAnyNlAscii: "."
309+
of reAny, reAnyNl: "."
314310
of reInSet, reNotSet:
315311
var str = ""
316312
str.add '['

tests/tests2.nim

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,6 +1232,8 @@ test "tflags":
12321232
check(not "Ǝ".isMatch(re2"(?-u)[\w]"))
12331233
check(not "\t".isMatch(re2"(?-u)[\w]"))
12341234
check "ƎƎ".isMatch(re2"(?-u)[^\w](?u)\w")
1235+
check isMatch("弢", re2"(?u).+")
1236+
check isMatch("弢", re2"(?-u).+")
12351237

12361238
check "a".isMatch(re2"(?x)a")
12371239
check "a".isMatch(re2"(?x)a ")
@@ -3156,9 +3158,9 @@ when not defined(js) or NimMajor >= 2:
31563158
check match("abcd", re2(r"(?-su).{4}", flags))
31573159
check match("abcd", re2(r"(?s-u).{4}", flags))
31583160
check match("abcd", re2(r"(?u-s).{4}", flags))
3159-
#check match("弢", re2(r".{4}", flags)) # XXX should match
3161+
check match("弢", re2(r".{4}", flags))
31603162
check match("弢", re2(r"(?u).{4}", flags))
3161-
check(not match("弢", re2(r"(?-u).{4}", flags)))
3163+
check match("弢", re2(r"(?-u).{4}", flags))
31623164
check(not match("\n", re2(r".", flags)))
31633165
check match("\n", re2(r"(?s).", flags))
31643166
check(not match("\n", re2(r"(?u).", flags)))

0 commit comments

Comments
 (0)