Skip to content

Commit c0ea531

Browse files
authored
disable utf8 regex check for arbitrary bytes (#136)
1 parent 4ecbeb5 commit c0ea531

File tree

3 files changed: +10 -1 lines changed

3 files changed

+10
-1
lines changed

src/regex.nim

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1466,6 +1466,7 @@ when isMainModule:
14661466
block:
14671467
let flags = {regexArbitraryBytes}
14681468
doAssert match("\xff", re2(r"\xff", flags))
1469+
doAssert match("\xff", re2("\xff", flags))
14691470
doAssert replace("\xff", re2(r"\xff", flags), "abc") == "abc"
14701471
doAssert match("\xff\xff", re2(r"\xff\xff", flags))
14711472
doAssert replace("\xff\xff", re2(r"\xff\xff", flags), "abc") == "abc"

src/regex/compiler.nim

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ when defined(regexDotDir):
99
import ./dotgraph
1010

1111
func reImpl*(s: string, flags: RegexFlags = {}): Regex {.inline.} =
12-
if verifyUtf8(s) != -1:
12+
if regexArbitraryBytes notin flags and verifyUtf8(s) != -1:
1313
raise newException(RegexError, "Invalid utf-8 regex")
1414
var groups: GroupsCapture
1515
let rpn = s

tests/tests2.nim

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3073,11 +3073,19 @@ when not defined(js) or NimMajor >= 2:
30733073
check match("a", re2(r"a", flags))
30743074
check(not match("b", re2(r"a", flags)))
30753075
check match("\xff", re2(r"\xff", flags))
3076+
check match("\xff", re2("\xff", flags))
3077+
check match("\xf8\xa1\xa1\xa1\xa1", re2(r"\xf8\xa1\xa1\xa1\xa1", flags))
3078+
check match("\xf8\xa1\xa1\xa1\xa1", re2("\xf8\xa1\xa1\xa1\xa1", flags))
30763079
check replace("\xff", re2(r"\xff", flags), "abc") == "abc"
3080+
check replace("\xff", re2("\xff", flags), "abc") == "abc"
30773081
check match("\xff\xff", re2(r"\xff\xff", flags))
3082+
check match("\xff\xff", re2("\xff\xff", flags))
30783083
check replace("\xff\xff", re2(r"\xff\xff", flags), "abc") == "abc"
3084+
check replace("\xff\xff", re2("\xff\xff", flags), "abc") == "abc"
30793085
check match("\xff\xff", re2(r"\xff+", flags))
3086+
check match("\xff\xff", re2("\xff+", flags))
30803087
check replace("\xff\xff", re2(r"\xff", flags), "abc") == "abcabc"
3088+
check replace("\xff\xff", re2("\xff", flags), "abc") == "abcabc"
30813089
check(not match("\xf0", re2(r"\xff", flags)))
30823090
check replace("\xf0", re2(r"\xff", flags), "abc") == "\xf0"
30833091
check match("弢", re2(r"弢", flags))

0 commit comments

Comments
 (0)