Skip to content

Commit 51c17cf

Browse files
authored
Parallel states refactor (#153)
1 parent 0673df0 commit 51c17cf

File tree

8 files changed: 153 additions and 193 deletions

8 files changed

+153
-193
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
runs-on: ubuntu-latest
1313
strategy:
1414
matrix:
15-
nim: [1.6.18, 2.0.0, 2.2.0]
15+
nim: [1.6.18, 1.6.20, 2.0.0, 2.0.14, 2.2.0]
1616
steps:
1717
- uses: actions/checkout@v2
1818
- name: Run Tests

bench/bench.nim

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,4 +244,8 @@ when isMainModule:
244244
# open the log with KCachegrind
245245
246246
$ nim c --debugger:native --threads:off -d:danger -d:useMalloc -o:bin/bench2 bench/bench2.nim && valgrind --tool=callgrind -v ./bin/bench2
247+
248+
# Bench
249+
250+
$ nim c -r --threads:off -d:danger --mm:arc -o:bin/bench bench/bench.nim
247251
]#

src/regex/nfafindall.nim

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ type
1919
s: seq[MatchItem]
2020
i: int
2121
RegexMatches* = object
22-
a, b: Submatches
22+
a, b: Pstates
2323
m: Matches
2424
c: Capts
2525
look: Lookaround
@@ -46,22 +46,18 @@ func add(ms: var Matches, m: MatchItem) {.inline.} =
4646
func clear(ms: var Matches) {.inline.} =
4747
ms.i = 0
4848

49-
template initMaybeImpl(
49+
func initMaybeImpl(
5050
ms: var RegexMatches,
5151
size: int
52-
) =
53-
if ms.a == nil:
54-
assert ms.b == nil
55-
ms.a = newSubmatches size
56-
ms.b = newSubmatches size
57-
ms.look = initLook()
58-
doAssert ms.a.cap >= size and
59-
ms.b.cap >= size
52+
) {.inline.} =
53+
ms.a.reset size
54+
ms.b.reset size
55+
ms.look = initLook()
6056

61-
template initMaybeImpl(
57+
func initMaybeImpl(
6258
ms: var RegexMatches,
6359
regex: Regex
64-
) =
60+
) {.inline.} =
6561
initMaybeImpl(ms, regex.nfa.s.len)
6662

6763
func hasMatches(ms: RegexMatches): bool {.inline.} =
@@ -130,7 +126,7 @@ func submatch(
130126
while nti < L:
131127
let isEoe = ntn.kind == reEoe
132128
let nt0 = nt
133-
matched = not smB.hasState(nt) and
129+
matched = nt notin smB and
134130
(ntn.match(c.Rune) or ntn.kind == reEoe)
135131
inc nti
136132
captx = capt
@@ -158,10 +154,10 @@ func submatch(
158154
smA.clear()
159155
if not eoeFound:
160156
eoeFound = true
161-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
157+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
162158
smi = -1
163159
break
164-
smB.add (nt0, captx, bounds.a .. i-1)
160+
smB.add initPstate(nt0, captx, bounds.a .. i-1)
165161
inc smi
166162
swap smA, smB
167163

@@ -181,7 +177,7 @@ func findSomeImpl*(
181177
i = start.int
182178
iPrev = start.int
183179
optFlag = mfFindMatchOpt in flags
184-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
180+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
185181
if start-1 in 0 .. text.len-1:
186182
cPrev = bwRuneAt(text, start-1).int32
187183
while i < text.len:
@@ -200,7 +196,7 @@ func findSomeImpl*(
200196
# else: # XXX clear captures
201197
if optFlag:
202198
return i
203-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
199+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
204200
iPrev = i
205201
cPrev = c.int32
206202
submatch(ms, text, regex, iPrev, cPrev, -1'i32)

src/regex/nfafindall2.nim

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,28 +54,24 @@ type
5454
bounds: Bounds
5555
Matches = seq[MatchItem]
5656
RegexMatches2* = object
57-
a, b: Submatches
57+
a, b: Pstates
5858
m: Matches
5959
c: Capts3
6060
look: Lookaround
6161

62-
template initMaybeImpl(
62+
func initMaybeImpl(
6363
ms: var RegexMatches2,
6464
size, groupsLen: int
65-
) =
66-
if ms.a == nil:
67-
assert ms.b == nil
68-
ms.a = newSubmatches size
69-
ms.b = newSubmatches size
70-
ms.c = initCapts3 groupsLen
71-
ms.look = initLook()
72-
doAssert ms.a.cap >= size and
73-
ms.b.cap >= size
65+
) {.inline.} =
66+
ms.a.reset(size)
67+
ms.b.reset(size)
68+
ms.c.reset(groupsLen)
69+
ms.look = initLook()
7470

75-
template initMaybeImpl(
71+
func initMaybeImpl(
7672
ms: var RegexMatches2,
7773
regex: Regex
78-
) =
74+
) {.inline.} =
7975
initMaybeImpl(ms, regex.nfa.s.len, regex.groupsCount)
8076

8177
func add(ms: var RegexMatches2, m: MatchItem) {.inline.} =
@@ -170,7 +166,7 @@ func nextState(
170166
while nti < L:
171167
let isEoe = ntn.kind == reEoe
172168
let nt0 = nt
173-
matched = not smB.hasState(nt) and
169+
matched = nt notin smB and
174170
(ntn.match(c.Rune) or ntn.kind == reEoe)
175171
inc nti
176172
captx = capt
@@ -187,10 +183,10 @@ func nextState(
187183
smA.clear()
188184
if not eoeFound:
189185
eoeFound = true
190-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
186+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
191187
smi = -1
192188
break
193-
smB.add (nt0, captx, bounds.a .. i-1)
189+
smB.add initPstate(nt0, captx, bounds.a .. i-1)
194190
inc smi
195191
swap smA, smB
196192
capts.recycle()
@@ -214,7 +210,7 @@ func findSomeImpl*(
214210
flags = regex.flags.toMatchFlags + flags
215211
optFlag = mfFindMatchOpt in flags
216212
binFlag = mfBytesInput in flags
217-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
213+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
218214
if start-1 in 0 .. text.len-1:
219215
cPrev = if binFlag:
220216
text[start-1].int32
@@ -236,7 +232,7 @@ func findSomeImpl*(
236232
return i
237233
if optFlag:
238234
return i
239-
smA.add (0'i16, -1.CaptIdx, i .. i-1)
235+
smA.add initPstate(0'i16, -1.CaptIdx, i .. i-1)
240236
iPrev = i
241237
cPrev = c.int32
242238
nextState(ms, text, regex, iPrev, cPrev, -1'i32, flags)

src/regex/nfamacro.nim

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ type
3838
): NimNode {.nimcall, noSideEffect, raises: [].}
3939
Lookaround = object
4040
ahead, behind: Sig
41-
smL: NimNode
4241

4342
# todo: can not use unicodeplus due to
4443
# https://github.com/nim-lang/Nim/issues/7059
@@ -240,9 +239,7 @@ func genLookaroundMatch(
240239
look: Lookaround
241240
): NimNode =
242241
template nfa: untyped = n.subExp.nfa
243-
template smL: untyped = look.smL
244-
let smlA = quote do: lastA(`smL`)
245-
let smlB = quote do: lastB(`smL`)
242+
defVars smlA, smlB
246243
var flags = {mfAnchored}
247244
if n.subExp.reverseCapts:
248245
flags.incl mfReverseCapts
@@ -262,10 +259,9 @@ func genLookaroundMatch(
262259
`matched` = not `matched`
263260
let nfaLenLit = newLit nfa.s.len
264261
result = quote do:
265-
grow `smL`
266-
`smL`.last.setLen `nfaLenLit`
262+
var `smlA` = initPstates(`nfaLenLit`)
263+
var `smlB` = initPstates(`nfaLenLit`)
267264
`lookaroundStmt`
268-
removeLast `smL`
269265

270266
func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
271267
doAssert not isEpsilonTransition(n)
@@ -293,7 +289,7 @@ func genMatchedBody(
293289
let eTransitions = getEpsilonTransitions(nfa, n, nti)
294290
if eTransitions.len == 0:
295291
return quote do:
296-
add(`smB`, (`ntLit`, `capt`, `bounds2`))
292+
add(`smB`, initPstate(`ntLit`, `capt`, `bounds2`))
297293
var matchedBody = newSeq[NimNode]()
298294
matchedBody.add quote do:
299295
`matched` = true
@@ -325,7 +321,7 @@ func genMatchedBody(
325321
doAssert false
326322
matchedBody.add quote do:
327323
if `matched`:
328-
add(`smB`, (`ntLit`, `captx`, `bounds2`))
324+
add(`smB`, initPstate(`ntLit`, `captx`, `bounds2`))
329325
return newStmtList matchedBody
330326

331327
func genNextState(
@@ -339,10 +335,10 @@ func genNextState(
339335
#[
340336
case n
341337
of 0:
342-
if not smB.hasState(1):
338+
if not smB.contains(1):
343339
if c == 'a':
344340
smB.add((1, capt, bounds))
345-
if not smB.hasState(4):
341+
if not smB.contains(4):
346342
if c == 'b':
347343
smB.add((4, capt, bounds))
348344
of 1:
@@ -384,11 +380,11 @@ func genNextState(
384380
i, nti, nfa, look, flags)
385381
if mfAnchored in flags and s[nt].kind == reEoe:
386382
branchBodyN.add quote do:
387-
if not hasState(`smB`, `ntLit`):
383+
if not contains(`smB`, `ntLit`):
388384
`matchedBodyStmt`
389385
else:
390386
branchBodyN.add quote do:
391-
if not hasState(`smB`, `ntLit`) and `matchCond`:
387+
if not contains(`smB`, `ntLit`) and `matchCond`:
392388
`matchedBodyStmt`
393389
doAssert eoeOnly or branchBodyN.len > 0
394390
if branchBodyN.len > 0:
@@ -418,12 +414,15 @@ func nextState(
418414
flags: set[MatchFlag],
419415
eoeOnly = false
420416
): NimNode =
421-
defForVars n, capt, bounds
417+
defForVars pstate
418+
let n = quote do: `pstate`.ni
419+
let capt = quote do: `pstate`.ci
420+
let bounds = quote do: `pstate`.bounds
422421
let eoeBailOut = if mfAnchored in flags:
423422
quote do:
424423
if `n` == `eoe`:
425-
if not hasState(`smB`, `n`):
426-
add(`smB`, (`n`, `capt`, `bounds`))
424+
if not contains(`smB`, `n`):
425+
add(`smB`, initPstate(`n`, `capt`, `bounds`))
427426
break
428427
else:
429428
newEmptyNode()
@@ -433,7 +432,7 @@ func nextState(
433432
flags, eoeOnly)
434433
result = quote do:
435434
`smB`.clear()
436-
for `n`, `capt`, `bounds` in `smA`.items:
435+
for `pstate` in `smA`.items:
437436
`eoeBailOut`
438437
`nextStateStmt`
439438
swap `smA`, `smB`
@@ -483,7 +482,7 @@ func matchImpl(
483482
if `start`-1 in 0 .. `text`.len-1:
484483
`cPrev` = bwRuneAt(`text`, `start`-1).int32
485484
clear(`smA`)
486-
add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
485+
add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1))
487486
while `i` < `text`.len:
488487
fastRuneAt(`text`, iNext, `c`, true)
489488
`nextStateStmt`
@@ -534,7 +533,7 @@ func reversedMatchImpl(
534533
if `start` in 0 .. `text`.len-1:
535534
`cPrev` = runeAt(`text`, `start`).int32
536535
clear(`smA`)
537-
add(`smA`, (0'i16, `captIdx`, `i` .. `i`-1))
536+
add(`smA`, initPstate(0'i16, `captIdx`, `i` .. `i`-1))
538537
while iNext > 0:
539538
bwFastRuneAt(`text`, iNext, `c`)
540539
`nextStateStmt`
@@ -551,11 +550,11 @@ func reversedMatchImpl(
551550
`captsStmt`
552551
`matched` = `smA`.len > 0
553552

554-
template look(smL: NimNode): untyped =
553+
template look: untyped =
555554
Lookaround(
556555
ahead: matchImpl,
557-
behind: reversedMatchImpl,
558-
smL: smL)
556+
behind: reversedMatchImpl
557+
)
559558

560559
template constructSubmatches2(
561560
captures, txt, capts, capt, size: untyped
@@ -578,24 +577,23 @@ proc matchImpl*(text, expLit, body: NimNode): NimNode =
578577
if not (expLit.kind == nnkCallStrLit and $expLit[0] == "rex"):
579578
error "not a regex literal; only rex\"regex\" is allowed", expLit
580579
let exp = expLit[1]
581-
defVars smA, smB, capts, capt, matched, smL
580+
defVars smA, smB, capts, capt, matched
582581
let regex = reCt(exp.strVal)
583582
let startLit = newLit 0
584583
let flags: set[MatchFlag] = {}
585584
let matchImplStmt = matchImpl(
586585
smA, smB, capts, capt, matched,
587-
text, startLit, regex.nfa, look(smL), flags)
586+
text, startLit, regex.nfa, look(), flags)
588587
let nfaLenLit = newLit regex.nfa.s.len
589588
let nfaGroupsLen = int(regex.groupsCount)
590589
result = quote do:
591590
block:
592591
var
593-
`smA` = newSubmatches `nfaLenLit`
594-
`smB` = newSubmatches `nfaLenLit`
592+
`smA` = initPstates `nfaLenLit`
593+
`smB` = initPstates `nfaLenLit`
595594
`capts` = default(Capts)
596595
`capt` = -1'i32
597596
`matched` = false
598-
`smL` {.used.} = default(SmLookaround)
599597
`matchImplStmt`
600598
if `matched`:
601599
var matches {.used, inject.} = newSeq[string]()

0 commit comments

Comments
 (0)