Skip to content

Commit df946b0

Browse files
Use automata-lib to implement DFA-related functions
Implement functions to: (1) transform a regex into a minimized DFA; (2) check whether a DFA has multiple accepting states.
1 parent 0f068fe commit df946b0

File tree

3 files changed

+81
-0
lines changed

3 files changed

+81
-0
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies = [
2222
"fuzzingbook",
2323
"rstr",
2424
"exrex",
25+
"automata-lib",
2526
]
2627

2728
[project.optional-dependencies]

src/zkregex_fuzzer/dfa.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
dfa
3+
4+
A number of functions for working with DFAs.
5+
"""
6+
7+
from automata.fa.nfa import NFA
8+
from automata.fa.dfa import DFA
9+
10+
def regex_to_dfa(regex: str) -> DFA:
    """
    Convert a regex to a minimized DFA.

    The regex is first parsed into an NFA with ``NFA.from_regex`` and the
    NFA is then converted with ``DFA.from_nfa(..., minify=True)``.

    Args:
        regex: The regular expression to convert.

    Returns:
        The DFA built from ``regex``.

    Raises:
        ValueError: If the regex cannot be parsed into an NFA, or if the
            NFA cannot be converted to a DFA. The underlying exception is
            attached as ``__cause__`` so the original traceback is kept.
    """
    try:
        nfa = NFA.from_regex(regex)
    except Exception as e:
        # Chain explicitly so callers can still inspect the library error.
        raise ValueError(
            f"Failed to parse '{regex}' into an automaton: {e}"
        ) from e
    try:
        return DFA.from_nfa(nfa, minify=True)
    except Exception as e:
        raise ValueError(f"Failed to convert NFA to DFA: {e}") from e
22+
23+
def has_multiple_accepting_states_regex(regex: str) -> bool:
    """
    Tell whether the DFA built from ``regex`` has more than one accepting
    (final) state.

    The regex is converted with ``regex_to_dfa`` and the size of the
    resulting DFA's ``final_states`` is checked.

    NOTE:
      - Only handles a subset of regex syntax recognized by automata-lib.
      - For advanced Python regex features, a custom NFA builder is needed.

    Args:
        regex: The regular expression to inspect.

    Returns:
        True if the DFA has more than one final state, False otherwise.

    Raises:
        ValueError: Propagated from ``regex_to_dfa`` when the conversion
            fails.
    """
    return len(regex_to_dfa(regex).final_states) > 1
36+
37+
def has_multiple_accepting_states_dfa(dfa: DFA) -> bool:
    """
    Tell whether ``dfa`` has more than one accepting (final) state.

    Args:
        dfa: The DFA whose ``final_states`` are counted.

    Returns:
        True if ``dfa.final_states`` holds at least two states, False
        otherwise.
    """
    accepting_states = dfa.final_states
    return len(accepting_states) >= 2

tests/test_dfa.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from zkregex_fuzzer.dfa import has_multiple_accepting_states_regex
2+
3+
4+
def test_has_multiple_accepting_states_regex_without_multiple():
    """Each pattern below must be reported as NOT having multiple accepting states."""
    single_accepting_patterns = [
        r"(a|b)*",
        r"abc",
        r"(abc|def|ghi)",
        r"(abc)*",
        r"(hello)",
        r"(ab)*",
        r"(a|b|c)*",
        r"((a|b|c)*abc)",
        r"[a-zA-Z]+",
        r"[0-9]+",
        r"(abc|abcd|abcde)f",
        r"(hello|helloo|hellooo)(foo|foob|fooba)?bar",
        r"(foo|foob|fooba)?bar",
        r"(abc|def)(gh|jk)(lm|nop)",
    ]

    # Check the patterns one at a time so the first offender fails the test.
    for pattern in single_accepting_patterns:
        assert not has_multiple_accepting_states_regex(pattern)
24+
25+
def test_has_multiple_accepting_states_regex_with_multiple():
    """Each pattern below must be reported as having multiple accepting states."""
    multi_accepting_patterns = [
        r"(ab|aba)",
        r"(ab|aba)*",
        r"(hello|hell)",
        r"b(aa|aaa)",
        r"(cat|cats)",
        r"(xy|xyx)",
        r"(a|ab|abc)",
        r"(1|12)",
    ]

    # Check the patterns one at a time so the first offender fails the test.
    for pattern in multi_accepting_patterns:
        assert has_multiple_accepting_states_regex(pattern)

0 commit comments

Comments
 (0)