Skip to content

Commit a44b678

Browse files
Implement dfa_to_regex and transform_dfa_to_single_accepting_state
1 parent df946b0 commit a44b678

File tree

2 files changed

+231
-10
lines changed

2 files changed

+231
-10
lines changed

src/zkregex_fuzzer/dfa.py

Lines changed: 187 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
A number of functions for working with DFAs.
55
"""
66

7-
from automata.fa.nfa import NFA
7+
import random
8+
89
from automata.fa.dfa import DFA
10+
from automata.fa.gnfa import GNFA
11+
from automata.fa.nfa import NFA
12+
913

1014
def regex_to_dfa(regex: str) -> DFA:
1115
"""
@@ -20,6 +24,7 @@ def regex_to_dfa(regex: str) -> DFA:
2024
except Exception as e:
2125
raise ValueError(f"Failed to convert NFA to DFA: {e}")
2226

27+
2328
def has_multiple_accepting_states_regex(regex: str) -> bool:
2429
"""
2530
Returns True if converting the given regex to a DFA yields
@@ -34,9 +39,190 @@ def has_multiple_accepting_states_regex(regex: str) -> bool:
3439

3540
return num_final_states > 1
3641

42+
3743
def has_multiple_accepting_states_dfa(dfa: DFA) -> bool:
    """
    Report whether ``dfa`` has more than one accepting (final) state.

    Returns True when ``dfa.final_states`` holds two or more states and
    False otherwise.
    """
    accepting_count = len(dfa.final_states)
    return accepting_count > 1
49+
50+
51+
def transform_dfa_to_regex(dfa: DFA) -> str:
    """
    Build a regular expression from a DFA.

    The DFA is first turned into an equivalent GNFA with ``GNFA.from_dfa``;
    the returned string is whatever that GNFA's ``to_regex()`` produces.
    """
    return GNFA.from_dfa(dfa).to_regex()
60+
61+
62+
def _pick_one_strategy(
    states: set, alphabet: set, transitions: dict, initial: str, original_finals: set
) -> DFA:
    """
    Keep one randomly chosen accepting state as the sole final state.

    Every transition that led to one of the other accepting states is
    redirected to the chosen one. The other accepting states are then removed
    together with their outgoing transitions, except for the initial state,
    which is always kept. The result is a partial DFA with exactly one final
    state; it is generally NOT language-equivalent to the input automaton.

    ``states`` and ``transitions`` are modified in place, so callers should
    pass copies of the data they want to keep.

    Args:
        states: All states of the source DFA.
        alphabet: Input symbols of the source DFA.
        transitions: Mapping ``state -> {symbol -> destination}``.
        initial: The initial state.
        original_finals: The accepting states of the source DFA (non-empty).

    Returns:
        A new ``DFA`` whose only final state is the chosen one.
    """
    # Set iteration order is not stable across interpreter runs for string
    # states (hash randomisation), so order the candidates deterministically
    # before drawing; a seeded ``random`` then reproduces the same choice.
    chosen_final = random.choice(sorted(original_finals, key=repr))
    other_finals = original_finals - {chosen_final}

    # Redirect transitions that pointed to any other final state.
    for state in states:
        for symbol in alphabet:
            if transitions[state].get(symbol) in other_finals:
                transitions[state][symbol] = chosen_final

    # Nothing points at the other finals any more; drop them unless one of
    # them is the initial state, which the DFA still needs.
    for dropped in other_finals:
        if dropped != initial:
            states.discard(dropped)
            transitions.pop(dropped, None)

    return DFA(
        states=states,
        input_symbols=alphabet,
        transitions=transitions,
        initial_state=initial,
        final_states={chosen_final},
        allow_partial=True,
    )
92+
93+
94+
def _new_dummy_strategy(
    states: set, alphabet: set, transitions: dict, initial: str, original_finals: set
) -> DFA:
    """
    Replace all accepting states with one freshly created accepting state.

    A new state named ``"DummyFinal"`` (suffixed with ``"_X"`` until unique)
    is added with no outgoing transitions. Every transition that led to an
    original accepting state is pointed at it instead, and the original
    accepting states are removed unless they are the initial state.

    ``states`` and ``transitions`` are modified in place.
    """
    dummy = "DummyFinal"
    # Keep extending the name until it no longer clashes with an existing state.
    while dummy in states:
        dummy += "_X"
    states.add(dummy)

    # Re-target every edge that used to enter an original final state.
    for source in states:
        if source != dummy:
            for symbol in alphabet:
                if transitions[source].get(symbol) in original_finals:
                    transitions[source][symbol] = dummy

    # The dummy state has no outgoing edges (the DFA is partial).
    transitions[dummy] = {}

    # Drop the former final states; the initial state always stays.
    for old_final in original_finals:
        if old_final == initial:
            continue
        states.discard(old_final)
        transitions.pop(old_final, None)

    return DFA(
        states=states,
        input_symbols=alphabet,
        transitions=transitions,
        initial_state=initial,
        final_states={dummy},
        allow_partial=True,
    )
130+
131+
132+
def _merge_strategy(
    states: set, alphabet: set, transitions: dict, initial: str, original_finals: set
) -> DFA:
    """
    Merge all accepting states into one unified accepting state.

    The merged state is the initial state when that state is itself
    accepting; otherwise it is a new state named ``"MergedFinal"`` (suffixed
    with ``"_X"`` until unique). Its outgoing transitions are the union of the
    original accepting states' transitions, with edges between accepting
    states becoming self-loops. When several accepting states disagree on the
    destination for a symbol, one destination is kept, so the result is
    generally NOT language-equivalent to the input automaton.

    ``states`` and ``transitions`` are modified in place, so callers should
    pass copies of the data they want to keep.

    Args:
        states: All states of the source DFA.
        alphabet: Input symbols of the source DFA.
        transitions: Mapping ``state -> {symbol -> destination}``.
        initial: The initial state.
        original_finals: The accepting states of the source DFA.

    Returns:
        A new partial ``DFA`` whose only final state is the merged state.
    """
    if initial in original_finals:
        # Reuse the initial state as the merged final to preserve its identity.
        merged_name = initial
    else:
        merged_name = "MergedFinal"
        while merged_name in states:
            merged_name += "_X"

    # Combine the outgoing transitions of all original finals.
    merged_transitions = {}
    for symbol in alphabet:
        destinations = set()
        for final_state in original_finals:
            if final_state not in transitions:
                continue
            dest = transitions[final_state].get(symbol)
            if dest in original_finals:
                # An edge between two finals is a self-loop on the merged state.
                destinations.add(merged_name)
            elif dest is not None:
                destinations.add(dest)
        if destinations:
            # With more than one candidate the DFA must still be
            # deterministic, so keep exactly one. Choosing by ``repr`` order
            # instead of set iteration order makes the result independent of
            # hash randomisation and reproducible between runs.
            merged_transitions[symbol] = min(destinations, key=repr)
        # No candidates: the merged state has no transition on this symbol.

    # Remove the old finals; the initial state (possibly reused as the merged
    # state) is always kept.
    for final_state in original_finals:
        if final_state == initial:
            continue
        states.discard(final_state)
        transitions.pop(final_state, None)

    states.add(merged_name)

    # Redirect every remaining edge that entered an old final state.
    for state in states:
        if state == merged_name:
            continue
        for symbol in alphabet:
            if transitions[state].get(symbol) in original_finals:
                transitions[state][symbol] = merged_name

    transitions[merged_name] = merged_transitions

    return DFA(
        states=states,
        input_symbols=alphabet,
        transitions=transitions,
        initial_state=initial,
        final_states={merged_name},
        allow_partial=True,
    )
193+
194+
195+
def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random") -> DFA:
    """
    Transform a DFA so that it has a single accepting state.

    A DFA that already has zero or one accepting state is returned unchanged
    (the same object). Otherwise the DFA's components are copied and handed to
    one of the strategy helpers, which builds a new DFA.

    Args:
        dfa: The automaton to transform.
        strategy: One of ``"pick_one"``, ``"new_dummy"``, ``"merge"`` or
            ``"random"`` (choose one of the three at random).

    Returns:
        ``dfa`` itself when no change is needed, else a new ``DFA`` produced
        by the selected strategy.

    Raises:
        ValueError: If ``strategy`` is not a recognised name and the DFA has
            more than one accepting state.
    """
    # If there's already one or zero accepting states, no change is needed.
    if len(dfa.final_states) <= 1:
        return dfa

    # Insertion order matters: "random" draws from this list of names.
    handlers = {
        "pick_one": _pick_one_strategy,
        "new_dummy": _new_dummy_strategy,
        "merge": _merge_strategy,
    }

    if strategy == "random":
        strategy = random.choice(list(handlers))
    elif strategy not in handlers:
        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # otherwise an unknown name would silently run the merge strategy.
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of "
            f"{[*handlers, 'random']}"
        )

    # Copy the DFA's components so the helpers can mutate them freely.
    states = set(dfa.states)
    alphabet = set(dfa.input_symbols)
    transitions = {
        state: dict(dest_dict)  # copy of the inner dict
        for state, dest_dict in dfa.transitions.items()
    }
    initial = dfa.initial_state
    original_finals = set(dfa.final_states)

    return handlers[strategy](states, alphabet, transitions, initial, original_finals)

tests/test_dfa.py

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,23 @@
1-
from zkregex_fuzzer.dfa import has_multiple_accepting_states_regex
1+
import re
2+
3+
from automata.regex.regex import isequal
4+
from zkregex_fuzzer.dfa import (
5+
has_multiple_accepting_states_regex,
6+
regex_to_dfa,
7+
transform_dfa_to_regex,
8+
transform_dfa_to_single_accepting_state,
9+
)
10+
11+
regex_with_multiple_accepting_states = [
12+
r"(ab|aba)",
13+
r"(ab|aba)*",
14+
r"(hello|hell)",
15+
r"b(aa|aaa)",
16+
r"(cat|cats)",
17+
r"(xy|xyx)",
18+
r"(a|ab|abc)",
19+
r"(1|12)",
20+
]
221

322

423
def test_has_multiple_accepting_states_regex_without_multiple():
@@ -22,17 +41,33 @@ def test_has_multiple_accepting_states_regex_without_multiple():
2241
for regex in regex_without_multiple_accepting_states:
2342
assert not has_multiple_accepting_states_regex(regex)
2443

44+
2545
def test_has_multiple_accepting_states_regex_with_multiple():
    """Every pattern in the shared module-level list must be reported as multi-final."""
    for pattern in regex_with_multiple_accepting_states:
        assert has_multiple_accepting_states_regex(pattern)
48+
49+
50+
def test_transform_dfa_to_regex():
    """Round-trip each pattern regex -> DFA -> regex and compare with ``isequal``."""
    patterns = (
        r"(ab|aba)",
        r"(ab|aba)*",
        r"(hello|hell)",
    )
    for pattern in patterns:
        round_tripped = transform_dfa_to_regex(regex_to_dfa(pattern))
        assert isequal(pattern, round_tripped)
3660

37-
for regex in regex_with_multiple_accepting_states:
38-
assert has_multiple_accepting_states_regex(regex)
61+
62+
def test_transform_dfa_to_regex_with_multiple_accepting_states():
    """
    For each strategy and each multi-final pattern, the transformed DFA must
    have one final state, and so must the DFA rebuilt from its regex.
    """
    for strategy in ("pick_one", "new_dummy", "merge"):
        for pattern in regex_with_multiple_accepting_states:
            single_final_dfa = transform_dfa_to_single_accepting_state(
                regex_to_dfa(pattern), strategy=strategy
            )
            assert len(single_final_dfa.final_states) == 1

            # Convert back through a regex and check the property survives.
            rebuilt_dfa = regex_to_dfa(transform_dfa_to_regex(single_final_dfa))
            assert len(rebuilt_dfa.final_states) == 1

0 commit comments

Comments
 (0)