Skip to content

Commit 8627174

Browse files
Add DFA generator
1 parent 5979124 commit 8627174

File tree

6 files changed

+261
-3
lines changed

6 files changed

+261
-3
lines changed

src/zkregex_fuzzer/cli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010

1111
from zkregex_fuzzer.configs import GENERATORS, TARGETS, VALID_INPUT_GENERATORS
12-
from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_grammar
12+
from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_dfa, fuzz_with_grammar
1313
from zkregex_fuzzer.grammar import REGEX_GRAMMAR
1414
from zkregex_fuzzer.harness import HarnessStatus
1515
from zkregex_fuzzer.logger import logger
@@ -234,6 +234,14 @@ def do_fuzz(args):
234234
inputs_num=args.inputs_num,
235235
kwargs=kwargs,
236236
)
237+
elif args.fuzzer == "dfa":
238+
fuzz_with_dfa(
239+
target_implementation=args.target,
240+
oracle_params=(args.oracle == "valid", args.valid_input_generator),
241+
regex_num=args.regex_num,
242+
inputs_num=args.inputs_num,
243+
kwargs=kwargs,
244+
)
237245

238246

239247
def do_reproduce(args):

src/zkregex_fuzzer/configs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
from zkregex_fuzzer.grammar import REGEX_GRAMMAR
2-
from zkregex_fuzzer.regexgen import DatabaseRegexGenerator, GrammarRegexGenerator
2+
from zkregex_fuzzer.regexgen import (
3+
DatabaseRegexGenerator,
4+
DFARegexGenerator,
5+
GrammarRegexGenerator,
6+
)
37
from zkregex_fuzzer.runner import CircomRunner, NoirRunner, PythonReRunner
48
from zkregex_fuzzer.vinpgen import ExrexGenerator, GrammarBasedGenerator, RstrGenerator
59

@@ -22,4 +26,5 @@
2226
# Registry of regex generator classes, keyed by the name used to select them.
GENERATORS = {
    # Regexes derived from a grammar.
    "grammar": GrammarRegexGenerator,
    # Regexes taken from a database.
    "database": DatabaseRegexGenerator,
    # Regexes obtained by converting randomly generated DFAs.
    "dfa": DFARegexGenerator,
}

src/zkregex_fuzzer/dfa.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
"""
66

77
import random
8+
import string
9+
from typing import Dict, Optional, Set
810

911
from automata.fa.dfa import DFA
1012
from automata.fa.gnfa import GNFA
@@ -226,3 +228,160 @@ def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random")
226228
)
227229
else:
228230
return _merge_strategy(states, alphabet, transitions, initial, original_finals)
231+
232+
233+
def _get_alphabet(
    use_unicode: bool, num_states: int, min_size: int = 2, max_size: int = 10
) -> Set[str]:
    """
    Generate a random alphabet for a DFA.

    Args:
        use_unicode: When True, symbols are drawn from the whole Unicode code
            point range (excluding surrogates). When False, symbols are drawn
            from ASCII letters, digits, punctuation and whitespace.
        num_states: Currently unused; kept so existing callers keep working.
        min_size: Smallest alphabet size that may be chosen (inclusive).
        max_size: Largest alphabet size that may be chosen (inclusive).

    Returns:
        A set of single-character strings whose size lies in
        ``[min_size, max_size]``.

    Raises:
        ValueError: If ``min_size > max_size`` (raised by ``random.randint``)
            or, for the restricted pool, if the chosen size exceeds the number
            of available characters (raised by ``random.sample``).
    """
    alphabet_size = random.randint(min_size, max_size)

    if not use_unicode:
        # Restricted character set: letters, digits, punctuation, whitespace
        allowed_pool = (
            string.ascii_letters
            + string.digits
            + string.punctuation
            + string.whitespace
        )
        return set(random.sample(allowed_pool, alphabet_size))

    alphabet: Set[str] = set()
    while len(alphabet) < alphabet_size:
        codepoint = random.randint(0, 0x10FFFF)
        # chr() accepts every value in this range, including the surrogate
        # block. Lone surrogates are not valid characters (they cannot be
        # encoded as UTF-8), so they are rejected explicitly.
        if 0xD800 <= codepoint <= 0xDFFF:
            continue
        alphabet.add(chr(codepoint))
    return alphabet
259+
260+
261+
def generate_random_dfa(
    max_depth: int = 5,
    use_unicode: bool = False,
    single_final_state: bool = False,
    seed: Optional[int] = None,
) -> DFA:
    """
    Generate a random, minimized DFA.

    The automaton is built with between 1 and ``max_depth`` states named
    ``q0``..``qN`` (``q0`` is the initial state), a random alphabet and a
    total random transition function. The transitions are then adjusted so
    that at least one self-loop exists, a branching state exists when
    possible, and an "optional" route towards acceptance exists.

    Args:
        max_depth: Upper bound (inclusive) on the number of states generated
            before minimization. Must be at least 1.
        use_unicode: Forwarded to ``_get_alphabet`` to select the symbol pool.
        single_final_state: When True exactly one accepting state is chosen;
            otherwise a random non-empty subset is chosen and the initial
            state is additionally made accepting.
        seed: When given, the module-level ``random`` generator is seeded with
            it so the same seed yields the same automaton. When None the
            generator is used in its current state.

    Returns:
        The minimized DFA.

    Raises:
        ValueError: If ``max_depth`` is smaller than 1.
    """
    if max_depth < 1:
        raise ValueError("max_depth must be at least 1")

    # Seed the random number generator for reproducibility (if seed is given)
    if seed is not None:
        random.seed(seed)

    num_states = random.randint(1, max_depth)

    # Keep states and symbols in ordered lists for every random draw.
    # Iterating a set of strings depends on per-process hash randomisation,
    # which would make the result differ between runs even with a fixed seed.
    state_list = [f"q{i}" for i in range(num_states)]
    states = set(state_list)
    initial_state = state_list[0]

    # Determine final state(s)
    if single_final_state:
        final_states = {random.choice(state_list)}
    else:
        # One or more final states (randomly chosen subset of states)
        num_finals = random.randint(1, num_states)  # at least one final
        final_states = set(random.sample(state_list, num_finals))

    alphabet = _get_alphabet(use_unicode, num_states)
    symbols = sorted(alphabet)

    # Construct transitions: for each state and each symbol, choose a random next state
    transitions: Dict[str, Dict[str, str]] = {
        state: {sym: random.choice(state_list) for sym in symbols}
        for state in state_list
    }

    # Ensure at least one self-loop (cycle)
    loop_exists = any(
        state in targets.values() for state, targets in transitions.items()
    )
    if not loop_exists:
        # Add a self-loop on a random state with a random symbol
        some_state = random.choice(state_list)
        some_symbol = random.choice(symbols)
        transitions[some_state][some_symbol] = some_state

    # Ensure at least one branching point (one state with two different outgoing targets)
    if len(symbols) >= 2:
        branching_exists = any(
            len(set(targets.values())) >= 2 for targets in transitions.values()
        )
        if not branching_exists:
            # No state branches, so every symbol leaving the initial state
            # reaches the same target; redirect the second symbol elsewhere.
            current_target = transitions[initial_state][symbols[0]]
            possible_targets = [s for s in state_list if s != current_target]
            # With a single state there is nowhere else to go; skip in that case.
            if possible_targets:
                transitions[initial_state][symbols[1]] = random.choice(
                    possible_targets
                )

    # Introduce an "optional" path (allow skipping or taking a symbol):
    # We do this by creating an alternate route to a final state.
    if single_final_state:
        if num_states > 1:
            # For a single final state, ensure multiple paths (direct & indirect) to it
            (final_state,) = final_states  # the only final state
            # If initial state doesn't already have a direct transition to final, add one
            if final_state not in transitions[initial_state].values():
                transitions[initial_state][random.choice(symbols)] = final_state
            # Also ensure an indirect path: take the first symbol from the
            # initial state that leads somewhere other than the final state
            intermediate_symbols = [
                sym
                for sym, dest in transitions[initial_state].items()
                if dest != final_state
            ]
            if intermediate_symbols:
                # By construction this state is not the final state.
                intermediate_state = transitions[initial_state][
                    intermediate_symbols[0]
                ]
                # Link the intermediate state to the final state on some symbol
                transitions[intermediate_state][random.choice(symbols)] = final_state
    else:
        # If multiple finals are allowed, we can treat the start state as an
        # optional accepting state (accept empty string or early termination)
        final_states.add(initial_state)

    # Construct the DFA with the generated components
    dfa = DFA(
        states=states,
        input_symbols=alphabet,
        transitions=transitions,
        initial_state=initial_state,
        final_states=final_states,
    )

    # Minimize the DFA for a simpler equivalent automaton
    try:
        # If automata-lib provides a direct minification method
        dfa = dfa.minify()
    except AttributeError:
        # Fallback: convert to NFA and use DFA.from_nfa for minimization.
        # Each DFA transition becomes a singleton set in the NFA transition.
        nfa_transitions: Dict[str, Dict[str, Set[str]]] = {
            state: {sym: {dest} for sym, dest in trans.items()}
            for state, trans in transitions.items()
        }
        nfa = NFA(
            states=states,
            input_symbols=alphabet,
            transitions=nfa_transitions,
            initial_state=initial_state,
            final_states=final_states,
        )
        # Convert NFA to DFA with minimization
        dfa = DFA.from_nfa(nfa, minify=True)

    return dfa

src/zkregex_fuzzer/fuzzer.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@
77
from zkregex_fuzzer.configs import GRAMMARS, TARGETS, VALID_INPUT_GENERATORS
88
from zkregex_fuzzer.harness import HarnessStatus, harness
99
from zkregex_fuzzer.logger import logger
10-
from zkregex_fuzzer.regexgen import DatabaseRegexGenerator, GrammarRegexGenerator
10+
from zkregex_fuzzer.regexgen import (
11+
DatabaseRegexGenerator,
12+
DFARegexGenerator,
13+
GrammarRegexGenerator,
14+
)
1115
from zkregex_fuzzer.runner import PythonReRunner
1216
from zkregex_fuzzer.runner.base_runner import Runner
1317
from zkregex_fuzzer.transformers import regex_to_grammar
@@ -55,6 +59,25 @@ def fuzz_with_database(
5559
fuzz_with_regexes(regexes, inputs_num, target_runner, oracle_params, kwargs)
5660

5761

62+
def fuzz_with_dfa(
    target_implementation: str,
    oracle_params: tuple[bool, str],
    regex_num: int,
    inputs_num: int,
    kwargs: dict,
):
    """
    Fuzz a target using regexes produced by the DFA-based generator.

    Args:
        target_implementation: Key into ``TARGETS`` selecting the runner.
        oracle_params: Passed through unchanged to ``fuzz_with_regexes``.
        regex_num: Number of regexes requested from the generator.
        inputs_num: Passed through unchanged to ``fuzz_with_regexes``.
        kwargs: Passed through unchanged to ``fuzz_with_regexes``.

    Raises:
        KeyError: If ``target_implementation`` is not a key of ``TARGETS``.
    """
    # Resolve the runner first so an unknown target fails before any
    # generation work is done.
    runner = TARGETS[target_implementation]

    regexes = DFARegexGenerator().generate_many(regex_num)
    logger.info(f"Generated {len(regexes)} regexes.")

    fuzz_with_regexes(regexes, inputs_num, runner, oracle_params, kwargs)
79+
80+
5881
def fuzz_with_regexes(
5982
regexes: list[str],
6083
inputs_num: int,

src/zkregex_fuzzer/regexgen.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222

2323
from fuzzingbook.Grammars import Grammar
2424

25+
from zkregex_fuzzer.dfa import (
26+
generate_random_dfa,
27+
transform_dfa_to_regex,
28+
)
2529
from zkregex_fuzzer.logger import logger
2630
from zkregex_fuzzer.utils import (
2731
check_zkregex_rules_basic,
@@ -144,3 +148,33 @@ def generate_many(self, num):
144148
break
145149

146150
return result
151+
152+
153+
class DFARegexGenerator(RegexGenerator):
    """
    Generate regexes by building a random DFA and converting it to a regex.
    """

    def __init__(
        self,
        max_depth: int = 5,
        use_unicode: bool = False,
        single_final_state: bool = True,
        max_attempts: int = 1000,
    ):
        """
        Args:
            max_depth: Forwarded to ``generate_random_dfa``.
            use_unicode: Forwarded to ``generate_random_dfa``.
            single_final_state: Forwarded to ``generate_random_dfa``.
            max_attempts: Maximum number of generate/convert attempts made by
                a single ``generate_unsafe`` call before giving up.
        """
        self.max_depth = max_depth
        self.use_unicode = use_unicode
        self.single_final_state = single_final_state
        self.max_attempts = max_attempts

    def generate_unsafe(self) -> str:
        """
        Generate a regex using a DFA.

        Generation or conversion may fail for a given random DFA, so the
        operation is retried. The number of retries is bounded so that a
        configuration that can never succeed (for example ``max_depth=0``)
        raises instead of looping forever.

        Returns:
            The regex obtained from ``transform_dfa_to_regex``.

        Raises:
            RuntimeError: If every one of the ``max_attempts`` attempts failed;
                the last underlying error is attached as the cause.
        """
        last_error: Optional[Exception] = None
        for _ in range(self.max_attempts):
            try:
                dfa = generate_random_dfa(
                    self.max_depth, self.use_unicode, self.single_final_state
                )
                return transform_dfa_to_regex(dfa)
            except Exception as e:
                # Best effort: a failed attempt is logged and retried.
                logger.debug(f"Error generating DFA: {e}")
                last_error = e
        raise RuntimeError(
            f"Failed to generate a regex from a random DFA after "
            f"{self.max_attempts} attempts"
        ) from last_error

tests/test_dfa.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from automata.regex.regex import isequal
22
from zkregex_fuzzer.dfa import (
3+
generate_random_dfa,
34
has_multiple_accepting_states_regex,
45
regex_to_dfa,
56
transform_dfa_to_regex,
@@ -69,3 +70,31 @@ def test_transform_dfa_to_regex_with_multiple_accepting_states():
6970
transformed_regex = transform_dfa_to_regex(transformed_dfa)
7071
new_dfa = regex_to_dfa(transformed_regex)
7172
assert len(new_dfa.final_states) == 1
73+
74+
75+
def _generate_dfa_and_regex(single_final_state: bool, max_attempts: int = 1000):
    """
    Return a ``(dfa, regex)`` pair for a randomly generated DFA.

    Generation or conversion can fail for some random DFAs, so it is retried.
    The retry count is bounded so a persistent failure fails the test with the
    last error instead of hanging the test run.
    """
    last_error = None
    for _ in range(max_attempts):
        try:
            dfa = generate_random_dfa(
                max_depth=10, use_unicode=False, single_final_state=single_final_state
            )
            return dfa, transform_dfa_to_regex(dfa)
        except Exception as e:
            last_error = e
    raise AssertionError(
        f"could not generate a DFA and regex in {max_attempts} attempts"
    ) from last_error


def test_generate_dfa():
    """
    Check the number of accepting states of generated DFAs, both directly and
    after a round trip through a regex.
    """
    # Single accepting state requested: exactly one must remain.
    dfa_with_final, regex_with_final = _generate_dfa_and_regex(
        single_final_state=True
    )
    dfa_from_regex_with_final = regex_to_dfa(regex_with_final)
    assert len(dfa_with_final.final_states) == 1
    assert len(dfa_from_regex_with_final.final_states) == 1

    # Multiple accepting states allowed: at least one must exist.
    dfa_without_final, regex_without_final = _generate_dfa_and_regex(
        single_final_state=False
    )
    dfa_from_regex_without_final = regex_to_dfa(regex_without_final)
    assert len(dfa_without_final.final_states) >= 1
    assert len(dfa_from_regex_without_final.final_states) >= 1

0 commit comments

Comments
 (0)