Skip to content

Commit dcffae2

Browse files
Merricx authored and StefanosChaliasos committed
Implement reproduce subcommand
1 parent de1b6ba commit dcffae2

File tree

5 files changed

+172
-25
lines changed

5 files changed

+172
-25
lines changed

src/zkregex_fuzzer/cli.py

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@
77
import random
88
import uuid
99
from pathlib import Path
10+
from zkregex_fuzzer.reproduce import reproduce
1011
from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_grammar
1112
from zkregex_fuzzer.grammar import REGEX_GRAMMAR
1213
from zkregex_fuzzer.configs import TARGETS, VALID_INPUT_GENERATORS, GENERATORS
1314
from zkregex_fuzzer.harness import HarnessStatus
1415
from zkregex_fuzzer.logger import logger
1516
from zkregex_fuzzer.runner.circom import CircomSubprocess, SnarkjsSubprocess, ZkRegexSubprocess
1617

17-
def main():
18+
def fuzz_parser():
1819
parser = argparse.ArgumentParser(
19-
description="Generate fuzzed regexes using The Fuzzing Book's GrammarFuzzer."
20+
add_help=False
2021
)
2122
parser.add_argument(
2223
"--regex-num",
@@ -93,17 +94,26 @@ def main():
9394
type=str,
9495
help="Path to the ptau (powers-of-tau) file for the proving step"
9596
)
96-
parser.add_argument(
97-
"--logger-level",
98-
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
99-
default="INFO",
100-
help="Set the logger level (default: INFO)."
97+
98+
99+
return parser
100+
101+
def reproduce_parser():
    """
    Build the argument parser holding the options of the ``reproduce`` subcommand.

    The parser is created with ``add_help=False`` and exposes a single
    required option, ``--path``, which accepts one or more string values.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(add_help=False)
    cli.add_argument(
        "--path",
        required=True,
        type=str,
        nargs="+",
        help="Path to the target directory output that want to be reproduced (support wildcard pattern).",
    )
    return cli
106115

116+
def do_fuzz(args):
107117
if args.oracle == "valid" and not args.valid_input_generator:
108118
print("Valid input generator is required for valid oracle.")
109119
exit(1)
@@ -182,6 +192,41 @@ def main():
182192
inputs_num=args.inputs_num,
183193
kwargs=kwargs
184194
)
195+
196+
def do_reproduce(args):
    """
    Run the ``reproduce`` subcommand.

    Args:
        args: parsed command-line namespace; its ``path`` attribute is
            forwarded unchanged to ``reproduce``.
    """
    target_paths = args.path
    reproduce(target_paths)
198+
199+
def main():
    """
    Command-line entry point.

    Builds the top-level parser (global ``--logger-level`` option plus the
    ``fuzz`` and ``reproduce`` subcommands), configures the logger level and
    dispatches to the handler of the selected subcommand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--logger-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="INFO",
        help="Set the logger level (default: INFO).",
    )

    # required=True: without it, running the tool with no subcommand parses
    # successfully with ``subcommand=None`` and silently does nothing.
    # argparse now reports a usage error instead.
    subparsers = parser.add_subparsers(dest="subcommand", required=True)
    subparsers.add_parser(
        "fuzz",
        help="Generate fuzzed regexes using The Fuzzing Book's GrammarFuzzer.",
        parents=[fuzz_parser()],
    )
    subparsers.add_parser(
        "reproduce",
        help="Reproduce the bug that found by the fuzzer.",
        parents=[reproduce_parser()],
    )

    args = parser.parse_args()

    logger.setLevel(args.logger_level)

    if args.subcommand == "fuzz":
        do_fuzz(args)
    elif args.subcommand == "reproduce":
        do_reproduce(args)
185230

186231

187232
if __name__ == "__main__":

src/zkregex_fuzzer/harness.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
"""
88

99
import json
10+
import random
1011
from pathlib import Path
1112
from typing import Type, List, Union
1213
from enum import Enum
1314
from dataclasses import dataclass
1415
from zkregex_fuzzer.logger import logger
1516
from zkregex_fuzzer.runner import Runner, RegexCompileError, RegexRunError
17+
from zkregex_fuzzer.utils import get_random_filename
1618

1719
class HarnessStatus(Enum):
1820
SUCCESS = 0 # Did not find a bug
@@ -40,26 +42,27 @@ def _return_harness_result(
4042
runner: Union[Runner, None],
4143
kwargs: dict,
4244
):
43-
if runner:
44-
if result.status.name in status_to_save:
45+
if result.status.name in status_to_save:
4546

4647
metadata = {
47-
"seed": kwargs.get("seed"),
48-
"target": kwargs.get("target"),
49-
"oracle": kwargs.get("oracle"),
50-
"input_generator": kwargs.get("valid_input_generator"),
51-
"fuzzer": kwargs.get("fuzzer"),
52-
"regex_num": kwargs.get("regex_num"),
53-
"inputs_num": kwargs.get("inputs_num"),
48+
"config": kwargs,
49+
"regex": result.regex,
50+
"inputs": result.failed_inputs,
51+
"status": result.status.name,
5452
}
5553

56-
dir_path = runner.save(output_path)
54+
if runner:
55+
dir_path = runner.save(output_path)
56+
else:
57+
dir_path = Path(output_path) / f"output_{get_random_filename()}"
58+
dir_path.mkdir()
5759

5860
metadata_json = json.dumps(metadata)
5961
metadata_path = Path(dir_path) / "metadata.json"
6062
with open(metadata_path.absolute(), "w") as f:
6163
f.write(metadata_json)
6264

65+
if runner:
6366
runner.clean()
6467

6568
return result
@@ -162,7 +165,7 @@ def harness(
162165
)
163166

164167
return _return_harness_result(
165-
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.SUCCESS),
168+
HarnessResult(regex, inp_num, oracle, inputs, HarnessStatus.SUCCESS),
166169
status_to_save,
167170
output_path,
168171
secondary_runner,

src/zkregex_fuzzer/reproduce.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Reproduce bugs found by the fuzzer.
3+
"""
4+
5+
import glob
6+
import json
7+
from pathlib import Path
8+
from zkregex_fuzzer.configs import TARGETS
9+
from zkregex_fuzzer.logger import logger
10+
from zkregex_fuzzer.harness import HarnessStatus
11+
from zkregex_fuzzer.runner import RegexCompileError, RegexRunError
12+
from zkregex_fuzzer.utils import pretty_regex
13+
14+
def reproduce(path_list: list[str]):
    """
    Replay every saved fuzzer output matched by the given path patterns.

    Each entry of ``path_list`` is expanded with ``glob.glob``. A pattern
    that matches nothing is logged and skipped. For every match that exists
    and contains a ``metadata.json`` file, ``simulate_harness`` is called on
    it; a match without ``metadata.json`` is logged and skipped.

    Args:
        path_list: path patterns (wildcards allowed) to expand.
    """
    for glob_pattern in path_list:
        matches = glob.glob(glob_pattern)
        if len(matches) == 0:
            logger.info(f"Path {glob_pattern} is not exist, skipping.")
            continue

        for match in matches:
            candidate = Path(match)
            if candidate.exists():
                metadata_file = candidate / "metadata.json"
                if metadata_file.exists():
                    simulate_harness(candidate)
                else:
                    logger.info(f"metadata.json is not exist in {candidate}, skipping.")
32+
33+
34+
def simulate_harness(directory: Path):
    """
    Re-run one saved fuzzer result and print whether it reproduces.

    Reads ``metadata.json`` from ``directory`` (keys ``regex``, ``inputs``,
    ``status`` and ``config``), builds the runner registered in ``TARGETS``
    under ``config["target"]``, replays the recorded inputs through
    ``runner.match`` and compares the outcome with the recorded status name.

    Exactly one verdict is printed per directory:

    * a ``RegexCompileError`` while building the runner is compared against
      ``COMPILE_ERROR``;
    * a ``RegexRunError`` while matching is compared against ``RUN_ERROR``;
      replay stops at the first such error and the FAILED/SUCCESS verdict is
      skipped, so a run error can no longer be followed by a contradictory
      "Unexpected SUCCESS" / "completed successfully" line;
    * otherwise inputs whose match result differs from the oracle are
      compared against ``FAILED`` (some mismatches) or ``SUCCESS`` (none).

    Args:
        directory: directory containing the ``metadata.json`` to replay.
    """
    with open(directory / "metadata.json", "r") as f:
        metadata = json.load(f)

    regex = metadata["regex"]
    inputs = metadata["inputs"]
    expected_status = metadata["status"]
    kwargs = metadata["config"]

    target_runner = TARGETS[kwargs["target"]]
    # The oracle is the match result every input is expected to produce.
    oracle = kwargs["oracle"] == "valid"

    print(f"Reproducing regex: {pretty_regex(regex)}")
    print("-" * 80)
    print(f"Directory path: {directory}")
    print(f"Inputs: {inputs}")
    print(f"Expected result: {expected_status}")
    print("-" * 80)

    try:
        runner = target_runner(regex, kwargs)
    except RegexCompileError as e:
        if expected_status == HarnessStatus.COMPILE_ERROR.name:
            print("Reproduce completed successfully!")
            print("-" * 80)
            print(e)
        else:
            print(f"Unexpected COMPILE_ERROR reproduce status: {e}")

        print("=" * 80)
        return

    failed_inputs = []
    run_error = None
    try:
        # ``inputs or []`` tolerates metadata that stored ``null`` for inputs.
        for candidate in inputs or []:
            try:
                matched, _ = runner.match(candidate)
            except RegexRunError as e:
                run_error = e
                break
            if matched != oracle:
                failed_inputs.append(candidate)
    finally:
        # Always release the runner's resources, even on an unexpected error.
        runner.clean()

    if run_error is not None:
        if expected_status == HarnessStatus.RUN_ERROR.name:
            print("Reproduce completed successfully!")
            print("-" * 80)
            print(run_error)
        else:
            print(f"Unexpected RUN_ERROR reproduce status: {run_error}")
    elif failed_inputs:
        if expected_status == HarnessStatus.FAILED.name:
            print("Reproduce completed successfully!")
        else:
            print(f"Unexpected FAILED reproduce status: {failed_inputs}")
    else:
        if expected_status == HarnessStatus.SUCCESS.name:
            print("Reproduce completed successfully!")
        else:
            print(f"Unexpected SUCCESS reproduce status: {inputs}")

    print("=" * 80)

src/zkregex_fuzzer/runner/circom.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,10 +355,10 @@ def save(self, path) -> str:
355355
target_path = Path(path).resolve() / f"output_{r1cs_path.stem}"
356356
target_path.mkdir()
357357

358-
circom_path.replace(target_path / circom_path.name)
359-
input_path.replace(target_path / input_path.name)
360-
r1cs_path.replace(target_path / r1cs_path.name)
361-
wasm_path.replace(target_path / wasm_path.name)
358+
if self._circom_path: circom_path.replace(target_path / circom_path.name)
359+
if self._input_path: input_path.replace(target_path / input_path.name)
360+
if self._r1cs_path: r1cs_path.replace(target_path / r1cs_path.name)
361+
if self._wasm_path: wasm_path.replace(target_path / wasm_path.name)
362362

363363
if self._run_the_prover:
364364
zkey_path = Path(self._zkey_path)

src/zkregex_fuzzer/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
"""
44

55
import re
6-
6+
import random
7+
import string
78
from fuzzingbook.Grammars import simple_grammar_fuzzer, Grammar
89

910

@@ -98,6 +99,9 @@ def grammar_fuzzer(grammar: Grammar, start_symbol: str, max_nonterminals: int =
9899
max_nonterminals=max_nonterminals,
99100
max_expansion_trials=max_expansion_trials)
100101

102+
def get_random_filename(length: int = 8):
    """
    Return a random name made of lowercase ASCII letters and digits.

    Characters are drawn with ``random.choices`` from the module-level
    ``random`` generator, so the result is reproducible under ``random.seed``.

    Args:
        length: number of characters to generate (default: 8).

    Returns:
        str: the generated name, ``length`` characters long.

    Raises:
        ValueError: if ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be at least 1")
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choices(alphabet, k=length))
104+
101105
def pretty_regex(regex: str):
102106
"""
103107
Format raw string regex to printable chars

0 commit comments

Comments
 (0)