Skip to content

Commit de1b6ba

Browse files
Merricx authored and StefanosChaliasos committed
Add --save option and limit vinpgen length
1 parent 8fc54e6 commit de1b6ba

File tree

7 files changed: 160 additions (+160) and 50 deletions (-50)

src/zkregex_fuzzer/cli.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,13 @@
44

55
import argparse
66
import os
7+
import random
8+
import uuid
79
from pathlib import Path
810
from zkregex_fuzzer.fuzzer import fuzz_with_database, fuzz_with_grammar
911
from zkregex_fuzzer.grammar import REGEX_GRAMMAR
1012
from zkregex_fuzzer.configs import TARGETS, VALID_INPUT_GENERATORS, GENERATORS
13+
from zkregex_fuzzer.harness import HarnessStatus
1114
from zkregex_fuzzer.logger import logger
1215
from zkregex_fuzzer.runner.circom import CircomSubprocess, SnarkjsSubprocess, ZkRegexSubprocess
1316

@@ -43,7 +46,13 @@ def main():
4346
help=f"The valid input generator to use for the fuzzer (options: {list(VALID_INPUT_GENERATORS.keys())})."
4447
)
4548
parser.add_argument(
46-
"--output",
49+
"--save",
50+
choices=[status.name for status in HarnessStatus],
51+
nargs="*",
52+
help="Save reproducible files according to the specified Harness status",
53+
)
54+
parser.add_argument(
55+
"--save-output",
4756
type=str,
4857
default=os.getcwd(),
4958
help=f"The output path where the reproducible files will be stored (default: .)"
@@ -148,6 +157,13 @@ def main():
148157
print(f"Max depth: {args.grammar_max_depth}")
149158
print("-" * 80)
150159

160+
kwargs = vars(args)
161+
162+
# set global seed
163+
seed = str(uuid.uuid4())
164+
kwargs['seed'] = seed
165+
random.seed(seed)
166+
151167
if args.fuzzer == "grammar":
152168
fuzz_with_grammar(
153169
target_grammar="basic",
@@ -156,15 +172,15 @@ def main():
156172
regex_num=args.regex_num,
157173
inputs_num=args.inputs_num,
158174
max_depth=args.grammar_max_depth,
159-
kwargs=vars(args)
175+
kwargs=kwargs
160176
)
161177
elif args.fuzzer == "database":
162178
fuzz_with_database(
163179
target_implementation=args.target,
164180
oracle_params=(args.oracle == "valid", args.valid_input_generator),
165181
regex_num=args.regex_num,
166182
inputs_num=args.inputs_num,
167-
kwargs=vars(args)
183+
kwargs=kwargs
168184
)
169185

170186

src/zkregex_fuzzer/fuzzer.py

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
Implements the logic for generating regexes using The Fuzzing Book's GrammarFuzzer.
33
"""
44

5-
import re
65
from fuzzingbook.Grammars import simple_grammar_fuzzer, Grammar
76
from zkregex_fuzzer.runner.base_runner import Runner
87
from zkregex_fuzzer.transformers import regex_to_grammar
@@ -31,15 +30,8 @@ def fuzz_with_grammar(
3130
regex_generator = GrammarRegexGenerator(grammar, "<start>")
3231
regexes = regex_generator.generate_many(regex_num)
3332
logger.info(f"Generated {len(regexes)} regexes.")
34-
oracle, oracle_generator = oracle_params
35-
if oracle:
36-
generator = VALID_INPUT_GENERATORS[oracle_generator]
37-
logger.info(f"Generating {inputs_num} inputs for each regex.")
38-
regexes_inputs = [generator(regex).generate_many(inputs_num) for regex in regexes]
39-
else:
40-
raise NotImplementedError("Oracle not implemented")
41-
42-
fuzz_with_regexes(regexes, regexes_inputs, target_runner, oracle, kwargs)
33+
34+
fuzz_with_regexes(regexes, inputs_num, target_runner, oracle_params, kwargs)
4335

4436
def fuzz_with_database(
4537
target_implementation: str,
@@ -56,26 +48,35 @@ def fuzz_with_database(
5648
regex_generator = DatabaseRegexGenerator()
5749
regexes = regex_generator.generate_many(regex_num)
5850
logger.info(f"Generated {len(regexes)} regexes.")
59-
oracle, oracle_generator = oracle_params
60-
if oracle:
61-
generator = VALID_INPUT_GENERATORS[oracle_generator]
62-
logger.info(f"Generating {inputs_num} inputs for each regex.")
63-
regexes_inputs = [generator(regex).generate_many(inputs_num) for regex in regexes]
64-
else:
65-
raise NotImplementedError("Oracle not implemented")
6651

67-
fuzz_with_regexes(regexes, regexes_inputs, target_runner, oracle, kwargs)
52+
fuzz_with_regexes(regexes, inputs_num, target_runner, oracle_params, kwargs)
6853

6954
def fuzz_with_regexes(
7055
regexes: list[str],
71-
regexes_inputs: list[str],
72-
target_runner: Runner,
73-
oracle: bool,
56+
inputs_num: int,
57+
target_runner: type[Runner],
58+
oracle_params: tuple[bool, str],
7459
kwargs: dict,
7560
):
7661
"""
7762
Fuzz test with pre-seeded regexes.
7863
"""
64+
max_input_size = kwargs.get("circom_max_input_size", None)
65+
oracle, oracle_generator = oracle_params
66+
if oracle:
67+
generator = VALID_INPUT_GENERATORS[oracle_generator]
68+
logger.info(f"Generating {inputs_num} inputs for each regex.")
69+
regexes_inputs = []
70+
for regex in regexes:
71+
try:
72+
regex_inputs = generator(regex).generate_many(inputs_num, max_input_size)
73+
except ValueError as e:
74+
logger.warning(e)
75+
regex_inputs = []
76+
regexes_inputs.append(regex_inputs)
77+
else:
78+
raise NotImplementedError("Oracle not implemented")
79+
7980
# We should use the PythonReRunner to check the validity of the regexes and the inputs.
8081
# If there is a bug in the PythonReRunner, we might not find it as we will think that
8182
# either the regex or the input is invalid.

src/zkregex_fuzzer/harness.py

Lines changed: 87 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66
In the future, we could pass invalid regexes and check if the secondary runner fails to compile.
77
"""
88

9-
from typing import Type, List
9+
import json
10+
from pathlib import Path
11+
from typing import Type, List, Union
1012
from enum import Enum
1113
from dataclasses import dataclass
1214
from zkregex_fuzzer.logger import logger
@@ -31,6 +33,37 @@ class HarnessResult:
3133
# Error message (if any)
3234
error_message: str = ""
3335

36+
def _return_harness_result(
37+
result: HarnessResult,
38+
status_to_save: list,
39+
output_path: str,
40+
runner: Union[Runner, None],
41+
kwargs: dict,
42+
):
43+
if runner:
44+
if result.status.name in status_to_save:
45+
46+
metadata = {
47+
"seed": kwargs.get("seed"),
48+
"target": kwargs.get("target"),
49+
"oracle": kwargs.get("oracle"),
50+
"input_generator": kwargs.get("valid_input_generator"),
51+
"fuzzer": kwargs.get("fuzzer"),
52+
"regex_num": kwargs.get("regex_num"),
53+
"inputs_num": kwargs.get("inputs_num"),
54+
}
55+
56+
dir_path = runner.save(output_path)
57+
58+
metadata_json = json.dumps(metadata)
59+
metadata_path = Path(dir_path) / "metadata.json"
60+
with open(metadata_path.absolute(), "w") as f:
61+
f.write(metadata_json)
62+
63+
runner.clean()
64+
65+
return result
66+
3467
def harness(
3568
regex: str,
3669
primary_runner_cls: Type[Runner],
@@ -54,26 +87,53 @@ def harness(
5487
"""
5588
regex = regex
5689
inp_num = len(inputs)
57-
output_path = kwargs.get("output")
90+
output_path = kwargs.get("save_output", "")
91+
status_to_save = kwargs.get("save", None) or []
92+
5893
try:
5994
primary_runner = primary_runner_cls(regex, {})
6095
except RegexCompileError as e:
61-
return HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED, str(e))
96+
return _return_harness_result(
97+
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED, str(e)),
98+
status_to_save,
99+
output_path,
100+
None,
101+
kwargs,
102+
)
62103

63104
try:
64105
secondary_runner = secondary_runner_cls(regex, kwargs)
65106
except RegexCompileError as e:
66-
return HarnessResult(regex, inp_num, oracle, [], HarnessStatus.COMPILE_ERROR, str(e))
107+
return _return_harness_result(
108+
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.COMPILE_ERROR, str(e)),
109+
status_to_save,
110+
output_path,
111+
None,
112+
kwargs
113+
)
67114

68115
failed_inputs = []
116+
69117
for input in inputs:
70118
primary_runner_str = None
71119
try:
72120
primary_runner_status, primary_runner_str = primary_runner.match(input)
73121
if primary_runner_status != oracle:
74-
return HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED)
122+
return _return_harness_result(
123+
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED),
124+
status_to_save,
125+
output_path,
126+
None,
127+
kwargs
128+
)
75129
except RegexRunError as e:
76-
return HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED, str(e))
130+
return _return_harness_result(
131+
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.INVALID_SEED, str(e)),
132+
status_to_save,
133+
output_path,
134+
None,
135+
kwargs
136+
)
77137
try:
78138
secondary_runner_status, secondary_runner_str = secondary_runner.match(input)
79139
if secondary_runner_status != oracle:
@@ -84,11 +144,27 @@ def harness(
84144
# failed_inputs.append(input)
85145
except RegexRunError as e:
86146
secondary_runner.save(output_path)
87-
return HarnessResult(regex, inp_num, oracle, [input], HarnessStatus.RUN_ERROR, str(e))
147+
return _return_harness_result(
148+
HarnessResult(regex, inp_num, oracle, [input], HarnessStatus.RUN_ERROR, str(e)),
149+
status_to_save,
150+
output_path,
151+
secondary_runner,
152+
kwargs
153+
)
88154

89155
if len(failed_inputs) > 0:
90-
secondary_runner.save(output_path)
91-
return HarnessResult(regex, inp_num, oracle, failed_inputs, HarnessStatus.FAILED)
156+
return _return_harness_result(
157+
HarnessResult(regex, inp_num, oracle, failed_inputs, HarnessStatus.FAILED),
158+
status_to_save,
159+
output_path,
160+
secondary_runner,
161+
kwargs
162+
)
92163

93-
secondary_runner.clean()
94-
return HarnessResult(regex, inp_num, oracle, [], HarnessStatus.SUCCESS)
164+
return _return_harness_result(
165+
HarnessResult(regex, inp_num, oracle, [], HarnessStatus.SUCCESS),
166+
status_to_save,
167+
output_path,
168+
secondary_runner,
169+
kwargs
170+
)

src/zkregex_fuzzer/regexgen.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ class DatabaseRegexGenerator(RegexGenerator):
7878
Generate regexes using a database of regexes.
7979
"""
8080

81-
def __init__(self, dir_path: str = None):
81+
def __init__(self, dir_path: str = ""):
8282
dir_path = dir_path or self._get_default_path()
8383
self.database = self._get_database_from_path(dir_path)
8484

src/zkregex_fuzzer/runner/base_runner.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,9 +56,9 @@ def clean(self) -> None:
5656
"""
5757
pass
5858

59-
def save(self, path: str) -> None:
59+
def save(self, path: str) -> str:
6060
"""
6161
Save any produced temporary files.
6262
"""
63-
pass
63+
return ""
6464

src/zkregex_fuzzer/runner/circom.py

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,7 @@ def witness_gen(cls, wasm_file_path: str, input_path: str) -> str:
152152
]
153153
result = subprocess.run(cmd, capture_output=True, text=True)
154154

155+
logger.debug(" ".join(cmd))
155156
if result.returncode != 0:
156157
raise RegexRunError(f"Error running with SnarkJS: {result.stdout}")
157158

@@ -233,6 +234,8 @@ class CircomRunner(Runner):
233234
"""
234235

235236
def __init__(self, regex: str, kwargs: dict):
237+
self._circom_path = ""
238+
self._input_path = ""
236239
self._wasm_path = ""
237240
self._r1cs_path = ""
238241
self._zkey_path = ""
@@ -279,6 +282,8 @@ def compile(self, regex: str) -> None:
279282
f.write("\n\n")
280283
f.write("component main {public [msg]} = " + f"{self._template_name}({self._circom_max_input_size});")
281284

285+
self._circom_path = circom_file_path
286+
282287
# Compile the circom code to wasm
283288
logger.debug(f"Compiling circom code starts")
284289
self._wasm_path, self._r1cs_path = CircomSubprocess.compile(circom_file_path, self._link_path)
@@ -302,6 +307,8 @@ def match(self, input: str) -> tuple[bool, str]:
302307
with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp_file:
303308
tmp_file.write(json.dumps({"msg": numeric_input}).encode())
304309
input_path = tmp_file.name
310+
311+
self._input_path = input_path
305312

306313
# Skip if input is larger than circuit max input size
307314
if len(numeric_input) > self._circom_max_input_size:
@@ -311,8 +318,6 @@ def match(self, input: str) -> tuple[bool, str]:
311318
logger.debug(f"Generating witness starts")
312319
witness_path = SnarkjsSubprocess.witness_gen(self._wasm_path, input_path)
313320
logger.debug(f"Generating witness ends")
314-
# Remove input file
315-
Path(input_path).unlink()
316321

317322
# Also run the proving backend if the flag is set
318323
if self._run_the_prover:
@@ -336,16 +341,22 @@ def clean(self):
336341
# Remove all temporary files
337342
if self._wasm_path:
338343
shutil.rmtree(Path(self._wasm_path).parent)
339-
Path.unlink(self._r1cs_path, True)
340-
Path.unlink(self._zkey_path, True)
341-
Path.unlink(self._vkey_path, True)
344+
if self._circom_path: Path(self._circom_path).unlink(True)
345+
if self._input_path: Path(self._input_path).unlink(True)
346+
if self._r1cs_path: Path(self._r1cs_path).unlink(True)
347+
if self._zkey_path: Path(self._zkey_path).unlink(True)
348+
if self._vkey_path: Path(self._vkey_path).unlink(True)
342349

343-
def save(self, path):
350+
def save(self, path) -> str:
351+
circom_path = Path(self._circom_path)
352+
input_path = Path(self._input_path)
344353
r1cs_path = Path(self._r1cs_path)
345354
wasm_path = Path(self._wasm_path)
346355
target_path = Path(path).resolve() / f"output_{r1cs_path.stem}"
347356
target_path.mkdir()
348357

358+
circom_path.replace(target_path / circom_path.name)
359+
input_path.replace(target_path / input_path.name)
349360
r1cs_path.replace(target_path / r1cs_path.name)
350361
wasm_path.replace(target_path / wasm_path.name)
351362

@@ -354,4 +365,6 @@ def save(self, path):
354365
vkey_path = Path(self._vkey_path)
355366

356367
zkey_path.replace(target_path / zkey_path.name)
357-
vkey_path.replace(target_path / vkey_path.name)
368+
vkey_path.replace(target_path / vkey_path.name)
369+
370+
return str(target_path)

0 commit comments

Comments
 (0)