1- from zkregex_fuzzer .utils import is_valid_regex , has_lazy_quantifier , correct_carret_position , check_zkregex_rules_basic
1+ from zkregex_fuzzer .utils import (
2+ check_zkregex_rules_basic ,
3+ correct_carret_position ,
4+ has_lazy_quantifier ,
5+ is_valid_regex ,
6+ )
27
38
49def test_valid_regex ():
@@ -31,15 +36,17 @@ def test_invalid_regex():
def test_has_lazy_quantifier():
    """Check has_lazy_quantifier against patterns with and without lazy quantifiers."""
    # Each regex maps to the boolean that has_lazy_quantifier is expected to return.
    expectations = {
        r"ab*c": False,
        r"a+?": True,
        r"(abc){2,5}?": True,
        r"xyz": False,
        r"[a-z]*": False,
        r".+?": True,
    }
    for pattern, expected in expectations.items():
        actual = has_lazy_quantifier(pattern)
        assert actual == expected, (
            f"Expected {pattern} to have lazy quantifier: {expected}"
        )
4350
4451
def test_correct_carret_position():
    """Table-driven check of correct_carret_position.

    Each entry pairs a regex with the boolean that correct_carret_position
    is expected to return for it; the first mismatch fails the test with a
    message naming the offending regex.
    """
    # Test cases with expected results
    test_cases = [
        # Basic cases
        (r"^abc", True),  # Start of regex
        (r"abc", True),  # No caret
        (r"abc^", False),  # Invalid position at end
        # Capturing group cases
        (r"(^abc)", False),  # Start of capturing group
        (r"(|^)", True),  # Empty alternative with caret
        (r"(abc|^def)", False),  # Caret in middle of alternative
        (r"(\n|^)", True),  # Newline alternative
        (r"abc(\n|^)", False),  # Not at start of regex
        (r"(\r|^)", True),  # Carriage return alternative
        (r"(\r\n|^)", True),  # CRLF alternative
        (r"(\n\r|^)", True),  # LFCR alternative (LF followed by CR)
        (r"( |^)", True),  # Spaces before alternative
        # Character class cases
        (r"[^abc]", True),  # Simple negated character class
        (r"abc[^xyz]def", True),  # Negated character class in middle
        (r"[abc^]", False),  # Caret not at start of character class
        (r"[[^]]", True),  # Nested character class
        (r"[^]", True),  # Empty negated character class
        # Multiple caret cases
        (r"^abc[^xyz]", True),  # Valid multiple carets
        (r"^abc^", False),  # Invalid multiple carets
        (r"[^abc][^xyz]", True),  # Multiple negated character classes
        # Edge cases
        (r"", True),  # Empty string
        (r"^", True),  # Just caret
        (r"[]^]", False),  # Invalid character class
        (r"(^)|^", False),  # Multiple start anchors
        (r"(^abc|^def)", False),  # Multiple start anchors in group
        # Complex cases
        (r"(|^)abc[^xyz]123", True),  # Combination of valid cases
        (r"^abc[^xyz](|^)def", False),  # Invalid multiple start anchors
        (r"[^abc]^[^xyz]", False),  # Invalid caret between character classes
        (r"( \r\n |^)abc", True),  # Complex whitespace before alternative
        # Escaped caret cases
        (r"abc\^", True),  # Escaped caret at end
        (r"abc\^def", True),  # Escaped caret in middle
    ]
    for regex, expected in test_cases:
        assert correct_carret_position(regex) == expected, (
            f"Expected {regex} to have correct caret position: {expected}"
        )
99102
100103
101104def test_check_zkregex_rules_basic ():
@@ -109,48 +112,47 @@ def test_check_zkregex_rules_basic():
109112 (r"abc$def" , (True , True )), # Valid dollar sign in middle
110113 (r"abc" , (True , True )), # No dollar sign
111114 (r"$abc" , (True , True )), # Dollar sign at start
112-
113115 # 2. Caret position tests
114116 (r"^abc" , (True , True )), # Valid caret at start
115117 (r"(|^)abc" , (True , True )), # Valid caret in alternative
116118 (r"(\r\n|^)abc" , (True , True )), # Valid caret with CRLF alternative
117119 (r"[^abc]" , (True , True )), # Valid caret in character class
118120 (r"abc^" , (False , True )), # Invalid caret at end
119121 (r"abc^def" , (False , True )), # Invalid caret in middle
120-
121122 # 3. Lazy quantifier tests
122123 (r"abc*" , (True , True )), # Valid greedy quantifier
123124 (r"abc*?" , (False , True )), # Invalid lazy star quantifier
124125 (r"abc+?" , (False , True )), # Invalid lazy plus quantifier
125126 (r"abc??" , (False , True )), # Invalid lazy question mark quantifier
126127 (r"abc{1,2}?" , (False , True )), # Invalid lazy range quantifier
127-
128128 # 4. Combined valid cases
129129 (r"^abc$" , (True , True )), # Valid start and end anchors
130130 (r"(|^)abc$" , (True , True )), # Valid alternative and end anchor
131131 (r"[^abc].*$" , (True , True )), # Valid character class and end anchor
132-
133132 # 5. Combined invalid cases
134133 (r"^abc$def" , (True , True )), # Valid dollar position with caret
135134 (r"abc^def$" , (False , True )), # Invalid caret with dollar
136135 (r"[^abc]*?$" , (False , True )), # Invalid lazy quantifier with valid anchors
137-
138136 # 6. Complex cases
139137 (r"(|^)abc[^xyz]*$" , (True , True )), # Complex valid regex
140138 (r"^abc[^xyz]+def$" , (True , True )), # Complex valid regex with quantifiers
141- (r"(|^)abc*?[^xyz]$" , (False , True )), # Complex invalid regex with lazy quantifier
139+ (
140+ r"(|^)abc*?[^xyz]$" ,
141+ (False , True ),
142+ ), # Complex invalid regex with lazy quantifier
142143 (r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" , (True , True )),
143-
144144 # 7. The common regexes from zkemail
145145 (r">[^<>]+<.*" , (True , True )),
146146 (r"(\r\n|^)to:[^\r\n]+\r\n" , (True , True )),
147147 (r"(\r\n|^)subject:[^\r\n]+\r\n" , (True , True )),
148- #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)),
149- #(r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)),
148+ # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/]+@[A-Za-z0-9.\-@]+", (True, True)),
149+ # (r"[A-Za-z0-9!#$%&'*+=?\-\^_`{|}~.\/@]+@[A-Za-z0-9.\-]+", (True, True)),
150150 (r"(\r\n|^)from:[^\r\n]+\r\n" , (True , True )),
151151 (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+bh=[a-zA-Z0-9+/=]+;" , (True , True )),
152152 (r"(\r\n|^)dkim-signature:([a-z]+=[^;]+; )+t=[0-9]+;" , (True , True )),
153153 (r"(\r\n|^)message-id:<[A-Za-z0-9=@\.\+_-]+>\r\n" , (True , True )),
154154 ]
155155 for regex , expected in test_cases :
156- assert check_zkregex_rules_basic (regex ) == expected , f"Expected { regex } to have correct zk-regex rules: { expected } "
156+ assert check_zkregex_rules_basic (regex ) == expected , (
157+ f"Expected { regex } to have correct zk-regex rules: { expected } "
158+ )
0 commit comments