|
5 | 5 | """ |
6 | 6 |
|
7 | 7 | import random |
| 8 | +import string |
| 9 | +from typing import Dict, Optional, Set |
8 | 10 |
|
9 | 11 | from automata.fa.dfa import DFA |
10 | 12 | from automata.fa.gnfa import GNFA |
@@ -226,3 +228,160 @@ def transform_dfa_to_single_accepting_state(dfa: DFA, strategy: str = "random") |
226 | 228 | ) |
227 | 229 | else: |
228 | 230 | return _merge_strategy(states, alphabet, transitions, initial, original_finals) |
| 231 | + |
| 232 | + |
| 233 | +def _get_alphabet( |
| 234 | + use_unicode: bool, num_states: int, min_size: int = 2, max_size: int = 10 |
| 235 | +) -> Set[str]: |
| 236 | + """ |
| 237 | + Generate a random alphabet for a DFA. |
| 238 | + """ |
| 239 | + alphabet_size = random.randint(min_size, max_size) |
| 240 | + if use_unicode: |
| 241 | + alphabet = set() |
| 242 | + while len(alphabet) < alphabet_size: |
| 243 | + codepoint = random.randint(0, 0x10FFFF) |
| 244 | + try: |
| 245 | + char = chr(codepoint) |
| 246 | + except ValueError: |
| 247 | + continue # skip invalid code points (if any) |
| 248 | + alphabet.add(char) |
| 249 | + else: |
| 250 | + # Restricted character set: letters, digits, punctuation, whitespace |
| 251 | + allowed_pool = ( |
| 252 | + string.ascii_letters |
| 253 | + + string.digits |
| 254 | + + string.punctuation |
| 255 | + + string.whitespace |
| 256 | + ) |
| 257 | + alphabet = set(random.sample(allowed_pool, alphabet_size)) |
| 258 | + return alphabet |
| 259 | + |
| 260 | + |
def generate_random_dfa(
    max_depth: int = 5,
    use_unicode: bool = False,
    single_final_state: bool = False,
    seed: Optional[int] = None,
) -> DFA:
    """
    Generate a random DFA with a given seed for reproducibility.

    The automaton has between 1 and ``max_depth`` states named ``q0``,
    ``q1``, ... with ``q0`` as the initial state, a random alphabet from
    ``_get_alphabet`` and a total transition function. After the random
    draw the transitions are adjusted so that at least one self-loop and
    (when possible) one branching state are requested, plus an alternate
    route to acceptance. The result is minimized before it is returned.

    All random choices are made over deterministically ordered lists rather
    than over sets, so the same ``seed`` produces the same automaton even
    when string hashing differs between interpreter runs.

    Args:
        max_depth: Upper bound on the number of states before minimization.
        use_unicode: Forwarded to ``_get_alphabet`` to select the symbol pool.
        single_final_state: When true, exactly one final state is chosen;
            otherwise a random non-empty subset is chosen and the initial
            state is additionally made accepting.
        seed: Seed installed into the module-level ``random`` generator.
            When ``None``, a fresh seed is drawn and installed.

    Returns:
        The minimized DFA.

    Raises:
        ValueError: If ``max_depth`` is smaller than 1.
    """
    if max_depth < 1:
        raise ValueError(f"max_depth must be at least 1, got {max_depth}")

    # Seed the global generator; _get_alphabet draws from the same generator.
    if seed is None:
        seed = random.randrange(0, 2**32)
    random.seed(seed)

    num_states = random.randint(1, max_depth)

    # Keep an ordered list next to the set: random.choice over list(set) would
    # depend on set iteration order, which is not stable across runs.
    state_list = [f"q{i}" for i in range(num_states)]
    states = set(state_list)
    initial_state = state_list[0]

    # Determine final state(s)
    if single_final_state:
        final_states = {random.choice(state_list)}
    else:
        num_finals = random.randint(1, num_states)  # at least one final
        final_states = set(random.sample(state_list, num_finals))

    # Sort the symbols for the same reason the states are kept in a list.
    symbols = sorted(_get_alphabet(use_unicode, num_states))
    alphabet = set(symbols)

    # Total transition function: every (state, symbol) gets a random target.
    transitions: Dict[str, Dict[str, str]] = {
        state: {sym: random.choice(state_list) for sym in symbols}
        for state in state_list
    }

    # Ensure at least one self-loop (cycle)
    loop_exists = any(
        dest == state
        for state, row in transitions.items()
        for dest in row.values()
    )
    if not loop_exists:
        some_state = random.choice(state_list)
        some_symbol = random.choice(symbols)
        transitions[some_state][some_symbol] = some_state

    # Ensure at least one branching point (a state with two distinct targets)
    if len(symbols) >= 2:
        branching_exists = any(
            len(set(row.values())) >= 2 for row in transitions.values()
        )
        if not branching_exists:
            # No state branches, so every symbol of the initial state leads to
            # the same target; redirect the second symbol somewhere else.
            sym1, sym2 = symbols[0], symbols[1]
            current_target = transitions[initial_state][sym1]
            possible_targets = [s for s in state_list if s != current_target]
            # With a single state there is nowhere else to go.
            if possible_targets:
                transitions[initial_state][sym2] = random.choice(possible_targets)

    # Introduce an "optional" path by creating an alternate route to acceptance.
    if single_final_state:
        if num_states > 1:
            final_state = next(iter(final_states))  # the only final state
            initial_row = transitions[initial_state]
            # Direct route: initial state -> final state.
            if final_state not in initial_row.values():
                initial_row[random.choice(symbols)] = final_state
            # Indirect route: the first non-final target of the initial state
            # gets a transition into the final state.
            intermediate_state = next(
                (dest for dest in initial_row.values() if dest != final_state),
                None,
            )
            if intermediate_state is not None:
                sym2 = random.choice(symbols)
                transitions[intermediate_state][sym2] = final_state
    else:
        # With multiple finals allowed, the start state is made accepting so
        # the empty string is accepted as well.
        final_states.add(initial_state)

    # Construct the DFA with the generated components
    dfa = DFA(
        states=states,
        input_symbols=alphabet,
        transitions=transitions,
        initial_state=initial_state,
        final_states=final_states,
    )

    # Probe for the method instead of catching AttributeError around the call,
    # so an AttributeError raised inside minify() itself is not swallowed.
    minify = getattr(dfa, "minify", None)
    if callable(minify):
        return minify()

    # Fallback: convert to NFA and use DFA.from_nfa for minimization.
    # Imported here so the fallback does not depend on a module-level NFA import.
    from automata.fa.nfa import NFA

    # Each DFA transition becomes a singleton set in the NFA transition
    nfa_transitions: Dict[str, Dict[str, Set[str]]] = {
        state: {sym: {dest} for sym, dest in row.items()}
        for state, row in transitions.items()
    }
    nfa = NFA(
        states=states,
        input_symbols=alphabet,
        transitions=nfa_transitions,
        initial_state=initial_state,
        final_states=final_states,
    )
    return DFA.from_nfa(nfa, minify=True)
0 commit comments