|
| 1 | +"""Grammar Error Factory (Phase 3). |
| 2 | +
|
| 3 | +Provides structured, introspection-enriched grammar errors referencing |
| 4 | +canonical rules (U1-U4 primary, U6 confinement read-only) and TNFR |
| 5 | +invariants. Reuses existing :class:`StructuralGrammarError` base from |
| 6 | +``grammar_types`` to avoid duplication. |
| 7 | +
|
| 8 | +Why a Factory? |
| 9 | +-------------- |
| 10 | +Existing validation returns (bool, message) pairs. Downstream tooling |
| 11 | +needs richer payloads tying violations to: |
| 12 | + - Rule identifier (U1a, U1b, U2, U3, U4a, U4b, U6) |
| 13 | + - Related canonical invariants (AGENTS.md § Canonical Invariants) |
| 14 | + - Operator metadata (category, contracts, grammar roles) |
| 15 | + - Sequence context (window slice, involved operators) |
| 16 | +
|
| 17 | +The factory assembles this without modifying core validator logic, |
| 18 | +preserving backward compatibility. |
| 19 | +
|
| 20 | +Public API |
| 21 | +---------- |
| 22 | +collect_grammar_errors(sequence, epi_initial=0.0) -> list[ExtendedGrammarError] |
| 23 | +make_grammar_error(*, rule, candidate, message, sequence, index=None) |
| 24 | + -> ExtendedGrammarError (all arguments are keyword-only) |
| 25 | +
|
| 26 | +Invariants Mapping (Minimal) |
| 27 | +---------------------------- |
| 28 | +U1a -> (1,4) # EPI initiation & operator closure precondition |
| 29 | +U1b -> (4) # Closure / bounded sequence end |
| 30 | +U2 -> (3,4) # ΔNFR semantics & closure (stabilizer presence) |
| 31 | +U3 -> (5) # Phase verification |
| 32 | +U4a -> (3,4,5) # Trigger handling (ΔNFR pressure + handlers + phase) |
| 33 | +U4b -> (3,4,7) # Transformers need stabilised base & fractality preserved |
| 34 | +U6 -> (3,9) # Potential confinement + metrics integrity (mapping key: U6_CONFINEMENT) |
| 35 | +
|
| 36 | +NOTE: Mapping kept intentionally lean; can be extended in future without |
| 37 | +breaking existing consumers. |
| 38 | +""" |
| 39 | + |
| 40 | +from __future__ import annotations |
| 41 | + |
| 42 | +from dataclasses import dataclass |
| 43 | +from typing import Any, List, Sequence |
| 44 | + |
| 45 | +from .definitions import get_operator_meta |
| 46 | +from .grammar_core import GrammarValidator |
| 47 | +from .grammar_types import StructuralGrammarError |
| 48 | + |
# Public API of this module: the names exported by a star-import.
__all__ = [
    "ExtendedGrammarError",
    "collect_grammar_errors",
    "make_grammar_error",
]
| 54 | + |
| 55 | + |
# Rule identifier -> canonical invariant IDs attached to errors for that rule.
# A rule missing from this table is not an error: ``make_grammar_error`` looks
# it up with ``.get(rule, ())`` and simply reports no related invariants.
_RULE_INVARIANTS: dict[str, tuple[int, ...]] = {
    "U1a": (1, 4),
    "U1b": (4,),
    "U2": (3, 4),
    "U3": (5,),
    "U4a": (3, 4, 5),
    "U4b": (3, 4, 7),
    "U6_CONFINEMENT": (3, 9),
    # The module docstring refers to this rule by the short identifier "U6";
    # accept that spelling too so it does not silently map to no invariants.
    "U6": (3, 9),
}
| 65 | + |
| 66 | + |
| 67 | +@dataclass(slots=True) |
| 68 | +class ExtendedGrammarError: |
| 69 | + """Structured grammar error with invariant & operator metadata. |
| 70 | +
|
| 71 | + Attributes |
| 72 | + ---------- |
| 73 | + rule : str |
| 74 | + Grammar rule identifier (U1a, U2, ...) |
| 75 | + candidate : str |
| 76 | + Operator mnemonic or 'sequence' |
| 77 | + message : str |
| 78 | + Human-readable description |
| 79 | + invariants : tuple[int, ...] |
| 80 | + Canonical invariant IDs related to violation |
| 81 | + operator_meta : dict[str, Any] | None |
| 82 | + Introspection metadata if candidate resolves to operator |
| 83 | + order : tuple[str, ...] |
| 84 | + Canonical sequence slice (may be full sequence) |
| 85 | + index : int | None |
| 86 | + Index in sequence of offending operator (if applicable) |
| 87 | + """ |
| 88 | + |
| 89 | + rule: str |
| 90 | + candidate: str |
| 91 | + message: str |
| 92 | + invariants: tuple[int, ...] |
| 93 | + operator_meta: dict[str, Any] | None |
| 94 | + order: tuple[str, ...] |
| 95 | + index: int | None = None |
| 96 | + |
| 97 | + def to_payload(self) -> dict[str, Any]: # noqa: D401 |
| 98 | + return { |
| 99 | + "rule": self.rule, |
| 100 | + "candidate": self.candidate, |
| 101 | + "message": self.message, |
| 102 | + "invariants": self.invariants, |
| 103 | + "operator_meta": self.operator_meta, |
| 104 | + "order": self.order, |
| 105 | + "index": self.index, |
| 106 | + } |
| 107 | + |
| 108 | + def to_structural_error(self) -> StructuralGrammarError: |
| 109 | + """Convert to existing StructuralGrammarError for compatibility.""" |
| 110 | + return StructuralGrammarError( |
| 111 | + rule=self.rule, |
| 112 | + candidate=self.candidate, |
| 113 | + message=self.message, |
| 114 | + order=list(self.order), |
| 115 | + context={ |
| 116 | + "invariants": self.invariants, |
| 117 | + "operator_meta": self.operator_meta, |
| 118 | + "index": self.index, |
| 119 | + }, |
| 120 | + ) |
| 121 | + |
| 122 | + |
def make_grammar_error(
    *,
    rule: str,
    candidate: str,
    message: str,
    sequence: Sequence[str],
    index: int | None = None,
) -> ExtendedGrammarError:
    """Assemble an :class:`ExtendedGrammarError` for one rule violation.

    Parameters
    ----------
    rule : str
        Rule identifier. It selects the related invariants; an identifier
        absent from the rule table yields an empty tuple.
    candidate : str
        Name passed to ``get_operator_meta``. If that lookup raises
        ``KeyError`` the error is built without operator metadata.
    message : str
        Description stored on the error as given.
    sequence : Sequence[str]
        Operator names; stored on the error as a tuple.
    index : int | None
        Position stored on the error as given.

    Returns
    -------
    ExtendedGrammarError
        The populated error record.
    """
    related = _RULE_INVARIANTS.get(rule, ())

    # Only a KeyError from the lookup is treated as "no metadata available".
    try:
        descriptor = get_operator_meta(candidate)
    except KeyError:
        descriptor = None

    if descriptor is None:
        introspection: dict[str, Any] | None = None
    else:
        introspection = {
            "name": descriptor.name,
            "mnemonic": descriptor.mnemonic,
            "category": descriptor.category,
            "grammar_roles": descriptor.grammar_roles,
            "contracts": descriptor.contracts,
        }

    return ExtendedGrammarError(
        rule=rule,
        candidate=candidate,
        message=message,
        invariants=related,
        operator_meta=introspection,
        order=tuple(sequence),
        index=index,
    )
| 155 | + |
| 156 | + |
def collect_grammar_errors(
    sequence: Sequence[Any],
    epi_initial: float = 0.0,
) -> List[ExtendedGrammarError]:
    """Validate ``sequence`` and return one structured error per failed check.

    Five validator checks run in a fixed order: initiation (U1a), closure
    (U1b), convergence (U2), resonant coupling (U3) and bifurcation
    (U4a/U4b). Every check runs even when an earlier one failed, and no U6
    check is performed here.

    Parameters
    ----------
    sequence : Sequence[Any]
        Operators to validate. Each is reported under its
        ``canonical_name`` attribute, else its ``name`` attribute, else
        ``"?"``.
    epi_initial : float
        Forwarded to the initiation check only.

    Returns
    -------
    List[ExtendedGrammarError]
        Errors in check order; empty when every check passes.
    """
    checker = GrammarValidator()
    found: List[ExtendedGrammarError] = []

    # Reporting names, resolved once up front.
    names = []
    for op in sequence:
        fallback = getattr(op, "name", "?")
        names.append(getattr(op, "canonical_name", fallback))

    def report(rule, message, candidate="sequence", index=None):
        # Every error is reported against the full list of names.
        found.append(
            make_grammar_error(
                rule=rule,
                candidate=candidate,
                message=message,
                sequence=names,
                index=index,
            )
        )

    # Each validator call below receives its own fresh list copy.

    # U1a: blamed on the first operator when there is one.
    passed, detail = checker.validate_initiation(list(sequence), epi_initial)
    if not passed:
        if names:
            report("U1a", detail, names[0], 0)
        else:
            report("U1a", detail)

    # U1b: blamed on the last operator when there is one.
    passed, detail = checker.validate_closure(list(sequence))
    if not passed:
        if names:
            report("U1b", detail, names[-1], len(names) - 1)
        else:
            report("U1b", detail)

    # U2: always a whole-sequence error.
    passed, detail = checker.validate_convergence(list(sequence))
    if not passed:
        report("U2", detail)

    # U3: point at the first coupling/resonance operator, if any.
    passed, detail = checker.validate_resonant_coupling(list(sequence))
    if not passed:
        position = None
        for i, name in enumerate(names):
            if name in {"coupling", "resonance"}:
                position = i
                break
        if position is None:
            report("U3", detail)
        else:
            report("U3", detail, names[position], position)

    # U4: a single validator call covers both sub-rules; which one failed is
    # guessed from the wording of the returned message.
    passed, detail = checker.validate_bifurcation(list(sequence))
    if not passed:
        if "transform" in detail.lower():
            sub_rule = "U4b"
        else:
            sub_rule = "U4a"
        report(sub_rule, detail)

    return found
0 commit comments