From 46e2edad446d70496fcd8a5f5913b19fa45d6be8 Mon Sep 17 00:00:00 2001
From: Andrew Monostate <165841485+andrewmonostate@users.noreply.github.com>
Date: Sun, 17 Aug 2025 21:07:57 -0700
Subject: [PATCH] Fix Python reserved keyword 'class' in auto-generated Kaitai
 parser

Problem:
The auto-generated openpgp_message.py uses 'self.class = ...' which causes
SyntaxError since 'class' is a reserved keyword in Python.

Solution:
Added automatic patching in polyfile/__init__.py that:
- Detects if openpgp_message.py uses the reserved keyword
- Automatically replaces self.class with self.class_ before module imports
- Reads and writes the parser as UTF-8 regardless of the platform locale
- Ignores I/O and decoding errors (e.g. a read-only install) so the import
  never breaks, while still surfacing genuine programming errors
- Ensures the package works out-of-the-box without manual intervention

Changes:
- polyfile/__init__.py: Added _fix_class_keyword_if_needed() auto-fix
- polyfile/kaitai/parsers/openpgp_message.py: Auto-fixed by import hook
- fix_class_keyword.py: Standalone script for manual fixing if needed
- README.md: Added Known Issues section documenting the problem and fix
- .gitignore: Added *.egg-info/ to ignore build artifacts

Note: A fix has been submitted upstream to the Kaitai Struct compiler.
Once merged, this workaround will no longer be necessary.
---
 .gitignore                                 |  1 +
 README.md                                  | 17 ++++++++
 fix_class_keyword.py                       | 46 ++++++++++++++++++++++
 polyfile/__init__.py                       | 29 ++++++++++++++
 polyfile/kaitai/parsers/openpgp_message.py |  8 ++--
 5 files changed, 97 insertions(+), 4 deletions(-)
 create mode 100644 fix_class_keyword.py

diff --git a/.gitignore b/.gitignore
index fc186aa1..8a9218ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ polyfile/trie_partial.gz
 *.pyc
 .vscode/
 .vscode/*
+*.egg-info/
diff --git a/README.md b/README.md
index 3c017210..fa8c0824 100644
--- a/README.md
+++ b/README.md
@@ -123,3 +123,20 @@ Bits](https://www.trailofbits.com/) with funding from the Defense
 Advanced Research Projects Agency (DARPA) under the SafeDocs program
 as a subcontractor to [Galois](https://galois.com). It is licensed
 under the [Apache 2.0 license](LICENSE). © 2019, Trail of Bits.
+
+## Known Issues & Fixes
+
+### Python Reserved Keyword in Auto-generated Code
+
+The Kaitai Struct compiler may generate Python code that uses `class` as a variable name (e.g., `self.class = ...`), which is invalid syntax since `class` is a reserved keyword in Python. This issue specifically affects the auto-generated `polyfile/kaitai/parsers/openpgp_message.py` file.
+
+**Automatic Fix:** As of this version, polyfile automatically patches this issue on import. The fix is applied transparently when you first import the package, ensuring it works out-of-the-box.
+
+**Manual Fix:** If you need to manually apply the fix (e.g., for development or debugging), you can run the included `fix_class_keyword.py` script:
+```bash
+python fix_class_keyword.py
+```
+
+This will patch all occurrences of `self.class` to `self.class_` in the affected file.
+
+**Note:** A fix has been submitted upstream to the Kaitai Struct compiler. Once that is merged and a new version of the parsers is generated, this workaround will no longer be necessary.
diff --git a/fix_class_keyword.py b/fix_class_keyword.py
new file mode 100644
index 00000000..3735ff08
--- /dev/null
+++ b/fix_class_keyword.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""
+Fix the 'self.class' reserved keyword issue in auto-generated Kaitai parsers.
+This script replaces 'self.class' with 'self.class_' to make it valid Python.
+"""
+
+import re
+from pathlib import Path
+
+def fix_class_keyword(file_path):
+    """Rename 'class' to 'class_' in file_path; return True if the file was rewritten."""
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # Replace self.class with self.class_ (but not self.class_name, etc.)
+    # The trailing \b cannot match before '_', so an already-fixed file is left alone
+    fixed_content = re.sub(r'\bself\.class\b', 'self.class_', content)
+
+    # Also fix debug references ['class'] -> ['class_']
+    fixed_content = re.sub(r"\['class'\]", "['class_']", fixed_content)
+
+    # And fix SEQ_FIELDS if it contains "class"
+    fixed_content = re.sub(r'SEQ_FIELDS = \[(.*)"class"(.*)\]', r'SEQ_FIELDS = [\1"class_"\2]', fixed_content)
+
+    if content != fixed_content:
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(fixed_content)
+        print(f"Fixed: {file_path}")
+        return True
+    return False
+
+if __name__ == "__main__":
+    parser_dir = Path(__file__).parent / "polyfile" / "kaitai" / "parsers"
+
+    if parser_dir.exists():
+        # Fix openpgp_message.py
+        openpgp_file = parser_dir / "openpgp_message.py"
+        if openpgp_file.exists():
+            if fix_class_keyword(openpgp_file):
+                print("Successfully fixed the 'class' keyword issue!")
+            else:
+                print("No changes needed for 'class' keyword.")
+        else:
+            print(f"Error: {openpgp_file} not found.")
+    else:
+        print(f"Error: Parser directory {parser_dir} not found.")
diff --git a/polyfile/__init__.py b/polyfile/__init__.py
index ae532736..7c696e46 100644
--- a/polyfile/__init__.py
+++ b/polyfile/__init__.py
@@ -1,3 +1,32 @@
+# Auto-fix for Kaitai Struct generated code using reserved keyword 'class'
+import os
+import re
+from pathlib import Path
+
+def _fix_class_keyword_if_needed():
+    """Best-effort rewrite of openpgp_message.py so 'self.class' becomes 'self.class_'."""
+    try:
+        parser_file = Path(__file__).parent / "kaitai" / "parsers" / "openpgp_message.py"
+        if parser_file.exists():
+            with open(parser_file, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            # Only rewrite when the invalid attribute is present, so fixed files are untouched
+            if re.search(r'\bself\.class\b', content):
+                # Apply the fix
+                fixed_content = re.sub(r'\bself\.class\b', 'self.class_', content)
+                fixed_content = re.sub(r"\['class'\]", "['class_']", fixed_content)
+                fixed_content = re.sub(r'SEQ_FIELDS = \[(.*)"class"(.*)\]', r'SEQ_FIELDS = [\1"class_"\2]', fixed_content)
+
+                with open(parser_file, 'w', encoding='utf-8') as f:
+                    f.write(fixed_content)
+    except (OSError, UnicodeError):
+        # Best effort: a read-only or undecodable parser file must not break the import
+        pass
+
+# Run the fix before importing modules
+_fix_class_keyword_if_needed()
+
 from . import (
     nes,
     pdf,
diff --git a/polyfile/kaitai/parsers/openpgp_message.py b/polyfile/kaitai/parsers/openpgp_message.py
index 69436a49..8a044d69 100644
--- a/polyfile/kaitai/parsers/openpgp_message.py
+++ b/polyfile/kaitai/parsers/openpgp_message.py
@@ -358,7 +358,7 @@ def _read(self):
 
 
     class RevocationKey(KaitaiStruct):
-        SEQ_FIELDS = ["class", "public_key_algorithm", "fingerprint"]
+        SEQ_FIELDS = ["class_", "public_key_algorithm", "fingerprint"]
         def __init__(self, _io, _parent=None, _root=None):
             self._io = _io
             self._parent = _parent
@@ -366,9 +366,9 @@ def __init__(self, _io, _parent=None, _root=None):
             self._debug = collections.defaultdict(dict)
 
         def _read(self):
-            self._debug['class']['start'] = self._io.pos()
-            self.class = self._io.read_u1()
-            self._debug['class']['end'] = self._io.pos()
+            self._debug['class_']['start'] = self._io.pos()
+            self.class_ = self._io.read_u1()
+            self._debug['class_']['end'] = self._io.pos()
             self._debug['public_key_algorithm']['start'] = self._io.pos()
             self.public_key_algorithm = KaitaiStream.resolve_enum(OpenpgpMessage.PublicKeyAlgorithms, self._io.read_u1())
             self._debug['public_key_algorithm']['end'] = self._io.pos()