diff --git a/.gitignore b/.gitignore index fc186aa..8a9218e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ polyfile/trie_partial.gz *.pyc .vscode/ .vscode/* +*.egg-info/ diff --git a/README.md b/README.md index 3c01721..fa8c082 100644 --- a/README.md +++ b/README.md @@ -123,3 +123,20 @@ Bits](https://www.trailofbits.com/) with funding from the Defense Advanced Research Projects Agency (DARPA) under the SafeDocs program as a subcontractor to [Galois](https://galois.com). It is licensed under the [Apache 2.0 license](LICENSE). © 2019, Trail of Bits. + +## Known Issues & Fixes + +### Python Reserved Keyword in Auto-generated Code + +The Kaitai Struct compiler may generate Python code that uses `class` as a variable name (e.g., `self.class = ...`), which is invalid syntax since `class` is a reserved keyword in Python. This issue specifically affects the auto-generated `polyfile/kaitai/parsers/openpgp_message.py` file. + +**Automatic Fix:** As of this version, polyfile automatically patches this issue on import. The fix is applied transparently when you first import the package, ensuring it works out-of-the-box. + +**Manual Fix:** If you need to manually apply the fix (e.g., for development or debugging), you can run the included `fix_class_keyword.py` script: +```bash +python fix_class_keyword.py +``` + +This will patch all occurrences of `self.class` to `self.class_` in the affected file. + +**Note:** A fix has been submitted upstream to the Kaitai Struct compiler. Once that is merged and a new version of the parsers is generated, this workaround will no longer be necessary. diff --git a/fix_class_keyword.py b/fix_class_keyword.py new file mode 100644 index 0000000..3735ff0 --- /dev/null +++ b/fix_class_keyword.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +""" +Fix the 'self.class' reserved keyword issue in auto-generated Kaitai parsers. +This script replaces 'self.class' with 'self.class_' to make it valid Python. 
def fix_class_keyword(file_path):
    """Patch a generated parser that uses ``class`` as an identifier.

    Three textual substitutions are applied to the file's contents:

    * ``self.class`` becomes ``self.class_``.  The trailing word boundary
      means names such as ``self.class_`` or ``self.class_name`` are left
      untouched, so running the function twice is harmless.
    * ``['class']`` becomes ``['class_']``.
    * A ``"class"`` entry on a ``SEQ_FIELDS = [...]`` line becomes
      ``"class_"``.

    The file is rewritten only when a substitution changed something.

    Args:
        file_path: Path (``str`` or ``os.PathLike``) of the Python source
            file to patch in place.

    Returns:
        ``True`` if the file was modified (a ``Fixed: ...`` line is also
        printed), ``False`` if no substitution applied.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit UTF-8 keeps decoding independent of the platform locale.
    # newline='' switches off newline translation so the file's existing
    # line endings (LF or CRLF) are written back exactly as they were read.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    # \b after "class" rejects "class_..." because "_" is a word character.
    fixed_content = re.sub(r'\bself\.class\b', 'self.class_', content)

    # Debug-dictionary keys, e.g. self._debug['class'].
    fixed_content = re.sub(r"\['class'\]", "['class_']", fixed_content)

    # Field-name list emitted at the top of each generated class.
    fixed_content = re.sub(
        r'SEQ_FIELDS = \[(.*)"class"(.*)\]',
        r'SEQ_FIELDS = [\1"class_"\2]',
        fixed_content,
    )

    if content == fixed_content:
        return False

    with open(file_path, 'w', encoding='utf-8', newline='') as f:
        f.write(fixed_content)
    print(f"Fixed: {file_path}")
    return True


if __name__ == "__main__":
    # The parsers directory is resolved relative to this script's location.
    parser_dir = Path(__file__).parent / "polyfile" / "kaitai" / "parsers"
    openpgp_file = parser_dir / "openpgp_message.py"

    if not parser_dir.exists():
        print(f"Error: Parser directory {parser_dir} not found.")
    elif not openpgp_file.exists():
        print(f"Error: {openpgp_file} not found.")
    elif fix_class_keyword(openpgp_file):
        print("Successfully fixed the 'class' keyword issue!")
    else:
        print("No changes needed for 'class' keyword.")
def _fix_class_keyword_if_needed():
    """Patch ``kaitai/parsers/openpgp_message.py`` if it still uses ``self.class``.

    The target path is resolved relative to this module's ``__file__``.
    When the file contains ``self.class`` as a whole word, the following
    substitutions are applied and the file is rewritten in place:

    * ``self.class`` becomes ``self.class_``
    * ``['class']`` becomes ``['class_']``
    * a ``"class"`` entry on a ``SEQ_FIELDS = [...]`` line becomes
      ``"class_"``

    The operation is best-effort: a missing, unreadable, undecodable or
    read-only file is reported through a DEBUG log record instead of an
    exception, so calling this at import time cannot abort the import.

    Returns:
        None.
    """
    import logging

    parser_file = Path(__file__).parent / "kaitai" / "parsers" / "openpgp_message.py"
    try:
        # Explicit UTF-8 avoids locale-dependent decoding; newline='' turns
        # off newline translation so existing line endings are preserved.
        with open(parser_file, 'r', encoding='utf-8', newline='') as f:
            content = f.read()

        # Nothing to do (and nothing to write) once the file is patched.
        if not re.search(r'\bself\.class\b', content):
            return

        fixed_content = re.sub(r'\bself\.class\b', 'self.class_', content)
        fixed_content = re.sub(r"\['class'\]", "['class_']", fixed_content)
        fixed_content = re.sub(
            r'SEQ_FIELDS = \[(.*)"class"(.*)\]',
            r'SEQ_FIELDS = [\1"class_"\2]',
            fixed_content,
        )

        with open(parser_file, 'w', encoding='utf-8', newline='') as f:
            f.write(fixed_content)
    except (OSError, UnicodeError) as exc:
        # Only I/O and decoding failures are expected here; anything else
        # would be a programming error and should surface normally.
        logging.getLogger(__name__).debug(
            "Could not patch %s: %s", parser_file, exc
        )


# Run the fix before importing modules
_fix_class_keyword_if_needed()