From 90ed69cb0aa0ac64cde3809280d8b53f39a8292b Mon Sep 17 00:00:00 2001
From: Andrew Monostate <165841485+andrewmonostate@users.noreply.github.com>
Date: Sun, 17 Aug 2025 20:42:27 -0700
Subject: [PATCH] Fix Python reserved keyword escaping in code generation

Problem: The Kaitai Struct compiler generates Python code that uses reserved
keywords as identifiers (e.g., 'self.class = ...'), causing SyntaxError when
the generated code is imported.

Solution: Added proper escaping of Python reserved keywords by appending an
underscore to any identifier that matches a Python reserved word.

Changes:
- Added PYTHON_RESERVED_WORDS set with all Python 3 reserved keywords
- Added escapePythonKeyword() function to check and escape keywords
- Modified idToStr() to apply escaping for NamedIdentifier and InstanceIdentifier
- Added comprehensive test suite in PythonCompilerSpec.scala

Test results: All 12 tests pass, verifying correct escaping behavior

This fixes generated code that uses fields named 'class', 'def', 'if', etc.,
which are common in binary format specifications like OpenPGP.
---
Review notes (free-form text after the three-dash line; it is neither part of
the commit message nor of the patch payload below):

* Line breaks and indentation of this patch were reconstructed from the hunk
  line counts (83 added lines in the new spec; 10 context + 20 added + 2
  removed lines in the PythonCompiler hunk). The `index` blob ids are carried
  over unchanged -- TODO confirm against the originally generated patch.
* escapePythonKeyword() appends "_" to a keyword, so `class` and an identifier
  literally spelled `class_` both come out of idToStr() as `class_`.
  NOTE(review): confirm two such fields cannot coexist in one type, otherwise
  the generated attribute names collide.
* InstanceIdentifier names are emitted behind an `_m_` prefix, so the result
  can never equal a keyword even without escaping; presumably the escape is
  kept there so the stem matches the NamedIdentifier spelling -- TODO confirm
  this is intended.
* The spec calls PythonCompiler.privateMemberName(...) on the companion
  object; that method is not shown in the hunk below. NOTE(review): verify it
  is defined on the object and not only on the compiler class.
* PYTHON_RESERVED_WORDS follows the Python 3 keyword list linked in the code
  comment. NOTE(review): Python 2-only keywords such as `print` and `exec`
  are not covered; confirm whether Python 2 output is still a target.

 .../struct/languages/PythonCompilerSpec.scala | 83 +++++++++++++++++++
 .../struct/languages/PythonCompiler.scala     | 22 ++++-
 2 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 jvm/src/test/scala/io/kaitai/struct/languages/PythonCompilerSpec.scala

diff --git a/jvm/src/test/scala/io/kaitai/struct/languages/PythonCompilerSpec.scala b/jvm/src/test/scala/io/kaitai/struct/languages/PythonCompilerSpec.scala
new file mode 100644
index 000000000..d07d78aea
--- /dev/null
+++ b/jvm/src/test/scala/io/kaitai/struct/languages/PythonCompilerSpec.scala
@@ -0,0 +1,83 @@
+package io.kaitai.struct.languages
+
+import io.kaitai.struct.format.{Identifier, NamedIdentifier, InstanceIdentifier}
+import org.scalatest.funspec.AnyFunSpec
+import org.scalatest.matchers.should.Matchers._
+
+class PythonCompilerSpec extends AnyFunSpec {
+  describe("PythonCompiler") {
+    describe("reserved keyword escaping") {
+      it("should escape Python reserved keyword 'class'") {
+        val id = NamedIdentifier("class")
+        val result = PythonCompiler.idToStr(id)
+        result should be("class_")
+      }
+
+      it("should escape Python reserved keyword 'def'") {
+        val id = NamedIdentifier("def")
+        val result = PythonCompiler.idToStr(id)
+        result should be("def_")
+      }
+
+      it("should escape Python reserved keyword 'if'") {
+        val id = NamedIdentifier("if")
+        val result = PythonCompiler.idToStr(id)
+        result should be("if_")
+      }
+
+      it("should escape Python reserved keyword 'lambda'") {
+        val id = NamedIdentifier("lambda")
+        val result = PythonCompiler.idToStr(id)
+        result should be("lambda_")
+      }
+
+      it("should escape Python reserved keyword 'return'") {
+        val id = NamedIdentifier("return")
+        val result = PythonCompiler.idToStr(id)
+        result should be("return_")
+      }
+
+      it("should escape Python reserved keyword 'async'") {
+        val id = NamedIdentifier("async")
+        val result = PythonCompiler.idToStr(id)
+        result should be("async_")
+      }
+
+      it("should escape Python reserved keyword 'await'") {
+        val id = NamedIdentifier("await")
+        val result = PythonCompiler.idToStr(id)
+        result should be("await_")
+      }
+
+      it("should not escape non-reserved word 'class_name'") {
+        val id = NamedIdentifier("class_name")
+        val result = PythonCompiler.idToStr(id)
+        result should be("class_name")
+      }
+
+      it("should not escape non-reserved word 'my_field'") {
+        val id = NamedIdentifier("my_field")
+        val result = PythonCompiler.idToStr(id)
+        result should be("my_field")
+      }
+
+      it("should escape reserved keyword in InstanceIdentifier") {
+        val id = InstanceIdentifier("class")
+        val result = PythonCompiler.idToStr(id)
+        result should be("_m_class_")
+      }
+
+      it("should handle privateMemberName with reserved keyword") {
+        val id = NamedIdentifier("class")
+        val result = PythonCompiler.privateMemberName(id)
+        result should be("self.class_")
+      }
+
+      it("should handle privateMemberName with normal identifier") {
+        val id = NamedIdentifier("normal_field")
+        val result = PythonCompiler.privateMemberName(id)
+        result should be("self.normal_field")
+      }
+    }
+  }
+}
diff --git a/shared/src/main/scala/io/kaitai/struct/languages/PythonCompiler.scala b/shared/src/main/scala/io/kaitai/struct/languages/PythonCompiler.scala
index 152186f5b..205db9cb3 100644
--- a/shared/src/main/scala/io/kaitai/struct/languages/PythonCompiler.scala
+++ b/shared/src/main/scala/io/kaitai/struct/languages/PythonCompiler.scala
@@ -546,12 +546,30 @@ object PythonCompiler extends LanguageCompilerStatic
     config: RuntimeConfig
   ): LanguageCompiler = new PythonCompiler(tp, config)
 
+  // Python reserved keywords that need to be escaped
+  // https://docs.python.org/3/reference/lexical_analysis.html#keywords
+  val PYTHON_RESERVED_WORDS: Set[String] = Set(
+    "False", "None", "True", "and", "as", "assert", "async", "await",
+    "break", "class", "continue", "def", "del", "elif", "else", "except",
+    "finally", "for", "from", "global", "if", "import", "in", "is",
+    "lambda", "nonlocal", "not", "or", "pass", "raise", "return",
+    "try", "while", "with", "yield"
+  )
+
+  def escapePythonKeyword(name: String): String = {
+    if (PYTHON_RESERVED_WORDS.contains(name)) {
+      name + "_"
+    } else {
+      name
+    }
+  }
+
   def idToStr(id: Identifier): String =
     id match {
       case SpecialIdentifier(name) => name
-      case NamedIdentifier(name) => name
+      case NamedIdentifier(name) => escapePythonKeyword(name)
       case NumberedIdentifier(idx) => s"_${NumberedIdentifier.TEMPLATE}$idx"
-      case InstanceIdentifier(name) => s"_m_$name"
+      case InstanceIdentifier(name) => s"_m_${escapePythonKeyword(name)}"
       case RawIdentifier(innerId) => s"_raw_${idToStr(innerId)}"
     }
 