diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c3af7eb68e..ecf80830d5 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -473,7 +473,9 @@ jobs:
         CIBW_SKIP: '*musllinux*'
         CIBW_ARCHS: 'auto64'
         CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
-        CIBW_BEFORE_ALL_LINUX: yum install -y libXt-devel
+        CIBW_BEFORE_ALL_LINUX: yum install -y libXt-devel doxygen
+        CIBW_BEFORE_ALL_MACOS: brew install doxygen
+        CIBW_BEFORE_ALL_WINDOWS: choco install doxygen.install -y
         CIBW_BUILD_VERBOSITY: 1
         CIBW_ENVIRONMENT: CMAKE_BUILD_PARALLEL_LEVEL=2
         MACOSX_DEPLOYMENT_TARGET: '11.0'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 357270cc89..11a55867ad 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -135,6 +135,7 @@
 set(MATERIALX_PYTHON_EXECUTABLE "" CACHE FILEPATH "Python executable to be used in building the MaterialX Python package (e.g. 'C:/Python39/python.exe').")
 set(MATERIALX_PYTHON_PYBIND11_DIR "" CACHE PATH "Path to a folder containing the PyBind11 source to be used in building MaterialX Python.")
+option(MATERIALX_PYTHON_FORCE_REPLACE_DOCS "Force replace existing docstrings when generating Python binding documentation from Doxygen." OFF)
 
 # Settings to define installation layout
 set(MATERIALX_INSTALL_INCLUDE_PATH "include" CACHE STRING "Install header include path (e.g. 'inc', 'include').")
@@ -210,6 +211,7 @@ mark_as_advanced(MATERIALX_DYNAMIC_ANALYSIS)
 mark_as_advanced(MATERIALX_PYTHON_VERSION)
 mark_as_advanced(MATERIALX_PYTHON_EXECUTABLE)
 mark_as_advanced(MATERIALX_PYTHON_PYBIND11_DIR)
+mark_as_advanced(MATERIALX_PYTHON_FORCE_REPLACE_DOCS)
 mark_as_advanced(MATERIALX_OSL_BINARY_OSLC)
 mark_as_advanced(MATERIALX_OSL_BINARY_TESTRENDER)
 mark_as_advanced(MATERIALX_OSL_INCLUDE_PATH)
@@ -548,16 +550,16 @@
 if(MATERIALX_BUILD_TESTS)
     add_subdirectory(source/MaterialXTest)
 endif()
 
+if (MATERIALX_BUILD_DOCS)
+    add_subdirectory(documents)
+endif()
+
 # Add Python subdirectories
 if(MATERIALX_BUILD_PYTHON)
     add_subdirectory(source/PyMaterialX)
     add_subdirectory(python)
 endif()
 
-if(MATERIALX_BUILD_DOCS)
-    add_subdirectory(documents)
-endif()
-
 if(MATERIALX_BUILD_JS)
     add_subdirectory(source/JsMaterialX)
 endif()
diff --git a/documents/Doxyfile.in b/documents/Doxyfile.in
index d3897739cf..890d2d7e04 100644
--- a/documents/Doxyfile.in
+++ b/documents/Doxyfile.in
@@ -23,3 +23,6 @@
 FULL_SIDEBAR           = NO
 QUIET                  = YES
 WARN_IF_UNDOCUMENTED   = NO
+
+GENERATE_XML           = YES
+XML_OUTPUT             = doxygen_xml
diff --git a/pyproject.toml b/pyproject.toml
index 2f5f1cbe17..12aba95f51 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,17 +56,25 @@ logging.level = "DEBUG"
 # where the package is.
 wheel.packages = ["python/MaterialX"]
 
+sdist.include = [
+    "/documents",
+]
+
 sdist.exclude = [
     "/build",
     "/dist",
     "/resources",
     "/javascript",
-    "/documents",
     "/.github",
     "MANIFEST.in",
     "/source/JsMaterialX",
 ]
 
+wheel.exclude = [
+    "/documents",
+    "documents/",
+]
+
 [tool.scikit-build.metadata.version]
 # https://scikit-build-core.readthedocs.io/en/latest/configuration.html#dynamic-metadata
 provider = "scikit_build_core.metadata.regex"
@@ -81,6 +89,8 @@ result = "{major}.{minor}.{build}"
 [tool.scikit-build.cmake.define]
 MATERIALX_BUILD_SHARED_LIBS = 'OFF' # Be explicit
 MATERIALX_BUILD_PYTHON = 'ON'
+MATERIALX_BUILD_DOCS = 'ON'
+MATERIALX_PYTHON_FORCE_REPLACE_DOCS = 'ON'
 MATERIALX_TEST_RENDER = 'OFF'
 MATERIALX_WARNINGS_AS_ERRORS = 'ON'
 MATERIALX_BUILD_TESTS = 'OFF'
diff --git a/python/Scripts/pybind_docs.py b/python/Scripts/pybind_docs.py
new file mode 100644
index 0000000000..207fbfe843
--- /dev/null
+++ b/python/Scripts/pybind_docs.py
@@ -0,0 +1,514 @@
+#!/usr/bin/env python
+"""
+pybind11 documentation insertion tool.
+
+Extracts documentation from Doxygen XML and inserts it into pybind11 bindings
+using string matching via a signature lookup table.
+
+Logic:
+- Builds a multi-key lookup for all functions (MaterialX::, mx::, Class::method, method)
+- Handles free functions without a qualified name by assuming the MaterialX namespace
+- Adds class context tracking to correctly document lambda-based bindings
+- Supports .def(...) and .def_static(...); skips .def_readonly_static(...)
+"""
+
+import argparse
+import re
+import json
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Dict, Optional
+
+# Defaults (can be overridden by CLI)
+DOXYGEN_XML_DIR = Path("build/documents/doxygen_xml")
+PYBIND_DIR = Path("source/PyMaterialX")
+
+
+class DocExtractor:
+    """Extracts documentation from Doxygen XML files and builds a lookup table."""
+
+    def __init__(self, xml_dir: Path):
+        self.xml_dir = xml_dir
+        self.class_docs: Dict[str, str] = {}
+        self.func_docs: Dict[str, Dict] = {}
+        # Multi-key lookup: all name variants point to the same doc
+        self.func_lookup: Dict[str, Dict] = {}
+
+    def extract(self):
+        if not self.xml_dir.exists():
+            raise FileNotFoundError(f"Doxygen XML directory not found: {self.xml_dir}")
+
+        for xml_file in self.xml_dir.glob("*.xml"):
+            self._process_xml_file(xml_file)
+
+        self._build_lookup_table()
+        print(f"Extracted {len(self.class_docs)} classes and {len(self.func_docs)} functions")
+        print(f"Built lookup table with {len(self.func_lookup)} keys")
+
+    def _process_xml_file(self, xml_file: Path):
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+
+        # Class / struct documentation
+        for compound in root.findall(".//compounddef[@kind='class']") + root.findall(".//compounddef[@kind='struct']"):
+            self._extract_class_doc(compound)
+
+        # Function documentation
+        for member in root.findall(".//memberdef[@kind='function']"):
+            self._extract_func_doc(member)
+
+    def _extract_class_doc(self, compound):
+        name = self._get_text(compound.find("compoundname"))
+        brief = self._get_text(compound.find("briefdescription/para"))
+        detail = self._extract_detail(compound.find("detaileddescription"))
+        doc = "\n\n".join(filter(None, [brief, detail]))
+        if doc:
+            normalized = self._normalize_name(name)
+            self.class_docs[normalized] = doc
+
+    def _extract_func_doc(self, member):
+        name = self._get_text(member.find("name"))
+        qualified = self._get_text(member.find("qualifiedname"))
+
+        # Many free functions have no qualifiedname element; use the bare name
+        # and normalize to MaterialX::name so lookups can resolve.
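+        # Illustrative example (hypothetical Doxygen entry): a free function reported
+        # only as <name>createDocument</name> would be stored under
+        # "MaterialX::createDocument", which _generate_name_variants() later expands
+        # to "mx::createDocument" and "createDocument".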
+        if not qualified and name:
+            qualified = name
+
+        if not qualified:
+            return
+
+        brief = self._get_text(member.find("briefdescription/para"))
+        detail = self._extract_detail(member.find("detaileddescription"))
+        params = self._extract_params(member)
+        returns = self._get_text(member.find(".//simplesect[@kind='return']"))
+
+        normalized = self._normalize_name(qualified)
+        self.func_docs[normalized] = {
+            "brief": brief,
+            "detail": detail,
+            "params": params,
+            "returns": returns,
+        }
+
+    def _build_lookup_table(self):
+        for qualified_name, doc in self.func_docs.items():
+            for variant in self._generate_name_variants(qualified_name):
+                if variant not in self.func_lookup:
+                    self.func_lookup[variant] = doc
+
+    def _generate_name_variants(self, qualified_name: str) -> list:
+        variants = [qualified_name]
+        parts = qualified_name.split("::")
+        # Class::method
+        if len(parts) >= 2:
+            variants.append("::".join(parts[-2:]))
+        # method
+        if len(parts) >= 1:
+            variants.append(parts[-1])
+        # mx:: variant if MaterialX::
+        if qualified_name.startswith("MaterialX::"):
+            mx_variant = qualified_name.replace("MaterialX::", "mx::", 1)
+            variants.append(mx_variant)
+            if len(parts) >= 3:
+                variants.append(f"mx::{parts[-2]}::{parts[-1]}")
+        return variants
+
+    def _normalize_name(self, name: str) -> str:
+        if not name:
+            return name
+        return name if name.startswith("MaterialX::") else f"MaterialX::{name}"
+
+    def _get_text(self, elem) -> str:
+        if elem is None:
+            return ""
+        text = "".join(elem.itertext())
+        return re.sub(r"\s+", " ", text).strip()
+
+    def _extract_detail(self, elem, exclude_tags={"parameterlist", "simplesect"}) -> str:
+        if elem is None:
+            return ""
+        parts = []
+        for para in elem.findall("para"):
+            if not any(para.find(tag) is not None for tag in exclude_tags):
+                t = self._get_text(para)
+                if t:
+                    parts.append(t)
+        return "\n\n".join(parts)
+
+    def _extract_params(self, member) -> Dict[str, str]:
+        params = {}
+        for param_item in member.findall(".//parameterlist[@kind='param']/parameteritem"):
+            name = self._get_text(param_item.find("parameternamelist/parametername"))
+            desc = self._get_text(param_item.find("parameterdescription"))
+            if name:
+                params[name] = desc
+        return params
+
+
+class DocInserter:
+    """Inserts documentation into pybind11 binding files."""
+
+    def __init__(self, extractor: DocExtractor, pybind_dir: Path, force_replace: bool = False):
+        self.extractor = extractor
+        self.pybind_dir = pybind_dir
+        self.force_replace = force_replace
+
+        self.class_pattern = re.compile(r"py::class_<")
+        # Match .def and .def_static; skip .def_readonly_static (constants)
+        self.def_pattern = re.compile(r"\.def(?:_static)?\s*\(")
+        self.skip_pattern = re.compile(r"\.def_readonly_static\s*\(")
+
+    def process_all_files(self):
+        cpp_files = list(self.pybind_dir.rglob("*.cpp"))
+        patched = 0
+        for cpp_file in cpp_files:
+            if self._process_file(cpp_file):
+                patched += 1
+        print(f"\nProcessed {len(cpp_files)} files, patched {patched}")
+
+    def _process_file(self, cpp_file: Path) -> bool:
+        content = cpp_file.read_text(encoding="utf-8")
+        original = content
+
+        content = self._insert_class_docs(content)
+        content = self._insert_method_docs(content)
+
+        if content != original:
+            cpp_file.write_text(content, encoding="utf-8")
+            print(f"  - {cpp_file.relative_to(self.pybind_dir.parent)}")
+            return True
+        else:
+            print(f"  - {cpp_file.relative_to(self.pybind_dir.parent)}")
+            return False
+
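+    # Illustrative example (hypothetical binding and docstring text): given Doxygen
+    # documentation for MaterialX::Element, a declaration such as
+    #     py::class_<mx::Element, mx::ElementPtr>(mod, "Element")
+    # would gain a third argument,
+    #     py::class_<mx::Element, mx::ElementPtr>(mod, "Element", "The base class for MaterialX elements.")
+    # Existing docstrings are left untouched unless force_replace is set.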
+    def _insert_class_docs(self, content: str) -> str:
+        result = []
+        pos = 0
+
+        for match in self.class_pattern.finditer(content):
+            result.append(content[pos:match.start()])
+
+            start = match.start()
+            template_end = self._find_template_end(content, start)
+            if template_end == -1:
+                result.append(content[start:match.end()])
+                pos = match.end()
+                continue
+
+            paren_start = content.find('(', template_end)
+            if paren_start == -1:
+                result.append(content[start:match.end()])
+                pos = match.end()
+                continue
+
+            paren_end = self._find_matching_paren(content, paren_start)
+            if paren_end == -1:
+                result.append(content[start:match.end()])
+                pos = match.end()
+                continue
+
+            args_text = content[paren_start + 1:paren_end]
+            class_name = self._extract_class_name(args_text)
+
+            if class_name:
+                doc = self.extractor.class_docs.get(self.extractor._normalize_name(class_name))
+                if doc:
+                    args = self._split_args(args_text)
+                    if len(args) >= 3 and not self.force_replace:
+                        result.append(content[start:paren_end + 1])
+                        pos = paren_end + 1
+                        continue
+
+                    escaped = self._escape_for_cpp(doc)
+                    if len(args) >= 3 and self.force_replace:
+                        new_args = args[:2] + [f'"{escaped}"'] + args[3:]
+                        result.append(content[start:paren_start + 1])
+                        result.append(", ".join(new_args))
+                        result.append(")")
+                    else:
+                        result.append(content[start:paren_end])
+                        result.append(f', "{escaped}")')
+                    pos = paren_end + 1
+                    continue
+
+            result.append(content[start:paren_end + 1])
+            pos = paren_end + 1
+
+        result.append(content[pos:])
+        return "".join(result)
+
+    def _insert_method_docs(self, content: str) -> str:
+        # Build a map of line numbers to class contexts
+        class_contexts = self._extract_class_contexts(content)
+
+        result = []
+        pos = 0
+
+        for match in self.def_pattern.finditer(content):
+            if self.skip_pattern.match(content, match.start()):
+                continue
+
+            result.append(content[pos:match.start()])
+
+            start = match.start()
+            paren_start = content.find('(', start)
+            if paren_start == -1:
+                result.append(content[start:match.end()])
+                pos = match.end()
+                continue
+
+            paren_end = self._find_matching_paren(content, paren_start)
+            if paren_end == -1:
+                result.append(content[start:match.end()])
+                pos = match.end()
+                continue
+
+            args_text = content[paren_start + 1:paren_end]
+            args = self._split_args(args_text)
+
+            if len(args) < 2:
+                result.append(content[start:paren_end + 1])
+                pos = paren_end + 1
+                continue
+
+            has_doc = self._has_docstring(args)
+            if has_doc and not self.force_replace:
+                result.append(content[start:paren_end + 1])
+                pos = paren_end + 1
+                continue
+
+            callable_ref = args[1].strip()
+
+            current_line = content[:start].count('\n')
+            class_context = class_contexts.get(current_line)
+
+            doc_entry = self._find_doc_for_callable(callable_ref, class_context)
+
+            if doc_entry:
+                docstring = self._build_docstring(doc_entry)
+                escaped = self._escape_for_cpp(docstring)
+
+                if has_doc and self.force_replace:
+                    doc_idx = self._find_docstring_arg_index(args)
+                    if doc_idx is not None:
+                        new_args = args[:doc_idx] + [f'"{escaped}"'] + args[doc_idx + 1:]
+                        result.append(content[start:paren_start + 1])
+                        result.append(", ".join(new_args))
+                        result.append(")")
+                        pos = paren_end + 1
+                        continue
+
+                result.append(content[start:paren_end])
+                result.append(f', "{escaped}")')
+                pos = paren_end + 1
+                continue
+
+            result.append(content[start:paren_end + 1])
+            pos = paren_end + 1
+
+        result.append(content[pos:])
+        return "".join(result)
+
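+    # Illustrative example (hypothetical binding): a lambda-based binding such as
+    #     .def("getNamePath", [](mx::Element& elem) { return elem.getNamePath(); })
+    # carries no &mx::Element::getNamePath pointer, so _find_doc_for_callable()
+    # falls back on the surrounding class context recorded here to look up
+    # MaterialX::Element::getNamePath.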
+    def _extract_class_contexts(self, content: str) -> Dict[int, str]:
+        contexts = {}
+        for match in self.class_pattern.finditer(content):
+            start = match.start()
+            template_end = self._find_template_end(content, start)
+            if template_end == -1:
+                continue
+            template_start = content.find('<', start) + 1
+            template_content = content[template_start:template_end - 1]
+            class_type = template_content.split(',')[0].strip()
+            class_name = class_type.split('::')[-1] if '::' in class_type else class_type
+
+            start_line = content[:start].count('\n')
+            end_pos = content.find(';', start)
+            if end_pos != -1:
+                end_line = content[:end_pos].count('\n')
+                for line in range(start_line, end_line + 1):
+                    contexts[line] = class_name
+        return contexts
+
+    def _find_doc_for_callable(self, callable_ref: str, class_context: Optional[str] = None) -> Optional[Dict]:
+        callable_ref = callable_ref.strip()
+
+        # Function pointers like &mx::Class::method or &MaterialX::name
+        if callable_ref.startswith('&'):
+            name = callable_ref[1:].strip()
+            name = re.sub(r'[,\s]+$', '', name)
+            return self.extractor.func_lookup.get(name)
+
+        # Lambdas: look for elem.method( or obj->method(
+        method_match = re.search(r'[\.\->](\w+)\s*\(', callable_ref)
+        if method_match:
+            method_name = method_match.group(1)
+            if class_context:
+                for prefix in ("", "mx::", "MaterialX::"):
+                    qualified = f"{prefix}{class_context}::{method_name}" if prefix else f"{class_context}::{method_name}"
+                    doc = self.extractor.func_lookup.get(qualified)
+                    if doc:
+                        return doc
+            return self.extractor.func_lookup.get(method_name)
+
+        return None
+
+    def _build_docstring(self, doc_entry: Dict) -> str:
+        parts = []
+        if doc_entry.get("brief"):
+            parts.append(doc_entry["brief"])
+        if doc_entry.get("detail"):
+            parts.append(doc_entry["detail"])
+        params = doc_entry.get("params", {})
+        if params:
+            param_lines = ["Args:"]
+            for name, desc in params.items():
+                param_lines.append(f"    {name}: {desc}" if desc else f"    {name}:")
+            parts.append("\n".join(param_lines))
+        if doc_entry.get("returns"):
+            parts.append(f"Returns:\n    {doc_entry['returns']}")
+        return "\n\n".join(parts)
+
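+    # Illustrative output (hypothetical doc entry): a brief plus one documented
+    # parameter and a return description would be rendered roughly as
+    #     <brief text>
+    #
+    #     Args:
+    #         name: <parameter description>
+    #
+    #     Returns:
+    #         <return description>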
+    def _escape_for_cpp(self, s: str) -> str:
+        if not s:
+            return ""
+        s = s.replace("\\", "\\\\").replace('"', '\\"')
+        s = s.replace("\n", "\\n")
+        return s
+
+    def _find_template_end(self, content: str, start: int) -> int:
+        pos = content.find('<', start)
+        if pos == -1:
+            return -1
+        depth = 1
+        i = pos + 1
+        in_string = False
+        while i < len(content) and depth > 0:
+            c = content[i]
+            if c == '"' and content[i - 1] != '\\':
+                in_string = not in_string
+            elif not in_string:
+                if c == '<':
+                    depth += 1
+                elif c == '>':
+                    depth -= 1
+            i += 1
+        return i if depth == 0 else -1
+
+    def _find_matching_paren(self, content: str, start: int) -> int:
+        depth = 0
+        in_string = False
+        escape = False
+        for i in range(start, len(content)):
+            c = content[i]
+            if escape:
+                escape = False
+                continue
+            if c == '\\':
+                escape = True
+                continue
+            if c == '"':
+                in_string = not in_string
+                continue
+            if not in_string:
+                if c == '(':
+                    depth += 1
+                elif c == ')':
+                    depth -= 1
+                    if depth == 0:
+                        return i
+        return -1
+
+    def _split_args(self, args_text: str) -> list:
+        args = []
+        current = []
+        depth = 0
+        in_string = False
+        escape = False
+        for c in args_text:
+            if escape:
+                current.append(c)
+                escape = False
+                continue
+            if c == '\\':
+                current.append(c)
+                escape = True
+                continue
+            if c == '"':
+                in_string = not in_string
+                current.append(c)
+                continue
+            if not in_string:
+                if c in '(<':
+                    depth += 1
+                elif c in ')>':
+                    depth -= 1
+                elif c == ',' and depth == 0:
+                    args.append("".join(current).strip())
+                    current = []
+                    continue
+            current.append(c)
+        if current:
+            args.append("".join(current).strip())
+        return args
+
+    def _extract_class_name(self, args_text: str) -> Optional[str]:
+        args = self._split_args(args_text)
+        if len(args) >= 2:
+            return args[1].strip().strip('"')
+        return None
+
+    def _has_docstring(self, args: list) -> bool:
+        for arg in args[2:]:
+            a = arg.strip()
+            if not a.startswith("py::arg") and a.startswith('"'):
+                return True
+        return False
+
+    def _find_docstring_arg_index(self, args: list) -> Optional[int]:
+        for i, arg in enumerate(args[2:], start=2):
+            a = arg.strip()
+            if not a.startswith("py::arg") and a.startswith('"'):
+                return i
+        return None
+
+
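+# Example invocation (defaults shown above; actual paths depend on the build tree):
+#     python python/Scripts/pybind_docs.py \
+#         -d build/documents/doxygen_xml -p source/PyMaterialX --force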
+def main():
+    parser = argparse.ArgumentParser(description="Extract Doxygen docs and insert into pybind11 bindings (simplified)")
+    parser.add_argument("-d", "--doxygen_xml_dir", type=Path, default=DOXYGEN_XML_DIR, help="Path to Doxygen XML output directory")
+    parser.add_argument("-p", "--pybind_dir", type=Path, default=PYBIND_DIR, help="Path to pybind11 bindings directory")
+    parser.add_argument("-f", "--force", action="store_true", help="Force replace existing docstrings")
+    parser.add_argument("-j", "--write_json", action="store_true", help="Write extracted docs to JSON files")
+
+    args = parser.parse_args()
+
+    if not args.doxygen_xml_dir.exists():
+        print(f"Error: Doxygen XML directory not found: {args.doxygen_xml_dir}")
+        return 1
+    if not args.pybind_dir.exists():
+        print(f"Error: Pybind directory not found: {args.pybind_dir}")
+        return 1
+
+    print("Extracting documentation from Doxygen XML...")
+    extractor = DocExtractor(args.doxygen_xml_dir)
+    extractor.extract()
+
+    if args.write_json:
+        print("\nWriting JSON files...")
+        Path("class_docs.json").write_text(json.dumps(extractor.class_docs, indent=2), encoding="utf-8")
+        Path("func_docs.json").write_text(json.dumps(extractor.func_docs, indent=2), encoding="utf-8")
+        print("  - class_docs.json")
+        print("  - func_docs.json")
+
+    print(f"\n{'Replacing' if args.force else 'Inserting'} documentation in pybind11 files...")
+    inserter = DocInserter(extractor, args.pybind_dir, args.force)
+    inserter.process_all_files()
+
+    print("\nDone!")
+    return 0
+
+
+if __name__ == "__main__":
+    exit(main())
+
diff --git a/source/PyMaterialX/CMakeLists.txt b/source/PyMaterialX/CMakeLists.txt
index 8e5d70c1ce..cafbb79245 100644
--- a/source/PyMaterialX/CMakeLists.txt
+++ b/source/PyMaterialX/CMakeLists.txt
@@ -67,3 +67,30 @@ if (MATERIALX_BUILD_RENDER)
         add_subdirectory(PyMaterialXRenderMsl)
     endif()
 endif()
+
+if (MATERIALX_BUILD_DOCS)
+    # Ensure Doxygen docs are generated, then extract docs for pybind11 bindings.
+    set(PYBIND_DOCS_ARGS -d ${CMAKE_BINARY_DIR}/documents/doxygen_xml -p ${CMAKE_SOURCE_DIR}/source/PyMaterialX)
+    if(MATERIALX_PYTHON_FORCE_REPLACE_DOCS)
+        list(APPEND PYBIND_DOCS_ARGS --force)
+    endif()
+    add_custom_target(PyBindDocs
+        COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/python/Scripts/pybind_docs.py ${PYBIND_DOCS_ARGS}
+        COMMENT "Generating PyMaterialX binding docs from Doxygen XML"
+        VERBATIM
+    )
+
+    # Run MaterialXDocs before attempting to generate pybind docs.
+    add_dependencies(PyBindDocs MaterialXDocs)
+
+    # Make pybind modules depend on the generated docs so the binding docs
+    # are integrated prior to building the Python extension modules.
+    foreach(_py_mod IN ITEMS PyMaterialXCore PyMaterialXFormat PyMaterialXGenShader
+                             PyMaterialXGenGlsl PyMaterialXGenMsl PyMaterialXGenMdl
+                             PyMaterialXGenOsl PyMaterialXRender PyMaterialXRenderGlsl
+                             PyMaterialXRenderOsl PyMaterialXRenderMsl)
+        if(TARGET ${_py_mod})
+            add_dependencies(${_py_mod} PyBindDocs)
+        endif()
+    endforeach()
+endif()