diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..650050b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + + - package-ecosystem: "gitsubmodule" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml deleted file mode 100644 index c210579..0000000 --- a/.github/workflows/build_wheels.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: Build Manylinux Wheels - -on: - workflow_dispatch: - inputs: - -jobs: - build_wheels: - strategy: - matrix: - include: - - os: windows-2022 - manylinux: notlinux - archs: auto64 - name: windows - - os: macos-12 - manylinux: notlinux - archs: auto64 - name: macos - - os: ubuntu-20.04 - manylinux: quay.io/pypa/manylinux2014_x86_64 - archs: auto - name: manylinux2014 - - os: ubuntu-20.04 - manylinux: quay.io/pypa/manylinux_2_28_x86_64 - archs: auto - name: manylinux_2_28 - - name: Build ${{ matrix.os }} / ${{ matrix.manylinux }} - runs-on: ${{ matrix.os }} - - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - - run: git config --global --add safe.directory '*' || true - - - name: Setup python - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - - name: Update pip - run: python3 -m pip install -U pip wheel setuptools - - - name: Install requirements - run: python3 -m pip install -Ur requirements.txt - - - name: Cythonize - run: make _pytidyhtml5.cpp - - - name: Build wheels - run: python3 -m cibuildwheel --output-dir wheelhouse-${{ matrix.name }} - env: - CIBW_SKIP: "cp27-* cp34-* cp35-* cp36-* pp*" # FIXME: Unicode strings are broken in Pypy - CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux }} - CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux }} - CIBW_ARCHS: ${{ matrix.archs }} - CIBW_BEFORE_BUILD: make clean-artifacts && make tidy-html5/build/cmake/libtidy.a - CIBW_BUILD_FRONTEND: build - CIBW_TEST_COMMAND: "{project}/basic-sanity-test.py" - - - name: Store artifacts - uses: actions/upload-artifact@v3 - with: - name: Wheelhouse-${{ matrix.name }} - path: ./wheelhouse-${{ matrix.name }}/*.whl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f86d19d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,68 @@ +name: CI + +on: [push] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + python: + - '3.8' + - '3.11' + - '3.13' + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Cache pip + uses: actions/cache@v4 + with: + key: cache--${{ matrix.os }}--${{ matrix.python }}--${{ hashFiles('./requirements*.txt', './Makefile') }} + restore-keys: cache--${{ matrix.os }}--${{ matrix.python }}-- + path: ~/.cache/pip + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - name: Display Python version + run: python -c 'import sys; print(sys.version)' + + - name: Update pip + run: python -m pip install -U pip wheel setuptools + + - name: Install requirements + run: python -m pip install -Ur requirements-dev.txt + + - name: Compile project + run: make install + + - name: Run basic sanity test + run: python basic-sanity-test.py + + black: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Update black + run: python -m pip install -U black + + - name: Run black + run: python -m black --check ./*.py ./docs/ ./lib/ diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 5c8b804..0000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - schedule: - - cron: '18 6 * * 6' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'cpp', 'python' ] - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - submodules: true - - - name: Setup python - uses: actions/setup-python@v2 - with: - python-version: ${{ github.event.inputs.python }} - - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - - - name: Update pip - run: python -m pip install -U pip wheel setuptools - - - name: Install requirements - run: python -m pip install -Ur requirements.txt - - - name: Compile - run: make bdist_wheel - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 0000000..ec4acbc --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,105 @@ +name: Build Wheels (Linux v3) + +on: + workflow_dispatch: + inputs: + platform: + required: true + default: x86_64 i686 aarch64 ppc64le s390x armv7l + +jobs: + define-matrix: + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - run: python -m pip install -U pip wheel setuptools + - run: python -m pip install -U 'cibuildwheel==2.*' + + - id: set-matrix + run: | + TARGETS="$(python -m cibuildwheel --archs "${{ github.event.inputs.platform }}" --print-build-identifiers)" + echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> $GITHUB_OUTPUT + env: + CIBW_BUILD_FRONTEND: build + CIBW_SKIP: 'cp27-* pp*' + CIBW_DEPENDENCY_VERSIONS: pinned + CIBW_PLATFORM: linux + + build: + runs-on: ubuntu-latest + + needs: + - define-matrix + strategy: + matrix: + only: ${{ fromJSON(needs.define-matrix.outputs.matrix) }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Cache pip + uses: actions/cache@v4 + with: + key: cache--${{ hashFiles('./requirements-dev.txt') }} + path: ~/.cache/pip + + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - run: python -m pip install -U pip wheel setuptools + - run: python -m pip install -Ur requirements-dev.txt + - run: python -m pip install -U 'cibuildwheel==2.*' + + - run: python -m cibuildwheel --output-dir wheelhouse --only ${{ matrix.only }} + env: + CIBW_BUILD_FRONTEND: build + CIBW_SKIP: 'cp27-* pp*' + CIBW_DEPENDENCY_VERSIONS: pinned + CIBW_PLATFORM: linux + CIBW_TEST_COMMAND: python {project}/basic-sanity-test.py + CIBW_BEFORE_BUILD: make prepare + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.only }} + path: ./wheelhouse + retention-days: 1 + + combine: + runs-on: ubuntu-latest + needs: + - define-matrix + - build + steps: + - uses: actions/download-artifact@v4 + with: + path: ./wheelhouse + - run: | + find -name '*.zip' -exec unzip '{}' ';' + find -name '*.zip' -exec rm '{}' + + find -name '*.whl' -exec mv -t. '{}' + + find -type d -delete + working-directory: ./wheelhouse + - uses: actions/upload-artifact@v4 + with: + name: wheelhouse + path: ./wheelhouse diff --git a/.gitignore b/.gitignore index 1af79ad..55be68e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ *.py[cdo] -/env*/ -/build/ -/built_wheel/ -/cython_debug/ -/dist/ -/*.egg-info/ +env*/ +build/ +built_wheel/ +cython_debug/ +dist/ +*.egg-info/ *.c *.cpp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 387c8fc..0000000 --- a/LICENSE.md +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) 2019-2020 Freie Universität Berlin - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in index a7a9a88..24d6901 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include pytidyhtml5.cpp include pytidyhtml5.pyx recursive-include lib ** recursive-include pytidyhtml5 ** +include tidy-html5/build/cmake/libtidy.a diff --git a/Makefile b/Makefile index 947203c..155c5f7 100644 --- a/Makefile +++ b/Makefile @@ -4,18 +4,16 @@ all: sdist bdist_wheel docs NAME := pytidyhtml5 -.PHONY: all sdist bdist_wheel clean docs prepare clean-generated clean-artifacts export-environ +.PHONY: all sdist bdist_wheel clean docs prepare clean-generated clean-artifacts export-environ install FILES := Makefile MANIFEST.in _${NAME}.pyx README.rst setup.py \ lib/native.hpp lib/VERSION.txt lib/DESCRIPTION.txt \ tidy-html5/build/cmake/libtidy.a -TIDY_CFLAGS := -O2 -fomit-frame-pointer -flto -TIDY_CFLAGS += -fPIC -ggdb1 -pipe -TIDY_CFLAGS += -fstack-protector-strong --param=ssp-buffer-size=8 +TIDY_CFLAGS := -O3 -flto -fPIC -g0 -pipe TIDY_CFLAGS += -fvisibility=internal -fmerge-all-constants -TIDY_CFLAGS += -std=c11 -D_ISOC11_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE +TIDY_CFLAGS += -std=c11 -D_ISOC11_SOURCE -D_GNU_SOURCE CC:=$(shell which gcc clang g++ clang++ | head -n1 2> /dev/null) @@ -87,14 +85,16 @@ prepare: _${NAME}.cpp ${FILES} sdist: _${NAME}.cpp ${FILES} -rm -- dist/${NAME}-*.tar.gz - python3 setup.py sdist --format=gztar - python3 setup.py sdist --format=xztar + python3 -m build --sdist bdist_wheel: _${NAME}.cpp ${FILES} | sdist -rm -- dist/${NAME}-*.whl - python3 setup.py bdist_wheel + python3 -m build --wheel -docs: bdist_wheel $(wildcard docs/* docs/*/*) +install: bdist_wheel + python3 -m pip install --force dist/pytidyhtml5-*.whl + +docs: install $(wildcard docs/* docs/*/*) -rm -r -- dist/html/ pip install --force dist/${NAME}-*.whl python3 -m sphinx -M html docs/ dist/ diff --git a/_pytidyhtml5.pyx b/_pytidyhtml5.pyx index 2115130..cbaf0f8 100644 --- a/_pytidyhtml5.pyx +++ b/_pytidyhtml5.pyx @@ -1,6 +1,19 @@ # distutils: language = c++ # cython: embedsignature = True, language_level = 3, warn.unreachable = True, warn.maybe_uninitialized = True +# Copyright 2019-2024 René Kijewski +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. include 'lib/_import_tidy_platform.pyx' include 'lib/_imports.pyx' diff --git a/basic-sanity-test.py b/basic-sanity-test.py index eaf0a35..6259c06 100755 --- a/basic-sanity-test.py +++ b/basic-sanity-test.py @@ -1,46 +1,52 @@ #!/usr/bin/env python3 -from pytidyhtml5 import AttrSortStrategy, LineEnding, OptionId, set_language, tidy_document +from pytidyhtml5 import ( + AttrSortStrategy, + LineEnding, + OptionId, + set_language, + tidy_document, +) options = { - OptionId.alt_text: '', - OptionId.doctype: 'html5', - OptionId.drop_empty_elems: False, - OptionId.drop_empty_paras: False, - OptionId.fix_backslash: False, - OptionId.force_output: True, - OptionId.hide_comments: True, - OptionId.html_out: True, - OptionId.indent_attributes: False, - OptionId.indent_cdata: False, - OptionId.indent_content: False, - OptionId.indent_spaces: 0, - OptionId.join_classes: True, - OptionId.join_styles: True, - OptionId.mark: False, - OptionId.merge_divs: False, - OptionId.merge_emphasis: False, - OptionId.merge_spans: False, - OptionId.meta_charset: True, - OptionId.newline: LineEnding.crlf, - OptionId.num_entities: True, - OptionId.quote_ampersand: True, - OptionId.quote_marks: True, - OptionId.quote_nbsp: True, - OptionId.sort_attributes: AttrSortStrategy.alpha, + OptionId.alt_text: "", + OptionId.doctype: "html5", + OptionId.drop_empty_elems: False, + OptionId.drop_empty_paras: False, + OptionId.fix_backslash: False, + OptionId.force_output: True, + OptionId.hide_comments: True, + OptionId.html_out: True, + OptionId.indent_attributes: False, + OptionId.indent_cdata: False, + OptionId.indent_content: False, + OptionId.indent_spaces: 0, + OptionId.join_classes: True, + OptionId.join_styles: True, + OptionId.mark: False, + OptionId.merge_divs: False, + OptionId.merge_emphasis: False, + OptionId.merge_spans: False, + OptionId.meta_charset: True, + OptionId.newline: LineEnding.crlf, + OptionId.num_entities: True, + OptionId.quote_ampersand: True, + OptionId.quote_marks: True, + OptionId.quote_nbsp: True, + OptionId.sort_attributes: AttrSortStrategy.alpha, } -set_language('de') +set_language("de") expected = ( '\r\n\r\n\r\n\r\n\r\n\r\n\r\nHallöchen\r\n\r\n\r\n', "Zeile 1 Spalte 1 - Warnung: fehlende Deklaration\nZeile 1 Spalte 1 - Warnung: Klartext ist im Element nicht erlaubt\nZeile 1 Spalte 1 - Info: bereits vermerkt\nZeile 1 Spalte 1 - Warnung: füge implizites ein\nZeile 1 Spalte 1 - Warnung: füge fehlendes 'title' Element ein\nZeile 1 Spalte 10 - Info: Fehlendes wurde in hinzugefügt\n", ) -actual = tidy_document('Hallöchen', options=options) +actual = tidy_document("Hallöchen", options=options) if expected != actual: - print('actual=' + repr(actual)) + print("actual=" + repr(actual)) raise SystemExit(1) -print('OK') +print("OK") diff --git a/docs/conf.py b/docs/conf.py index 71f18c9..e5c4e9d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,203 +1,63 @@ -# -*- coding: utf-8 -*- -# -# PyJSON5 documentation build configuration file, created by -# sphinx-quickstart on Wed May 2 18:15:32 2018. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# import os import sys -sys.path.insert(0, os.path.abspath('..')) -import pytidyhtml5 -import pytidyhtml5 +sys.path.insert(0, os.path.abspath("..")) +import pytidyhtml5 -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - 'sphinx.ext.inheritance_diagram', - 'sphinx_autodoc_typehints', - 'sphinx.ext.autosectionlabel', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_autodoc_typehints", + "sphinx.ext.autosectionlabel", ] -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" -# The master toctree document. -master_doc = 'index' +master_doc = "index" -# General information about the project. -project = u'PyTidyHTML5' -copyright = u'2018-2021, René Kijewski' -author = u'René Kijewski' +project = "PyTidyHTML5" +copyright = "2019-2024, René Kijewski" +author = "René Kijewski" -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -with open(os.path.join(os.path.abspath('..'), 'lib', 'VERSION.txt'), 'rt') as f: +with open(os.path.join(os.path.abspath(".."), "lib", "VERSION.txt"), "rt") as f: # The full version, including alpha/beta/rc tags. release = eval(f.read()) # The short X.Y version. version = release # '.'.join(release.split('.', 2)[:2]) -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None +language = "en" -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path exclude_patterns = [] -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" -# If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. +html_theme = "sphinx_rtd_theme" html_theme_options = { - 'navigation_depth': -1, + "navigation_depth": -1, } -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { - '**': [ - 'localtoc.html', - 'searchbox.html', + "**": [ + "localtoc.html", + "searchbox.html", ] } - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'PyJSON5doc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'PyJSON5.tex', u'PyJSON5 Documentation', - u'René Kijewski', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'pyjson5', u'PyJSON5 Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'PyJSON5', u'PyJSON5 Documentation', - author, 'PyJSON5', 'One line description of project.', - 'Miscellaneous'), -] - +htmlhelp_basename = "PyTidyHtml5doc" display_toc = True -autodoc_default_flags = ['members'] +autodoc_default_flags = ["members"] autosummary_generate = True intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), -} - -inheritance_graph_attrs = { - 'size': '"6.0, 8.0"', - 'fontsize': 32, - 'bgcolor': 'transparent', -} -inheritance_node_attrs = { - 'color': 'black', - 'fillcolor': 'white', - 'style': '"filled,solid"', -} -inheritance_edge_attrs = { - 'penwidth': 1.2, - 'arrowsize': 0.8, + "python": ("https://docs.python.org/3", None), } diff --git a/docs/document.rst b/docs/document.rst index 7494038..84c9d81 100644 --- a/docs/document.rst +++ b/docs/document.rst @@ -5,3 +5,8 @@ Documents .. autosummary:: ~pytidyhtml5.Document + + +.. autoclass:: pytidyhtml5.Document + :members: + :undoc-members: diff --git a/generate_imports.py b/generate_imports.py index 98d77d2..4ed0256 100755 --- a/generate_imports.py +++ b/generate_imports.py @@ -11,14 +11,19 @@ root = abspath(dirname(__file__)) -Config.set_library_file((sorted( - glob('/usr/lib/llvm-*/lib/libclang.so.1'), - key=lambda p: float(match(r'/usr/lib/llvm-(\d+(?:[.]\d+)?)/', p).group(1)), - reverse=True, -) or ('/usr/lib64/llvm/libclang.so',))[0]) +Config.set_library_file( + ( + sorted( + glob("/usr/lib/llvm-*/lib/libclang.so.1"), + key=lambda p: float(match(r"/usr/lib/llvm-(\d+(?:[.]\d+)?)/", p).group(1)), + reverse=True, + ) + or ("/usr/lib64/llvm/libclang.so",) + )[0] +) index = Index.create() -tu = index.parse(join(root, 'generate_imports_transclusion.h')) +tu = index.parse(join(root, "generate_imports_transclusion.h")) enums = {} @@ -28,105 +33,135 @@ if len(direct_children) != 1: continue - direct_child, = direct_children + (direct_child,) = direct_children if direct_child.kind is not CursorKind.ENUM_DECL: continue enums[cursor.spelling] = { - 'type': direct_child.enum_type.spelling, - 'values': { + "type": direct_child.enum_type.spelling, + "values": { definition.spelling: definition.enum_value for definition in direct_child.get_children() }, } -with open(join(root, 'lib', '_import_tidy_enum.pyx'), 'wt') as f: +with open(join(root, "lib", "_import_tidy_enum.pyx"), "wt") as f: print("# GENERATED FILE: all modifications will be overwritten.", file=f) print(file=f) print("cdef extern from 'tidyenum.h' nogil:", file=f) print(file=f) - print(" # Rationale: If Cython thinks the enums are T*, then it won't automagically convert to ints,", file=f) + print( + " # Rationale: If Cython thinks the enums are T*, then it won't automagically convert to ints,", + file=f, + ) print(" # so it's easier to find wrong type usage.", file=f) print(file=f) for enumname in sorted(enums): - if enumname.lower().startswith('tidy'): - print(" ctypedef struct __Enum__", enumname, file=f, sep='') + if enumname.lower().startswith("tidy"): + print(" ctypedef struct __Enum__", enumname, file=f, sep="") print(file=f) for enumname in sorted(enums): - if enumname.lower().startswith('tidy'): - print(" ctypedef __Enum__", enumname, " *", enumname, " ", repr(enumname), file=f, sep='') + if enumname.lower().startswith("tidy"): + print( + " ctypedef __Enum__", + enumname, + " *", + enumname, + " ", + repr(enumname), + file=f, + sep="", + ) for enumname, definition in sorted(enums.items()): - if enumname.lower().startswith('tidy'): + if enumname.lower().startswith("tidy"): print(file=f) - for valuename in sorted(definition['values']): - print(" const ", enumname, " ", valuename, file=f, sep='') + for valuename in sorted(definition["values"]): + print(" const ", enumname, " ", valuename, file=f, sep="") clsnames = { - 'AttrId': ('TidyAttr_', '', 'TidyAttrId'), - 'AttrSortStrategy': ('TidySortAttr', '', 'TidyAttrSortStrategy'), - 'ConfigCategory': ('Tidy', '', 'TidyConfigCategory'), - 'DoctypeModes': ('TidyDoctype', '', 'TidyDoctypeModes'), - 'DupAttrModes': ('Tidy', '', 'TidyDupAttrModes'), - 'EncodingOptions': ('TidyEnc', '', 'TidyEncodingOptions'), - 'FormatParameterType': ('tidyFormatType_', '', 'TidyFormatParameterType'), - 'LineEnding': ('Tidy', '', 'TidyLineEnding'), - 'NodeType': ('TidyNode_', '', 'TidyNodeType'), - 'OptionId': ('Tidy', '', 'TidyOptionId'), - 'OptionType': ('Tidy', '', 'TidyOptionType'), - 'ReportLevel': ('Tidy', '', 'TidyReportLevel'), - 'Strings': ('', '', 'tidyStrings'), - 'TagId': ('TidyTag_', '', 'TidyTagId'), - 'TriState': ('Tidy', 'State', 'TidyTriState'), - 'Uppercase': ('TidyUppercase', '', 'TidyUppercase'), - 'UseCustomTagsState': ('TidyCustom', '', 'TidyUseCustomTagsState'), + "AttrId": ("TidyAttr_", "", "TidyAttrId"), + "AttrSortStrategy": ("TidySortAttr", "", "TidyAttrSortStrategy"), + "ConfigCategory": ("Tidy", "", "TidyConfigCategory"), + "DoctypeModes": ("TidyDoctype", "", "TidyDoctypeModes"), + "DupAttrModes": ("Tidy", "", "TidyDupAttrModes"), + "EncodingOptions": ("TidyEnc", "", "TidyEncodingOptions"), + "FormatParameterType": ("tidyFormatType_", "", "TidyFormatParameterType"), + "LineEnding": ("Tidy", "", "TidyLineEnding"), + "NodeType": ("TidyNode_", "", "TidyNodeType"), + "OptionId": ("Tidy", "", "TidyOptionId"), + "OptionType": ("Tidy", "", "TidyOptionType"), + "ReportLevel": ("Tidy", "", "TidyReportLevel"), + "Strings": ("", "", "tidyStrings"), + "TagId": ("TidyTag_", "", "TidyTagId"), + "TriState": ("Tidy", "State", "TidyTriState"), + "Uppercase": ("TidyUppercase", "", "TidyUppercase"), + "UseCustomTagsState": ("TidyCustom", "", "TidyUseCustomTagsState"), } -with open(join(root, 'lib', '_tidy_enum.pyx'), 'wt') as f: +with open(join(root, "lib", "_tidy_enum.pyx"), "wt") as f: print("# GENERATED FILE: all modifications will be overwritten.", file=f) print(file=f) for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()): - print("cdef object _", clsname, file=f, sep='') + print("cdef object _", clsname, file=f, sep="") print(file=f) for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()): - print("global ", clsname, file=f, sep='') + print("global ", clsname, file=f, sep="") print(file=f) for clsname in sorted(clsnames): print(file=f) - print("cdef object ", underscore(clsname), "_for_name(name):", file=f, sep='') - print(" return _generic_id_for_name(_", clsname, ", name)", file=f, sep='') + print("cdef object ", underscore(clsname), "_for_name(name):", file=f, sep="") + print(" return _generic_id_for_name(_", clsname, ", name)", file=f, sep="") for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()): definition = enums[enumname] print(file=f) print(file=f) - print("_", clsname, " = IntEnum(", repr(clsname), ", {", file=f, sep='') - for valuename in sorted(definition['values']): + print("_", clsname, " = IntEnum(", repr(clsname), ", {", file=f, sep="") + for valuename in sorted(definition["values"]): pretty_name = valuename if prefix and pretty_name.startswith(prefix): - pretty_name = pretty_name[len(prefix):] + pretty_name = pretty_name[len(prefix) :] if suffix and pretty_name.endswith(suffix): - pretty_name = pretty_name[:-len(suffix)] + pretty_name = pretty_name[: -len(suffix)] pretty_name = underscore(pretty_name) - if pretty_name.startswith('n_tidy_'): + if pretty_name.startswith("n_tidy_"): continue if iskeyword(pretty_name): - pretty_name = pretty_name + '_' - - print(" ", repr(pretty_name), ": <", definition['type'], "> ", valuename, ",", file=f, sep='') + pretty_name = pretty_name + "_" + + print( + " ", + repr(pretty_name), + ": <", + definition["type"], + "> ", + valuename, + ",", + file=f, + sep="", + ) print("})", file=f) print(file=f) - print("_", clsname, ".for_name = ", underscore(clsname), "_for_name", file=f, sep='') + print( + "_", + clsname, + ".for_name = ", + underscore(clsname), + "_for_name", + file=f, + sep="", + ) print(file=f) - print(clsname, " = _", clsname, file=f, sep='') + print(clsname, " = _", clsname, file=f, sep="") diff --git a/lib/VERSION.txt b/lib/VERSION.txt index dc5c43b..693318c 100644 --- a/lib/VERSION.txt +++ b/lib/VERSION.txt @@ -1 +1 @@ -"2021.8.7" +"2024.10.24" diff --git a/lib/_allocator.pyx b/lib/_allocator.pyx index 44404f8..f2d4951 100644 --- a/lib/_allocator.pyx +++ b/lib/_allocator.pyx @@ -1,8 +1,8 @@ -cdef void *allocator_realloc_raw(TidyAllocator *self, void *block, size_t nBytes) nogil: +cdef void *allocator_realloc_raw(TidyAllocator *self, void *block, size_t nBytes) noexcept nogil: return PyMem_RawRealloc(block, nBytes) -cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes) nogil: +cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes) noexcept nogil: cdef void *result with gil: result = ObjectRealloc(block, nBytes) @@ -12,23 +12,23 @@ cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes return result -cdef void *allocator_alloc_raw(TidyAllocator *self, size_t nBytes) nogil: +cdef void *allocator_alloc_raw(TidyAllocator *self, size_t nBytes) noexcept nogil: return allocator_realloc_raw(self, NULL, nBytes) -cdef void *allocator_alloc_gil(TidyAllocator *self, size_t nBytes) nogil: +cdef void *allocator_alloc_gil(TidyAllocator *self, size_t nBytes) noexcept nogil: return allocator_realloc_gil(self, NULL, nBytes) -cdef void allocator_free_raw(TidyAllocator *self, void *block) nogil: +cdef void allocator_free_raw(TidyAllocator *self, void *block) noexcept nogil: allocator_realloc_raw(self, block, 0) -cdef void allocator_free_gil(TidyAllocator *self, void *block) nogil: +cdef void allocator_free_gil(TidyAllocator *self, void *block) noexcept nogil: allocator_realloc_gil(self, block, 0) -cdef void allocator_panic(TidyAllocator *self, ctmbstr msg) nogil: +cdef void allocator_panic(TidyAllocator *self, ctmbstr msg) noexcept nogil: with gil: raise SystemError(unicode(msg, 'UTF-8', 'replace')) diff --git a/lib/_enum_by_name.pyx b/lib/_enum_by_name.pyx index 380fcbf..84531a7 100644 --- a/lib/_enum_by_name.pyx +++ b/lib/_enum_by_name.pyx @@ -1,6 +1,6 @@ cdef cppclass _GetEnum [T, V]: @staticmethod - inline object _do(T elem, V fn(T) nogil, EnumCls): + inline object _do(T elem, V fn(T) noexcept nogil, EnumCls): cdef V value cdef object result = None @@ -19,7 +19,7 @@ cdef cppclass _EnsureId [V]: @staticmethod inline Py_ssize_t _do( object name, - V fn(ctmbstr) nogil, + V fn(ctmbstr) noexcept nogil, Py_ssize_t min_excl, Py_ssize_t max_excl, ) except -2: diff --git a/lib/_import_python.pyx b/lib/_import_python.pyx index 0cf83e6..1f4ba34 100644 --- a/lib/_import_python.pyx +++ b/lib/_import_python.pyx @@ -32,13 +32,13 @@ cdef extern from 'Python.h': PyUnicode_4BYTE_KIND int PyUnicode_READY(object o) except -1 - Py_ssize_t PyUnicode_GET_LENGTH(object o) nogil - int PyUnicode_KIND(object o) nogil - boolean PyUnicode_IS_ASCII(object) nogil - Py_UCS1 *PyUnicode_1BYTE_DATA(object o) nogil - Py_UCS2 *PyUnicode_2BYTE_DATA(object o) nogil - Py_UCS4 *PyUnicode_4BYTE_DATA(object o) nogil - void *PyUnicode_DATA(object o) nogil + Py_ssize_t PyUnicode_GET_LENGTH(object o) noexcept nogil + int PyUnicode_KIND(object o) noexcept nogil + boolean PyUnicode_IS_ASCII(object) noexcept nogil + Py_UCS1 *PyUnicode_1BYTE_DATA(object o) noexcept nogil + Py_UCS2 *PyUnicode_2BYTE_DATA(object o) noexcept nogil + Py_UCS4 *PyUnicode_4BYTE_DATA(object o) noexcept nogil + void *PyUnicode_DATA(object o) noexcept nogil int PyDict_SetItemString(object p, const char *key, object val) except -1 @@ -50,8 +50,8 @@ cdef extern from 'Python.h': object PyMemoryView_FromObject(object) object PyByteArray_FromStringAndSize(const char*, Py_ssize_t) - char *PyByteArray_AS_STRING(object) nogil - Py_ssize_t PyByteArray_GET_SIZE(object) nogil + char *PyByteArray_AS_STRING(object) noexcept nogil + Py_ssize_t PyByteArray_GET_SIZE(object) noexcept nogil ctypedef signed long Py_hash ctypedef signed short wchar_t @@ -71,14 +71,12 @@ cdef extern from 'Python.h': ctypedef struct PyASCIIObject: Py_ssize_t length Py_hash hash - wchar_t *wstr __ascii_object_state state ctypedef struct PyCompactUnicodeObject: # PyASCIIObject Py_ssize_t utf8_length char *utf8 - Py_ssize_t wstr_length ctypedef struct PyVarObject: pass @@ -99,7 +97,7 @@ cdef extern from 'Python.h': boolean RaiseIfErrOccurred 'PyErr_Occurred'() except True AlwaysTrue RaiseErrOccurred 'PyErr_Occurred'() except True - void *PyMem_RawRealloc(void *p, size_t n) nogil + void *PyMem_RawRealloc(void *p, size_t n) noexcept nogil PyObject *Py_None diff --git a/lib/_import_tidy.pyx b/lib/_import_tidy.pyx index dfb4129..6093ec6 100644 --- a/lib/_import_tidy.pyx +++ b/lib/_import_tidy.pyx @@ -1,10 +1,10 @@ cdef extern from 'tidy.h' nogil: ctypedef struct TidyAllocator - ctypedef void *__allocator_alloc(TidyAllocator *self, size_t nBytes) - ctypedef void *__allocator_realloc(TidyAllocator *self, void *block, size_t nBytes) - ctypedef void __allocator_free(TidyAllocator *self, void *block) - ctypedef void __allocator_panic(TidyAllocator *self, ctmbstr msg) + ctypedef void *__allocator_alloc(TidyAllocator *self, size_t nBytes) noexcept + ctypedef void *__allocator_realloc(TidyAllocator *self, void *block, size_t nBytes) noexcept + ctypedef void __allocator_free(TidyAllocator *self, void *block) noexcept + ctypedef void __allocator_panic(TidyAllocator *self, ctmbstr msg) noexcept ctypedef struct TidyAllocatorVtbl: __allocator_alloc alloc @@ -15,11 +15,11 @@ cdef extern from 'tidy.h' nogil: ctypedef struct TidyAllocator: const TidyAllocatorVtbl *vtbl - ctypedef int TidyGetByteFunc(void *sourceData) - ctypedef void TidyUngetByteFunc(void *sourceData, byte bt) - ctypedef Bool TidyEOFFunc(void *sourceData) + ctypedef int TidyGetByteFunc(void *sourceData) noexcept + ctypedef void TidyUngetByteFunc(void *sourceData, byte bt) noexcept + ctypedef Bool TidyEOFFunc(void *sourceData) noexcept - ctypedef void TidyPutByteFunc(void *sinkData, byte bt) + ctypedef void TidyPutByteFunc(void *sinkData, byte bt) noexcept ctypedef struct TidyInputSource: void *sourceData @@ -48,14 +48,14 @@ cdef extern from 'tidy.h' nogil: ctypedef __TidyMessage *TidyMessage ctypedef __TidyMessageArgument *TidyMessageArgument - ctmbstr tidyReleaseDate() - ctmbstr tidyLibraryVersion() - ctmbstr tidyPlatform() + ctmbstr tidyReleaseDate() noexcept + ctmbstr tidyLibraryVersion() noexcept + ctmbstr tidyPlatform() noexcept - Bool tidySetLanguage(ctmbstr languageCode) - ctmbstr tidyGetLanguage() + Bool tidySetLanguage(ctmbstr languageCode) noexcept + ctmbstr tidyGetLanguage() noexcept - ctypedef Bool TidyMessageCallback(TidyMessage tmessage) + ctypedef Bool TidyMessageCallback(TidyMessage tmessage) noexcept # Document diff --git a/lib/_imports.pyx b/lib/_imports.pyx index 0864d33..b21bb64 100644 --- a/lib/_imports.pyx +++ b/lib/_imports.pyx @@ -72,6 +72,9 @@ cdef extern from 'native.hpp' namespace 'PyTidyHtml5' nogil: Out _reinterpret_cast[Out, In](In ptr) int call_bool_fn_1(object fn, object arg1) void reset_hash[T](T *obj) + void reset_wstr[T](T *obj) + void reset_wstr_length[T](T *obj) + void set_ready[T](T *obj) ctypedef boolean AlwaysTrue diff --git a/lib/_input_source.pyx b/lib/_input_source.pyx index e50eb13..81786b9 100644 --- a/lib/_input_source.pyx +++ b/lib/_input_source.pyx @@ -90,25 +90,25 @@ cdef class FiledescriptorSource(InputSource): self.close() @staticmethod - cdef int _get_byte(void *sourceData) nogil: + cdef int _get_byte(void *sourceData) noexcept nogil: return ( sourceData).get_byte() @staticmethod - cdef void _unget_byte(void *sourceData, byte bt) nogil: + cdef void _unget_byte(void *sourceData, byte bt) noexcept nogil: ( sourceData).unget_byte(bt) @staticmethod - cdef Bool _eof(void *sourceData) nogil: + cdef Bool _eof(void *sourceData) noexcept nogil: return ( sourceData).eof() - cdef void unget_byte(FiledescriptorSource self, byte bt) nogil: + cdef void unget_byte(FiledescriptorSource self, byte bt) noexcept nogil: cdef Py_ssize_t index self.pushback_remaining += 1 index = self.pushback_length - self.pushback_remaining PyByteArray_AS_STRING(self.buffer)[index] = bt - cdef int get_byte(FiledescriptorSource self) nogil: + cdef int get_byte(FiledescriptorSource self) noexcept nogil: cdef uint8_t result cdef Py_ssize_t index cdef Py_ssize_t pushback_remaining = self.pushback_remaining @@ -124,7 +124,7 @@ cdef class FiledescriptorSource(InputSource): return result - cdef Bool eof(FiledescriptorSource self) nogil: + cdef Bool eof(FiledescriptorSource self) noexcept nogil: cdef ssize_t count if self.pushback_remaining > 0: diff --git a/lib/_input_ucs.pyx b/lib/_input_ucs.pyx index e96aa33..d16c872 100644 --- a/lib/_input_ucs.pyx +++ b/lib/_input_ucs.pyx @@ -7,7 +7,7 @@ ctypedef fused Codepoint: uint32_t -cdef inline int encode_utf8(SourceData *source_data, Codepoint v) nogil: +cdef inline int encode_utf8(SourceData *source_data, Codepoint v) noexcept nogil: cdef int32_t codepoint = v if (Codepoint is ascii_char) or (codepoint < 0x80): @@ -36,7 +36,7 @@ cdef inline int encode_utf8(SourceData *source_data, Codepoint v) nogil: return b' ' -cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) nogil: +cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) noexcept nogil: cdef int result result = source_data.pushback @@ -55,22 +55,22 @@ cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) nogil: return result -cdef int asciiGetByteFunc(void *sourceData_) nogil: +cdef int asciiGetByteFunc(void *sourceData_) noexcept nogil: cdef SourceData *source_data = sourceData_ return ucsGetByteFunc(source_data, source_data.string) -cdef int ucs1GetByteFunc(void *sourceData_) nogil: +cdef int ucs1GetByteFunc(void *sourceData_) noexcept nogil: cdef SourceData *source_data = sourceData_ return ucsGetByteFunc(source_data, source_data.string) -cdef int ucs2GetByteFunc(void *sourceData_) nogil: +cdef int ucs2GetByteFunc(void *sourceData_) noexcept nogil: cdef SourceData *source_data = sourceData_ return ucsGetByteFunc(source_data, source_data.string) -cdef int ucs4GetByteFunc(void *sourceData_) nogil: +cdef int ucs4GetByteFunc(void *sourceData_) noexcept nogil: cdef SourceData *source_data = sourceData_ return ucsGetByteFunc(source_data, source_data.string) @@ -84,12 +84,12 @@ ctypedef struct SourceData: UChar3 sub_string -cdef void ungetByteFunc(void *sourceData_, byte bt) nogil: +cdef void ungetByteFunc(void *sourceData_, byte bt) noexcept nogil: cdef SourceData *source_data = sourceData_ source_data.pushback = bt -cdef Bool eofFunc(void *sourceData_) nogil: +cdef Bool eofFunc(void *sourceData_) noexcept nogil: cdef SourceData *source_data = sourceData_ if source_data.remaining != 0: return no diff --git a/lib/_output_buffer.pyx b/lib/_output_buffer.pyx index a19b4a2..72b7bb2 100644 --- a/lib/_output_buffer.pyx +++ b/lib/_output_buffer.pyx @@ -200,17 +200,17 @@ cdef class StringBuffer(Buffer): ( result).length = length reset_hash( result) - ( result).wstr = NULL + reset_wstr( result) ( result).state.interned = SSTATE_NOT_INTERNED ( result).state.kind = PyUnicode_1BYTE_KIND ( result).state.compact = True - ( result).state.ready = True + set_ready( result) ( result).state.ascii = is_ascii if not is_ascii: ( result).utf8_length = 0 ( result).utf8 = NULL - ( result).wstr_length = 0 + reset_wstr_length( result) self.tidy_buffer.allocator = NULL self.tidy_buffer.bp = NULL diff --git a/lib/_output_sink.pyx b/lib/_output_sink.pyx index df30181..245939e 100644 --- a/lib/_output_sink.pyx +++ b/lib/_output_sink.pyx @@ -89,17 +89,17 @@ cdef class CallbackSink(OutputSink): raise @staticmethod - cdef void put_byte_integer(void *sinkData, byte bt) nogil: + cdef void put_byte_integer(void *sinkData, byte bt) noexcept nogil: with gil: ( sinkData)._put_byte_integer(bt) @staticmethod - cdef void put_byte_bytes(void *sinkData, byte bt) nogil: + cdef void put_byte_bytes(void *sinkData, byte bt) noexcept nogil: with gil: ( sinkData)._put_byte_bytes(bt) @staticmethod - cdef void put_byte_latin1(void *sinkData, byte bt) nogil: + cdef void put_byte_latin1(void *sinkData, byte bt) noexcept nogil: with gil: ( sinkData)._put_byte_latin(bt) @@ -168,7 +168,7 @@ cdef class FiledescriptorSink(OutputSink): if result < 0: RaiseErrOccurred() - cdef ssize_t _flush(FiledescriptorSink self) nogil: + cdef ssize_t _flush(FiledescriptorSink self) noexcept nogil: cdef ssize_t result cdef Py_ssize_t filled = self.filled cdef int fd = self.fd @@ -193,7 +193,7 @@ cdef class FiledescriptorSink(OutputSink): return 0 @staticmethod - cdef void put_byte(void *sinkData, byte bt) nogil: + cdef void put_byte(void *sinkData, byte bt) noexcept nogil: cdef Py_ssize_t *empty = &( sinkData).empty cdef Py_ssize_t *filled = &( sinkData).filled cdef char *buf = PyByteArray_AS_STRING(( sinkData).buffer) @@ -234,5 +234,5 @@ cdef class VoidSink(OutputSink): pass @staticmethod - cdef void put_byte(void *sinkData, byte bt) nogil: + cdef void put_byte(void *sinkData, byte bt) noexcept nogil: pass diff --git a/lib/_tidy_attr.pyx b/lib/_tidy_attr.pyx index a31361b..1ccb445 100644 --- a/lib/_tidy_attr.pyx +++ b/lib/_tidy_attr.pyx @@ -28,7 +28,7 @@ cdef class Attr: 'An Attr is truthy if the attribute exists and the Node did not expire.' ) - cdef inline boolean _nonzero(Attr self) nogil: + cdef inline boolean _nonzero(Attr self) noexcept nogil: if self is None: return False elif self.tidy_attr is NULL: diff --git a/lib/_tidy_document.pyx b/lib/_tidy_document.pyx index 1304031..7f9a146 100644 --- a/lib/_tidy_document.pyx +++ b/lib/_tidy_document.pyx @@ -38,7 +38,7 @@ cdef class DocumentIterOptions: self.tidy_iterator = tidy_iterator self.document = document - cdef boolean _nonzero__(DocumentIterOptions self) nogil: + cdef boolean _nonzero__(DocumentIterOptions self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -95,7 +95,7 @@ cdef class DocumentIterOptionIds: self.tidy_iterator = tidy_iterator self.document = document - cdef inline boolean _nonzero(DocumentIterOptionIds self) nogil: + cdef inline boolean _nonzero(DocumentIterOptionIds self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -155,7 +155,7 @@ cdef class DocumentIterDeclTags: self.document = document self.option_id = option_id - cdef inline boolean _nonzero(DocumentIterDeclTags self) nogil: + cdef inline boolean _nonzero(DocumentIterDeclTags self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -201,7 +201,7 @@ cdef class DocumentOptionsProxy: def __cinit__(DocumentOptionsProxy self, Document document): self.document = document - cdef inline boolean _nonzero(DocumentOptionsProxy self) nogil: + cdef inline boolean _nonzero(DocumentOptionsProxy self) noexcept nogil: if self is None: return False else: @@ -263,7 +263,7 @@ cdef class DocumentOptionsProxy: raise KeyError -cdef document_iter_ctmbstr_init(Document document, TidyIterator *out_tidy_iterator, TidyIterator fn(TidyDoc) nogil): +cdef document_iter_ctmbstr_init(Document document, TidyIterator *out_tidy_iterator, TidyIterator fn(TidyDoc) noexcept nogil): cdef TidyDoc tidy_doc cdef TidyIterator tidy_iterator @@ -287,7 +287,7 @@ cdef _result_to_outcome(int result): return parse_outcome -cdef document_iter_ctmbstr_next(PyObject **document, TidyIterator *tidy_iterator, ctmbstr fn(TidyDoc, TidyIterator*) nogil): +cdef document_iter_ctmbstr_next(PyObject **document, TidyIterator *tidy_iterator, ctmbstr fn(TidyDoc, TidyIterator*) noexcept nogil): cdef TidyDoc tidy_doc cdef TidyOption tidy_option cdef ctmbstr text @@ -321,7 +321,7 @@ cdef class DocumentIterPriorityAttrs: def __cinit__(DocumentIterPriorityAttrs self, Document document): self.document = document_iter_ctmbstr_init(document, &self.tidy_iterator, tidyOptGetPriorityAttrList) - cdef inline boolean _nonzero(DocumentIterPriorityAttrs self) nogil: + cdef inline boolean _nonzero(DocumentIterPriorityAttrs self) noexcept nogil: if self is None: return False elif self.tidy_iterator is not NULL: @@ -353,7 +353,7 @@ cdef class DocumentIterMutedMessages: def __cinit__(DocumentIterMutedMessages self, Document document): self.document = document_iter_ctmbstr_init(document, &self.tidy_iterator, tidyOptGetMutedMessageList) - cdef inline boolean _nonzero(DocumentIterMutedMessages self) nogil: + cdef inline boolean _nonzero(DocumentIterMutedMessages self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -416,7 +416,7 @@ cdef class Document: if tidy_doc is not NULL: tidyRelease(tidy_doc) - cdef inline boolean _nonzero(Document self) nogil: + cdef inline boolean _nonzero(Document self) noexcept nogil: if self is None: return False else: @@ -444,7 +444,7 @@ cdef class Document: else: return (self.tidy_doc is NULL) or (self.tidy_doc is not ( other).tidy_doc) - cdef object __get(Document self, TidyNode fn(TidyDoc) nogil): + cdef object __get(Document self, TidyNode fn(TidyDoc) noexcept nogil): cdef Node result cdef TidyDoc tidy_doc = self.tidy_doc cdef TidyNode tidy_node = NULL @@ -803,7 +803,7 @@ cdef class Document: ) cdef TidyInputSource input_source cdef int input_kind - cdef int (*get_fun)(void *sourceData_) nogil + cdef int (*get_fun)(void *sourceData_) noexcept nogil cdef TidyDoc tidy_doc = self.tidy_doc if tidy_doc is NULL: @@ -958,7 +958,7 @@ cdef class Document: self._set_message_callback(value) @staticmethod - cdef Bool message_callback_nogil(TidyMessage tidy_message) nogil: + cdef Bool message_callback_nogil(TidyMessage tidy_message) noexcept nogil: cdef TidyDoc tidy_doc = tidyGetMessageDoc(tidy_message) cdef void *app_data diff --git a/lib/_tidy_message.pyx b/lib/_tidy_message.pyx index cbfb009..39bb2b2 100644 --- a/lib/_tidy_message.pyx +++ b/lib/_tidy_message.pyx @@ -29,7 +29,7 @@ cdef class Message: ''' return self.document - cdef inline boolean _nonzero(Message self) nogil: + cdef inline boolean _nonzero(Message self) noexcept nogil: if self is None: return False elif self.tidy_message is NULL: @@ -322,7 +322,7 @@ cdef class MessageArg: def __init__(MessageArg self, Message message): self.message = message - cdef inline boolean _nonzero(MessageArg self) nogil: + cdef inline boolean _nonzero(MessageArg self) noexcept nogil: if self is None: return False elif self.tidy_arg is NULL: @@ -462,7 +462,7 @@ cdef class MessageIterArgs: self.tidy_iterator = tidy_iterator self.message = message - cdef inline boolean _nonzero(self) nogil: + cdef inline boolean _nonzero(self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -519,7 +519,7 @@ cdef class MessageIterValues: self.tidy_iterator = tidy_iterator self.message = message - cdef inline boolean _nonzero(self) nogil: + cdef inline boolean _nonzero(self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: diff --git a/lib/_tidy_node.pyx b/lib/_tidy_node.pyx index 3f0c96a..58f3658 100644 --- a/lib/_tidy_node.pyx +++ b/lib/_tidy_node.pyx @@ -8,7 +8,7 @@ cdef class NodeIterChildren: if parent is not None: self.node = parent.get_child() - cdef inline boolean _nonzero(NodeIterChildren self) nogil: + cdef inline boolean _nonzero(NodeIterChildren self) noexcept nogil: if self is None: return False else: @@ -38,7 +38,7 @@ cdef class NodeIterAttributes: if parent is not None: self.attr = parent.get_attr_first() - cdef inline boolean _nonzero(NodeIterAttributes self) nogil: + cdef inline boolean _nonzero(NodeIterAttributes self) noexcept nogil: if self is None: return False else: @@ -68,7 +68,7 @@ cdef class NodeIterAttributeIds: if parent is not None: self.attr = parent.get_attr_first() - cdef inline boolean _nonzero(NodeIterAttributeIds self) nogil: + cdef inline boolean _nonzero(NodeIterAttributeIds self) noexcept nogil: if self is None: return False else: @@ -105,7 +105,7 @@ cdef class NodeAttrProxy: def __cinit__(NodeAttrProxy self, Node node): self.node = node - cdef inline boolean _nonzero(NodeAttrProxy self) nogil: + cdef inline boolean _nonzero(NodeAttrProxy self) noexcept nogil: if self is None: return False else: @@ -176,7 +176,7 @@ cdef class Node: 'and the document has was not been released in the meantime.' ) - cdef inline boolean _nonzero(Node self) nogil: + cdef inline boolean _nonzero(Node self) noexcept nogil: if self is None: return False elif self.tidy_node is NULL: @@ -216,7 +216,7 @@ cdef class Node: else: return (self.tidy_node is NULL) or (self.tidy_node is not ( other).tidy_node) - cdef object __get_node(Node self, TidyNode fn(TidyNode) nogil): + cdef object __get_node(Node self, TidyNode fn(TidyNode) noexcept nogil): cdef Node result cdef TidyNode tidy_node = self.tidy_node diff --git a/lib/_tidy_options.pyx b/lib/_tidy_options.pyx index 0b72e11..ac8416d 100644 --- a/lib/_tidy_options.pyx +++ b/lib/_tidy_options.pyx @@ -19,7 +19,7 @@ cdef class OptionPicklist: self.tidy_iterator = tidy_iterator self.option = option - cdef inline boolean _nonzero(OptionPicklist self) nogil: + cdef inline boolean _nonzero(OptionPicklist self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -80,7 +80,7 @@ cdef class OptionIterLinkedOptions: self.tidy_iterator = tidy_iterator self.document = document - cdef inline boolean _nonzero(OptionIterLinkedOptions self) nogil: + cdef inline boolean _nonzero(OptionIterLinkedOptions self) noexcept nogil: if self is None: return False elif self.tidy_iterator is NULL: @@ -139,7 +139,7 @@ cdef class Option: 'and the document has was not been released in the meantime.' ) - cdef inline boolean _nonzero(Option self) nogil: + cdef inline boolean _nonzero(Option self) noexcept nogil: if self is None: return False elif self.tidy_option is NULL: diff --git a/lib/native.hpp b/lib/native.hpp index 444261f..6a4f8d2 100644 --- a/lib/native.hpp +++ b/lib/native.hpp @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include "Python.h" @@ -19,9 +18,9 @@ const char LONGDESCRIPTION[] = const std::size_t VERSION_LENGTH = sizeof(VERSION) - 1; const std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1; -using UChar3 = std::array; +using UChar3 = unsigned char[3]; -constexpr const UChar3 utf8_bom{{0xBFu, 0xBBu, 0xEFu}}; +constexpr const UChar3 utf8_bom{0xBFu, 0xBBu, 0xEFu}; template ctmbstr _text_fn(Type elem, ctmbstr fn(Type, ArgsFn...), ArgsIn &&...args) { @@ -133,16 +132,23 @@ int call_bool_fn_1(PyObject *fn, PyObject *arg1) { } +template +struct VoidT_ { + using Value = void*; +}; + +// hash | ob_shash + template struct has_ob_shash { - template static std::uint8_t test(decltype(&C::ob_shash)) ; + template static std::uint8_t test(typename VoidT_().ob_shash, true))>::Value); template static std::uint64_t test(...); enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; }; template struct has_hash { - template static std::uint8_t test(decltype(&C::hash)) ; + template static std::uint8_t test(typename VoidT_().hash, true))>::Value); template static std::uint64_t test(...); enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; }; @@ -176,5 +182,98 @@ static inline void reset_hash(T *obj) { ResetHash_::reset(obj); } +// wstr + +template +struct has_wstr { + template static std::uint8_t test(typename VoidT_().wstr, true))>::Value); + template static std::uint64_t test(...); + enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; +}; + +template::value> +struct ResetWstr_; + +template +struct ResetWstr_ { + static inline void reset(T *obj) { + obj->wstr = nullptr; // CPython >= 3.12: absent + } +}; + +template +struct ResetWstr_ { + static inline void reset(T *) { + (void) 0; + } +}; + +template +static inline void reset_wstr(T *obj) { + ResetWstr_::reset(obj); +} + +// ready + +template +struct has_ready { + template static std::uint8_t test(typename VoidT_().state.ready, true))>::Value); + template static std::uint64_t test(...); + enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; +}; + +template::value> +struct SetReady_; + +template +struct SetReady_ { + static inline void set(T *obj) { + obj->state.ready = true; // CPython >= 3.12: absent + } +}; + +template +struct SetReady_ { + static inline void set(T *) { + (void) 0; + } +}; + +template +static inline void set_ready(T *obj) { + SetReady_::set(obj); +} + +// wstr_length + +template +struct has_wstr_length { + template static std::uint8_t test(typename VoidT_().wstr_length, true))>::Value); + template static std::uint64_t test(...); + enum { value = sizeof(test(0)) == sizeof(std::uint8_t) }; +}; + +template::value> +struct ResetWstrLength_; + +template +struct ResetWstrLength_ { + static inline void reset(T *obj) { + obj->wstr_length = nullptr; // CPython >= 3.12: absent + } +}; + +template +struct ResetWstrLength_ { + static inline void reset(T *) { + (void) 0; + } +}; + +template +static inline void reset_wstr_length(T *obj) { + ResetWstrLength_::reset(obj); +} + } } diff --git a/lib/pytidyhtml5/__init__.py b/lib/pytidyhtml5/__init__.py new file mode 100644 index 0000000..f86c519 --- /dev/null +++ b/lib/pytidyhtml5/__init__.py @@ -0,0 +1,72 @@ +# Copyright 2019-2024 René Kijewski +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import _pytidyhtml5 + +from _pytidyhtml5 import * +from ._doc import BUILDING_DOC + + +__all__ = _pytidyhtml5.__all__ +__version__ = _pytidyhtml5.__version__ + + +if BUILDING_DOC: + from enum import IntEnum + + def _omit_signature(doc): + if doc: + return "\n\n".join(doc.split("\n\n", 1)[1:]).strip() + + for name in __all__: + obj = getattr(_pytidyhtml5, name) + if isinstance(obj, type): + if issubclass(obj, IntEnum): + src = [f"class {name}(IntEnum):"] + src.extend(f" {__elem.name} = {int(__elem)}" for __elem in obj) + src.extend( + ( + " __slots__ = ()", + " __name__ = __qualname__ = obj.__name__", + " __doc__ = obj.__doc__", + ) + ) + eval(compile("\n".join(src), __file__, "exec")) + else: + src = [f"class {name}:"] + src.extend( + f" {__name} = obj.{__name}" + for __name in vars(obj) + if __name not in ("__new__", "__pyx_vtable__") + ) + src.extend( + f" {__name} = property(doc=_omit_signature(obj.{__name}.__doc__))" + for __name in vars(obj) + if isinstance(getattr(obj, __name), property) + ) + src.extend( + ( + " __slots__ = ()", + " __doc__ = obj.__doc__", + " __mro__ = obj.__mro__", + ) + ) + if hasattr(obj, "_non_zero_doc"): + src.extend( + ( + " def __bool__(self): pass", + " __bool__.__doc__ = obj._non_zero_doc", + ) + ) + eval(compile("\n".join(src), __file__, "exec")) diff --git a/lib/pytidyhtml5/_doc.py b/lib/pytidyhtml5/_doc.py new file mode 100644 index 0000000..d0670b3 --- /dev/null +++ b/lib/pytidyhtml5/_doc.py @@ -0,0 +1,7 @@ +from os.path import basename, dirname +from sys import argv + + +BUILDING_DOC = (basename(argv[0]) in ("sphinx-build",)) or ( + basename(dirname(argv[0])) in ("sphinx",) +) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4388bd9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "Cython == 3.*", + "setuptools", +] +build-backend = "setuptools.build_meta" diff --git a/pytidyhtml5/__init__.py b/pytidyhtml5/__init__.py deleted file mode 100644 index 86c150b..0000000 --- a/pytidyhtml5/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -import _pytidyhtml5 - -from _pytidyhtml5 import * -from ._doc import BUILDING_DOC - - -__all__ = _pytidyhtml5.__all__ -__version__ = _pytidyhtml5.__version__ - - -if BUILDING_DOC: - from enum import IntEnum - - def _omit_signature(doc): - if doc: - return '\n\n'.join(doc.split('\n\n', 1)[1:]).strip() - - for name in __all__: - obj = getattr(_pytidyhtml5, name) - if isinstance(obj, type): - if issubclass(obj, IntEnum): - src = [f'class {name}(IntEnum):'] - src.extend( - f' {__elem.name} = {int(__elem)}' - for __elem in obj - ) - src.extend(( - ' __slots__ = ()', - ' __name__ = __qualname__ = obj.__name__', - ' __doc__ = obj.__doc__', - )) - eval(compile('\n'.join(src), __file__, 'exec')) - else: - src = [f'class {name}:'] - src.extend( - f' {__name} = obj.{__name}' - for __name in vars(obj) - if __name not in ('__new__', '__pyx_vtable__') - ) - src.extend( - f' {__name} = property(doc=_omit_signature(obj.{__name}.__doc__))' - for __name in vars(obj) - if isinstance(getattr(obj, __name), property) - ) - src.extend(( - ' __slots__ = ()', - ' __doc__ = obj.__doc__', - ' __mro__ = obj.__mro__', - )) - if hasattr(obj, '_non_zero_doc'): - src.extend(( - ' def __bool__(self): pass', - ' __bool__.__doc__ = obj._non_zero_doc', - )) - eval(compile('\n'.join(src), __file__, 'exec')) diff --git a/pytidyhtml5/_doc.py b/pytidyhtml5/_doc.py deleted file mode 100644 index 5b7e56e..0000000 --- a/pytidyhtml5/_doc.py +++ /dev/null @@ -1,9 +0,0 @@ -from os.path import basename, dirname -from sys import argv - - -BUILDING_DOC = (basename(argv[0]) in ( - 'sphinx-build', -)) or (basename(dirname(argv[0])) in ( - 'sphinx', -)) diff --git a/readthedocs.yml b/readthedocs.yml index 22bef0e..052748d 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -5,10 +5,11 @@ version: 2 sphinx: configuration: docs/conf.py -formats: - - pdf +build: + os: ubuntu-24.04 + tools: + python: "3.12" python: - version: "3.8" install: - - requirements: requirements.readthedocs.txt + - requirements: requirements-readthedocs.txt diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..3dd9abc --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,12 @@ +black +build +clang == 17.* +cython == 3.* +inflection == 0.5.* +pytest == 8.* +wheel + +# keep synchronous to requirements-readthedocs.txt +sphinx == 8.* +sphinx_autodoc_typehints == 2.* +sphinx_rtd_theme == 3.* diff --git a/requirements-readthedocs.txt b/requirements-readthedocs.txt new file mode 100644 index 0000000..4549e30 --- /dev/null +++ b/requirements-readthedocs.txt @@ -0,0 +1,7 @@ +# keep synchronous to src/VERSION.inc +pytidyhtml5 == 2024.10.24 + +# keep synchronous to requirements.txt +sphinx == 8.* +sphinx_autodoc_typehints == 2.* +sphinx_rtd_theme == 3.* diff --git a/requirements.readthedocs.txt b/requirements.readthedocs.txt deleted file mode 100644 index ff263bf..0000000 --- a/requirements.readthedocs.txt +++ /dev/null @@ -1,4 +0,0 @@ -pytidyhtml5 == 2021.8.7 -sphinx == 3.*, >= 3.4 -sphinx_autodoc_typehints == 1.*, >= 1.3, < 1.8 -sphinx_rtd_theme == 0.*, >= 0.4 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 254af0c..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -cibuildwheel == 2.* -clang == 6.*, >= 6.0.0.1 -cython == 0.*, >= 0.28.4 -sphinx_autodoc_typehints == 1.*, >= 1.3, < 1.8 -sphinx_rtd_theme == 0.*, >= 0.4 -wheel == 0.*, >= 0.31.0 -inflection == 0.*, >= 0.3.1 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..e6e6832 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,61 @@ +[metadata] +# keep synchronous to requirements-readthedocs.txt +# keep synchronous to src/VERSION.inc +version = 2024.10.24 + +name = pytidyhtml5 +description = HTML clean-up and repair: Statically linked Python interface for tidy-html5. +url = https://github.com/Kijewski/pytidyhtml5 +project_urls = + Code = https://github.com/Kijewski/pytidyhtml5 + Documentation = https://pytidyhtml5.readthedocs.io/ + Download = https://pypi.org/project/pytidyhtml5/ + Homepage = https://github.com/Kijewski/pytidyhtml5 + Tracker = https://github.com/Kijewski/pytidyhtml5/issues + +author = René Kijewski +maintainer = René Kijewski +author_email = pypi.org@k6i.de +maintainer_email = pypi.org@k6i.de + +long_description = file: README.rst +long_description_content_type = text/x-rst + +license = Apache License 2.0 +license_files = LICENSE + +classifiers = + Development Status :: 5 - Production/Stable + Intended Audience :: Developers + Intended Audience :: System Administrators + License :: OSI Approved :: Apache Software License + Operating System :: POSIX :: Linux + Operating System :: MacOS :: MacOS X + Operating System :: Microsoft :: Windows + Programming Language :: Cython + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + Programming Language :: Python :: 3.13 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: Implementation :: CPython + Programming Language :: Python :: Implementation :: PyPy + Topic :: Text Processing :: Markup :: HTML + +[options] +zip_safe = False + +python_requires = ~= 3.6 +setup_requires = + Cython == 3.* + setuptools + +include_package_data = True +packages = pytidyhtml5 +package_dir = + = lib diff --git a/setup.py b/setup.py index 07f028c..9fb2395 100644 --- a/setup.py +++ b/setup.py @@ -1,130 +1,33 @@ #!/usr/bin/env python3 -from os.path import dirname, join, abspath -from os import environ -from platform import system from setuptools import setup, Extension -from subprocess import check_output -from sys import argv -from traceback import print_last -def get_text(name): - root = abspath(dirname(__file__)) - with open(join(root, 'lib', name), 'rt') as f: - return eval(f.read().strip()) - - -if system() == 'Linux': - extra_compile_args = [ - '-std=c++11', '-flto', - '-O2', '-fomit-frame-pointer', '-fPIC', '-ggdb1', '-pipe', - '-D_FORTIFY_SOURCE=2', '-fstack-protector-strong', '--param=ssp-buffer-size=8', - '-isystem', 'tidy-html5/include/', - ] - extra_link_args = [ - *extra_compile_args, - 'tidy-html5/build/cmake/libtidy.a', - '-fPIC', - '-Wl,-zrelro,-znow,-zcombreloc,-znocommon,-znoexecstack', - ] -else: - # OSX or Windows - extra_compile_args = [ - '-std=c++11', '-flto', - '-O2', '-fomit-frame-pointer', '-fPIC', '-ggdb1', '-pipe', - '-D_FORTIFY_SOURCE=2', '-fstack-protector-strong', '--param=ssp-buffer-size=8', - '-isystem', 'tidy-html5/include/', - ] - extra_link_args = [ - *extra_compile_args, - 'tidy-html5/build/cmake/libtidy.a', - '-fPIC', - ] - -if system() == 'Windows': - # I cannot get cibuildwheel to accept my CC + CXX overrides. :( - # So monkey patching it is ... - - for line in check_output(['make', 'export-environ']).decode('UTF-8').splitlines(): - (key, value) = line.split('=', 1) - environ[key] = value - - import distutils.command.build_ext - - def customize_compiler(compiler): - compiler_settings = [ - environ['CXX'], '-m64', - '-Wall', '-Wno-unused-result', '-Wformat', '-Werror=format-security', '-Wdate-time', - '-O2', '-g', '-fwrapv', '-fstack-protector-strong', - '-DNDEBUG', '-D_FORTIFY_SOURCE=2', - - '-DMS_WIN64', # https://github.com/cython/cython/issues/3405#issuecomment-596975159 - ] - - compiler.preprocessor = [environ['CXX'], '-m64', '-E', '-Wdate-time', '-D_FORTIFY_SOURCE=2'] - compiler.compiler = [*compiler_settings] - compiler.compiler_cxx = [*compiler_settings] - compiler.compiler_so = [*compiler_settings, '-fPIC'] - compiler.linker_so = [*compiler_settings, '-shared'] - compiler.linker_exe = [*compiler_settings, '-fPIC'] - compiler.archiver = [environ['AR'], environ['ARFLAGS']] - compiler.ranlib = [environ['RANLIB']] - - class build_ext(distutils.command.build_ext.build_ext): - def run(self): - self.compiler = 'unix' - return super().run() - - def build_extensions(self): - print('self.compiler', repr(vars(self.compiler))) - return super().build_extensions() - - distutils.command.build_ext.customize_compiler = customize_compiler - distutils.command.build_ext.build_ext = build_ext - - -name = 'pytidyhtml5' +extra_compile_args = [ + "-std=c++11", + "-flto", + "-O3", + "-fPIC", + "-g0", + "-pipe", + "-isystem", + "tidy-html5/include/", +] +extra_link_args = [ + *extra_compile_args, + "tidy-html5/build/cmake/libtidy.a", +] + +name = "pytidyhtml5" setup( - name=name, - version=get_text('VERSION.txt'), - long_description=get_text('DESCRIPTION.txt'), - description='HTML clean-up and repair: Statically linked Python interface for tidy-html5.', - author='René Kijewski', - author_email='pypi.org@k6i.de', - maintainer='René Kijewski', - maintainer_email='pypi.org@k6i.de', - url='https://github.com/Kijewski/pytidyhtml5', - python_requires='~= 3.6', - zip_safe=False, - ext_modules=[Extension( - '_' + name, - sources=['_' + name + '.pyx'], - include_dirs=['lib'], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - language='c++', - )], - packages=[name], - platforms=['any'], - license='ISC', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: System Administrators', - 'License :: OSI Approved :: ISC License (ISCL)', - 'Operating System :: POSIX :: Linux', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - 'Programming Language :: Cython', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Topic :: Text Processing :: Markup :: HTML ', + ext_modules=[ + Extension( + "_" + name, + sources=["_" + name + ".cpp"], + include_dirs=["lib"], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + language="c++", + ) ], ) diff --git a/tidy-html5 b/tidy-html5 index 1ca3747..d08ddc2 160000 --- a/tidy-html5 +++ b/tidy-html5 @@ -1 +1 @@ -Subproject commit 1ca37471b48a3498f985509828cb3cf85ea129f8 +Subproject commit d08ddc2860aa95ba8e301343a30837f157977cba