diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..650050b
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,20 @@
+# Dependabot: check every ecosystem used by this repository once a day.
+version: 2
+updates:
+  # Python dependencies declared at the repository root.
+  - package-ecosystem: 'pip'
+    directory: '/'
+    schedule:
+      interval: 'daily'
+
+  # Actions referenced by the workflows of this repository.
+  - package-ecosystem: 'github-actions'
+    directory: '/'
+    schedule:
+      interval: 'daily'
+
+  # Git submodules of this repository.
+  - package-ecosystem: 'gitsubmodule'
+    directory: '/'
+    schedule:
+      interval: 'daily'
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
deleted file mode 100644
index c210579..0000000
--- a/.github/workflows/build_wheels.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: Build Manylinux Wheels
-
-on:
- workflow_dispatch:
- inputs:
-
-jobs:
- build_wheels:
- strategy:
- matrix:
- include:
- - os: windows-2022
- manylinux: notlinux
- archs: auto64
- name: windows
- - os: macos-12
- manylinux: notlinux
- archs: auto64
- name: macos
- - os: ubuntu-20.04
- manylinux: quay.io/pypa/manylinux2014_x86_64
- archs: auto
- name: manylinux2014
- - os: ubuntu-20.04
- manylinux: quay.io/pypa/manylinux_2_28_x86_64
- archs: auto
- name: manylinux_2_28
-
- name: Build ${{ matrix.os }} / ${{ matrix.manylinux }}
- runs-on: ${{ matrix.os }}
-
- steps:
- - uses: actions/checkout@v3
- with:
- submodules: true
-
- - run: git config --global --add safe.directory '*' || true
-
- - name: Setup python
- uses: actions/setup-python@v3
- with:
- python-version: "3.10"
-
- - name: Update pip
- run: python3 -m pip install -U pip wheel setuptools
-
- - name: Install requirements
- run: python3 -m pip install -Ur requirements.txt
-
- - name: Cythonize
- run: make _pytidyhtml5.cpp
-
- - name: Build wheels
- run: python3 -m cibuildwheel --output-dir wheelhouse-${{ matrix.name }}
- env:
- CIBW_SKIP: "cp27-* cp34-* cp35-* cp36-* pp*" # FIXME: Unicode strings are broken in Pypy
- CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux }}
- CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux }}
- CIBW_ARCHS: ${{ matrix.archs }}
- CIBW_BEFORE_BUILD: make clean-artifacts && make tidy-html5/build/cmake/libtidy.a
- CIBW_BUILD_FRONTEND: build
- CIBW_TEST_COMMAND: "{project}/basic-sanity-test.py"
-
- - name: Store artifacts
- uses: actions/upload-artifact@v3
- with:
- name: Wheelhouse-${{ matrix.name }}
- path: ./wheelhouse-${{ matrix.name }}/*.whl
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..f86d19d
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,75 @@
+name: CI
+
+on: [push]
+
+# The jobs only read the repository; drop any write scopes GITHUB_TOKEN may carry.
+permissions:
+  contents: read
+
+jobs:
+  # Build the extension and run the sanity test on every OS / Python combination.
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      # Let every combination finish so one failure does not hide the others.
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - macos-latest
+          - windows-latest
+        python:
+          - '3.8'
+          - '3.11'
+          - '3.13'
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+          # setup-python resolves pip's cache directory per platform; a
+          # hard-coded ~/.cache/pip is only pip's cache location on Linux.
+          cache: pip
+          cache-dependency-path: |
+            requirements*.txt
+            Makefile
+
+      - name: Display Python version
+        run: python -c 'import sys; print(sys.version)'
+
+      - name: Update pip
+        run: python -m pip install -U pip wheel setuptools
+
+      - name: Install requirements
+        run: python -m pip install -Ur requirements-dev.txt
+
+      - name: Compile project
+        run: make install
+
+      - name: Run basic sanity test
+        run: python basic-sanity-test.py
+
+  # Check the formatting of the Python sources with black.
+  black:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Update black
+        run: python -m pip install -U black
+
+      - name: Run black
+        run: python -m black --check ./*.py ./docs/ ./lib/
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
deleted file mode 100644
index 5c8b804..0000000
--- a/.github/workflows/codeql-analysis.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-name: "CodeQL"
-
-on:
- push:
- branches: [ master ]
- pull_request:
- branches: [ master ]
- schedule:
- - cron: '18 6 * * 6'
-
-jobs:
- analyze:
- name: Analyze
- runs-on: ubuntu-latest
- permissions:
- actions: read
- contents: read
- security-events: write
-
- strategy:
- fail-fast: false
- matrix:
- language: [ 'cpp', 'python' ]
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v2
- with:
- submodules: true
-
- - name: Setup python
- uses: actions/setup-python@v2
- with:
- python-version: ${{ github.event.inputs.python }}
-
- - name: Initialize CodeQL
- uses: github/codeql-action/init@v1
- with:
- languages: ${{ matrix.language }}
-
- - name: Update pip
- run: python -m pip install -U pip wheel setuptools
-
- - name: Install requirements
- run: python -m pip install -Ur requirements.txt
-
- - name: Compile
- run: make bdist_wheel
-
- - name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v1
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
new file mode 100644
index 0000000..ec4acbc
--- /dev/null
+++ b/.github/workflows/linux.yml
@@ -0,0 +1,116 @@
+name: Build Wheels (Linux v3)
+
+on:
+  workflow_dispatch:
+    inputs:
+      platform:
+        description: 'Space-separated list of architectures passed to cibuildwheel --archs'
+        required: true
+        default: x86_64 i686 aarch64 ppc64le s390x armv7l
+
+# The jobs only read the repository; drop any write scopes GITHUB_TOKEN may carry.
+permissions:
+  contents: read
+
+# cibuildwheel settings shared by the matrix computation and the actual builds,
+# so both always agree on the set of build identifiers.
+env:
+  CIBW_BUILD_FRONTEND: build
+  CIBW_SKIP: 'cp27-* pp*'
+  CIBW_DEPENDENCY_VERSIONS: pinned
+  CIBW_PLATFORM: linux
+
+jobs:
+  # Ask cibuildwheel which wheels it would build and publish them as a JSON list.
+  define-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - run: python -m pip install -U pip wheel setuptools
+      - run: python -m pip install -U 'cibuildwheel==2.*'
+
+      - id: set-matrix
+        # The user-supplied input is passed through the environment rather than
+        # expanded into the script text, so it cannot inject shell commands.
+        run: |
+          TARGETS="$(python -m cibuildwheel --archs "$REQUESTED_ARCHS" --print-build-identifiers)"
+          echo 'matrix=["'$(echo $TARGETS | sed -e 's/ /","/g')'"]' >> "$GITHUB_OUTPUT"
+        env:
+          REQUESTED_ARCHS: ${{ github.event.inputs.platform }}
+
+  # Build one wheel per identifier; QEMU is registered for non-native targets.
+  build:
+    runs-on: ubuntu-latest
+
+    needs:
+      - define-matrix
+    strategy:
+      # Let every wheel finish so one failing target does not cancel the rest.
+      fail-fast: false
+      matrix:
+        only: ${{ fromJSON(needs.define-matrix.outputs.matrix) }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: all
+
+      - name: Cache pip
+        uses: actions/cache@v4
+        with:
+          key: cache--${{ hashFiles('./requirements-dev.txt') }}
+          path: ~/.cache/pip
+
+      - name: Setup python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - run: python -m pip install -U pip wheel setuptools
+      - run: python -m pip install -Ur requirements-dev.txt
+      - run: python -m pip install -U 'cibuildwheel==2.*'
+
+      - run: python -m cibuildwheel --output-dir wheelhouse --only "$ONLY"
+        env:
+          ONLY: ${{ matrix.only }}
+          CIBW_TEST_COMMAND: python {project}/basic-sanity-test.py
+          CIBW_BEFORE_BUILD: make prepare
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.only }}
+          path: ./wheelhouse
+          retention-days: 1
+
+  # Collect the per-target artifacts into a single flat "wheelhouse" artifact.
+  combine:
+    runs-on: ubuntu-latest
+    needs:
+      - define-matrix
+      - build
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: ./wheelhouse
+          # download-artifact@v4 already unpacks the archives; merging puts the
+          # files of all targets directly into ./wheelhouse.
+          merge-multiple: true
+      - uses: actions/upload-artifact@v4
+        with:
+          name: wheelhouse
+          path: ./wheelhouse
diff --git a/.gitignore b/.gitignore
index 1af79ad..55be68e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,11 @@
*.py[cdo]
-/env*/
-/build/
-/built_wheel/
-/cython_debug/
-/dist/
-/*.egg-info/
+env*/
+build/
+built_wheel/
+cython_debug/
+dist/
+*.egg-info/
*.c
*.cpp
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/LICENSE.md b/LICENSE.md
deleted file mode 100644
index 387c8fc..0000000
--- a/LICENSE.md
+++ /dev/null
@@ -1,13 +0,0 @@
-Copyright (c) 2019-2020 Freie Universität Berlin
-
-Permission to use, copy, modify, and/or distribute this software for any purpose
-with or without fee is hereby granted, provided that the above copyright notice
-and this permission notice appear in all copies.
-
-THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
-REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
-INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-THIS SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
index a7a9a88..24d6901 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,3 +3,4 @@ include pytidyhtml5.cpp
include pytidyhtml5.pyx
recursive-include lib **
recursive-include pytidyhtml5 **
+include tidy-html5/build/cmake/libtidy.a
diff --git a/Makefile b/Makefile
index 947203c..155c5f7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,18 +4,16 @@ all: sdist bdist_wheel docs
NAME := pytidyhtml5
-.PHONY: all sdist bdist_wheel clean docs prepare clean-generated clean-artifacts export-environ
+.PHONY: all sdist bdist_wheel clean docs prepare clean-generated clean-artifacts export-environ install
FILES := Makefile MANIFEST.in _${NAME}.pyx README.rst setup.py \
lib/native.hpp lib/VERSION.txt lib/DESCRIPTION.txt \
tidy-html5/build/cmake/libtidy.a
-TIDY_CFLAGS := -O2 -fomit-frame-pointer -flto
-TIDY_CFLAGS += -fPIC -ggdb1 -pipe
-TIDY_CFLAGS += -fstack-protector-strong --param=ssp-buffer-size=8
+TIDY_CFLAGS := -O3 -flto -fPIC -g0 -pipe
TIDY_CFLAGS += -fvisibility=internal -fmerge-all-constants
-TIDY_CFLAGS += -std=c11 -D_ISOC11_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
+TIDY_CFLAGS += -std=c11 -D_ISOC11_SOURCE -D_GNU_SOURCE
CC:=$(shell which gcc clang g++ clang++ | head -n1 2> /dev/null)
@@ -87,14 +85,16 @@ prepare: _${NAME}.cpp ${FILES}
sdist: _${NAME}.cpp ${FILES}
-rm -- dist/${NAME}-*.tar.gz
- python3 setup.py sdist --format=gztar
- python3 setup.py sdist --format=xztar
+ python3 -m build --sdist
bdist_wheel: _${NAME}.cpp ${FILES} | sdist
-rm -- dist/${NAME}-*.whl
- python3 setup.py bdist_wheel
+ python3 -m build --wheel
-docs: bdist_wheel $(wildcard docs/* docs/*/*)
+install: bdist_wheel  # install the wheel that bdist_wheel just placed in dist/
+	python3 -m pip install --force dist/${NAME}-*.whl
+
+docs: install $(wildcard docs/* docs/*/*)
-rm -r -- dist/html/
pip install --force dist/${NAME}-*.whl
python3 -m sphinx -M html docs/ dist/
diff --git a/_pytidyhtml5.pyx b/_pytidyhtml5.pyx
index 2115130..cbaf0f8 100644
--- a/_pytidyhtml5.pyx
+++ b/_pytidyhtml5.pyx
@@ -1,6 +1,19 @@
# distutils: language = c++
# cython: embedsignature = True, language_level = 3, warn.unreachable = True, warn.maybe_uninitialized = True
+# Copyright 2019-2024 René Kijewski
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
include 'lib/_import_tidy_platform.pyx'
include 'lib/_imports.pyx'
diff --git a/basic-sanity-test.py b/basic-sanity-test.py
index eaf0a35..6259c06 100755
--- a/basic-sanity-test.py
+++ b/basic-sanity-test.py
@@ -1,46 +1,52 @@
#!/usr/bin/env python3
-from pytidyhtml5 import AttrSortStrategy, LineEnding, OptionId, set_language, tidy_document
+from pytidyhtml5 import (
+ AttrSortStrategy,
+ LineEnding,
+ OptionId,
+ set_language,
+ tidy_document,
+)
options = {
- OptionId.alt_text: '',
- OptionId.doctype: 'html5',
- OptionId.drop_empty_elems: False,
- OptionId.drop_empty_paras: False,
- OptionId.fix_backslash: False,
- OptionId.force_output: True,
- OptionId.hide_comments: True,
- OptionId.html_out: True,
- OptionId.indent_attributes: False,
- OptionId.indent_cdata: False,
- OptionId.indent_content: False,
- OptionId.indent_spaces: 0,
- OptionId.join_classes: True,
- OptionId.join_styles: True,
- OptionId.mark: False,
- OptionId.merge_divs: False,
- OptionId.merge_emphasis: False,
- OptionId.merge_spans: False,
- OptionId.meta_charset: True,
- OptionId.newline: LineEnding.crlf,
- OptionId.num_entities: True,
- OptionId.quote_ampersand: True,
- OptionId.quote_marks: True,
- OptionId.quote_nbsp: True,
- OptionId.sort_attributes: AttrSortStrategy.alpha,
+ OptionId.alt_text: "",
+ OptionId.doctype: "html5",
+ OptionId.drop_empty_elems: False,
+ OptionId.drop_empty_paras: False,
+ OptionId.fix_backslash: False,
+ OptionId.force_output: True,
+ OptionId.hide_comments: True,
+ OptionId.html_out: True,
+ OptionId.indent_attributes: False,
+ OptionId.indent_cdata: False,
+ OptionId.indent_content: False,
+ OptionId.indent_spaces: 0,
+ OptionId.join_classes: True,
+ OptionId.join_styles: True,
+ OptionId.mark: False,
+ OptionId.merge_divs: False,
+ OptionId.merge_emphasis: False,
+ OptionId.merge_spans: False,
+ OptionId.meta_charset: True,
+ OptionId.newline: LineEnding.crlf,
+ OptionId.num_entities: True,
+ OptionId.quote_ampersand: True,
+ OptionId.quote_marks: True,
+ OptionId.quote_nbsp: True,
+ OptionId.sort_attributes: AttrSortStrategy.alpha,
}
-set_language('de')
+set_language("de")
expected = (
'\r\n\r\n
\r\n\r\n\r\n\r\n\r\nHallöchen\r\n\r\n\r\n',
"Zeile 1 Spalte 1 - Warnung: fehlende Deklaration\nZeile 1 Spalte 1 - Warnung: Klartext ist im Element nicht erlaubt\nZeile 1 Spalte 1 - Info: bereits vermerkt\nZeile 1 Spalte 1 - Warnung: füge implizites ein\nZeile 1 Spalte 1 - Warnung: füge fehlendes 'title' Element ein\nZeile 1 Spalte 10 - Info: Fehlendes wurde in hinzugefügt\n",
)
-actual = tidy_document('Hallöchen', options=options)
+actual = tidy_document("Hallöchen", options=options)
if expected != actual:
- print('actual=' + repr(actual))
+ print("actual=" + repr(actual))
raise SystemExit(1)
-print('OK')
+print("OK")
diff --git a/docs/conf.py b/docs/conf.py
index 71f18c9..e5c4e9d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,203 +1,63 @@
-# -*- coding: utf-8 -*-
-#
-# PyJSON5 documentation build configuration file, created by
-# sphinx-quickstart on Wed May 2 18:15:32 2018.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
import os
import sys
-sys.path.insert(0, os.path.abspath('..'))
-import pytidyhtml5
-import pytidyhtml5
+sys.path.insert(0, os.path.abspath(".."))
+import pytidyhtml5
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.inheritance_diagram',
- 'sphinx_autodoc_typehints',
- 'sphinx.ext.autosectionlabel',
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.intersphinx",
+ "sphinx_autodoc_typehints",
+ "sphinx.ext.autosectionlabel",
]
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
-# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
-# General information about the project.
-project = u'PyTidyHTML5'
-copyright = u'2018-2021, René Kijewski'
-author = u'René Kijewski'
+project = "PyTidyHTML5"
+copyright = "2019-2024, René Kijewski"
+author = "René Kijewski"
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-with open(os.path.join(os.path.abspath('..'), 'lib', 'VERSION.txt'), 'rt') as f:
+with open(os.path.join(os.path.abspath(".."), "lib", "VERSION.txt"), "rt") as f:
# The full version, including alpha/beta/rc tags.
release = eval(f.read())
# The short X.Y version.
version = release # '.'.join(release.split('.', 2)[:2])
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
+language = "en"
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = []
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
-# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
+html_theme = "sphinx_rtd_theme"
html_theme_options = {
- 'navigation_depth': -1,
+ "navigation_depth": -1,
}
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# This is required for the alabaster theme
-# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
- '**': [
- 'localtoc.html',
- 'searchbox.html',
+ "**": [
+ "localtoc.html",
+ "searchbox.html",
]
}
-
-# -- Options for HTMLHelp output ------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'PyJSON5doc'
-
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- #
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- #
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- #
- # 'preamble': '',
-
- # Latex figure (float) alignment
- #
- # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-latex_documents = [
- (master_doc, 'PyJSON5.tex', u'PyJSON5 Documentation',
- u'René Kijewski', 'manual'),
-]
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'pyjson5', u'PyJSON5 Documentation',
- [author], 1)
-]
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'PyJSON5', u'PyJSON5 Documentation',
- author, 'PyJSON5', 'One line description of project.',
- 'Miscellaneous'),
-]
-
+htmlhelp_basename = "PyTidyHtml5doc"
display_toc = True
-autodoc_default_flags = ['members']
+autodoc_default_options = {"members": True}  # autodoc_default_flags was removed in Sphinx 4.0
autosummary_generate = True
intersphinx_mapping = {
- 'python': ('https://docs.python.org/3', None),
-}
-
-inheritance_graph_attrs = {
- 'size': '"6.0, 8.0"',
- 'fontsize': 32,
- 'bgcolor': 'transparent',
-}
-inheritance_node_attrs = {
- 'color': 'black',
- 'fillcolor': 'white',
- 'style': '"filled,solid"',
-}
-inheritance_edge_attrs = {
- 'penwidth': 1.2,
- 'arrowsize': 0.8,
+ "python": ("https://docs.python.org/3", None),
}
diff --git a/docs/document.rst b/docs/document.rst
index 7494038..84c9d81 100644
--- a/docs/document.rst
+++ b/docs/document.rst
@@ -5,3 +5,8 @@ Documents
.. autosummary::
~pytidyhtml5.Document
+
+
+.. autoclass:: pytidyhtml5.Document
+ :members:
+ :undoc-members:
diff --git a/generate_imports.py b/generate_imports.py
index 98d77d2..4ed0256 100755
--- a/generate_imports.py
+++ b/generate_imports.py
@@ -11,14 +11,19 @@
root = abspath(dirname(__file__))
-Config.set_library_file((sorted(
- glob('/usr/lib/llvm-*/lib/libclang.so.1'),
- key=lambda p: float(match(r'/usr/lib/llvm-(\d+(?:[.]\d+)?)/', p).group(1)),
- reverse=True,
-) or ('/usr/lib64/llvm/libclang.so',))[0])
+Config.set_library_file(
+ (
+ sorted(
+ glob("/usr/lib/llvm-*/lib/libclang.so.1"),
+ key=lambda p: float(match(r"/usr/lib/llvm-(\d+(?:[.]\d+)?)/", p).group(1)),
+ reverse=True,
+ )
+ or ("/usr/lib64/llvm/libclang.so",)
+ )[0]
+)
index = Index.create()
-tu = index.parse(join(root, 'generate_imports_transclusion.h'))
+tu = index.parse(join(root, "generate_imports_transclusion.h"))
enums = {}
@@ -28,105 +33,135 @@
if len(direct_children) != 1:
continue
- direct_child, = direct_children
+ (direct_child,) = direct_children
if direct_child.kind is not CursorKind.ENUM_DECL:
continue
enums[cursor.spelling] = {
- 'type': direct_child.enum_type.spelling,
- 'values': {
+ "type": direct_child.enum_type.spelling,
+ "values": {
definition.spelling: definition.enum_value
for definition in direct_child.get_children()
},
}
-with open(join(root, 'lib', '_import_tidy_enum.pyx'), 'wt') as f:
+with open(join(root, "lib", "_import_tidy_enum.pyx"), "wt") as f:
print("# GENERATED FILE: all modifications will be overwritten.", file=f)
print(file=f)
print("cdef extern from 'tidyenum.h' nogil:", file=f)
print(file=f)
- print(" # Rationale: If Cython thinks the enums are T*, then it won't automagically convert to ints,", file=f)
+ print(
+ " # Rationale: If Cython thinks the enums are T*, then it won't automagically convert to ints,",
+ file=f,
+ )
print(" # so it's easier to find wrong type usage.", file=f)
print(file=f)
for enumname in sorted(enums):
- if enumname.lower().startswith('tidy'):
- print(" ctypedef struct __Enum__", enumname, file=f, sep='')
+ if enumname.lower().startswith("tidy"):
+ print(" ctypedef struct __Enum__", enumname, file=f, sep="")
print(file=f)
for enumname in sorted(enums):
- if enumname.lower().startswith('tidy'):
- print(" ctypedef __Enum__", enumname, " *", enumname, " ", repr(enumname), file=f, sep='')
+ if enumname.lower().startswith("tidy"):
+ print(
+ " ctypedef __Enum__",
+ enumname,
+ " *",
+ enumname,
+ " ",
+ repr(enumname),
+ file=f,
+ sep="",
+ )
for enumname, definition in sorted(enums.items()):
- if enumname.lower().startswith('tidy'):
+ if enumname.lower().startswith("tidy"):
print(file=f)
- for valuename in sorted(definition['values']):
- print(" const ", enumname, " ", valuename, file=f, sep='')
+ for valuename in sorted(definition["values"]):
+ print(" const ", enumname, " ", valuename, file=f, sep="")
clsnames = {
- 'AttrId': ('TidyAttr_', '', 'TidyAttrId'),
- 'AttrSortStrategy': ('TidySortAttr', '', 'TidyAttrSortStrategy'),
- 'ConfigCategory': ('Tidy', '', 'TidyConfigCategory'),
- 'DoctypeModes': ('TidyDoctype', '', 'TidyDoctypeModes'),
- 'DupAttrModes': ('Tidy', '', 'TidyDupAttrModes'),
- 'EncodingOptions': ('TidyEnc', '', 'TidyEncodingOptions'),
- 'FormatParameterType': ('tidyFormatType_', '', 'TidyFormatParameterType'),
- 'LineEnding': ('Tidy', '', 'TidyLineEnding'),
- 'NodeType': ('TidyNode_', '', 'TidyNodeType'),
- 'OptionId': ('Tidy', '', 'TidyOptionId'),
- 'OptionType': ('Tidy', '', 'TidyOptionType'),
- 'ReportLevel': ('Tidy', '', 'TidyReportLevel'),
- 'Strings': ('', '', 'tidyStrings'),
- 'TagId': ('TidyTag_', '', 'TidyTagId'),
- 'TriState': ('Tidy', 'State', 'TidyTriState'),
- 'Uppercase': ('TidyUppercase', '', 'TidyUppercase'),
- 'UseCustomTagsState': ('TidyCustom', '', 'TidyUseCustomTagsState'),
+ "AttrId": ("TidyAttr_", "", "TidyAttrId"),
+ "AttrSortStrategy": ("TidySortAttr", "", "TidyAttrSortStrategy"),
+ "ConfigCategory": ("Tidy", "", "TidyConfigCategory"),
+ "DoctypeModes": ("TidyDoctype", "", "TidyDoctypeModes"),
+ "DupAttrModes": ("Tidy", "", "TidyDupAttrModes"),
+ "EncodingOptions": ("TidyEnc", "", "TidyEncodingOptions"),
+ "FormatParameterType": ("tidyFormatType_", "", "TidyFormatParameterType"),
+ "LineEnding": ("Tidy", "", "TidyLineEnding"),
+ "NodeType": ("TidyNode_", "", "TidyNodeType"),
+ "OptionId": ("Tidy", "", "TidyOptionId"),
+ "OptionType": ("Tidy", "", "TidyOptionType"),
+ "ReportLevel": ("Tidy", "", "TidyReportLevel"),
+ "Strings": ("", "", "tidyStrings"),
+ "TagId": ("TidyTag_", "", "TidyTagId"),
+ "TriState": ("Tidy", "State", "TidyTriState"),
+ "Uppercase": ("TidyUppercase", "", "TidyUppercase"),
+ "UseCustomTagsState": ("TidyCustom", "", "TidyUseCustomTagsState"),
}
-with open(join(root, 'lib', '_tidy_enum.pyx'), 'wt') as f:
+with open(join(root, "lib", "_tidy_enum.pyx"), "wt") as f:
print("# GENERATED FILE: all modifications will be overwritten.", file=f)
print(file=f)
for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()):
- print("cdef object _", clsname, file=f, sep='')
+ print("cdef object _", clsname, file=f, sep="")
print(file=f)
for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()):
- print("global ", clsname, file=f, sep='')
+ print("global ", clsname, file=f, sep="")
print(file=f)
for clsname in sorted(clsnames):
print(file=f)
- print("cdef object ", underscore(clsname), "_for_name(name):", file=f, sep='')
- print(" return _generic_id_for_name(_", clsname, ", name)", file=f, sep='')
+ print("cdef object ", underscore(clsname), "_for_name(name):", file=f, sep="")
+ print(" return _generic_id_for_name(_", clsname, ", name)", file=f, sep="")
for clsname, (prefix, suffix, enumname) in sorted(clsnames.items()):
definition = enums[enumname]
print(file=f)
print(file=f)
- print("_", clsname, " = IntEnum(", repr(clsname), ", {", file=f, sep='')
- for valuename in sorted(definition['values']):
+ print("_", clsname, " = IntEnum(", repr(clsname), ", {", file=f, sep="")
+ for valuename in sorted(definition["values"]):
pretty_name = valuename
if prefix and pretty_name.startswith(prefix):
- pretty_name = pretty_name[len(prefix):]
+ pretty_name = pretty_name[len(prefix) :]
if suffix and pretty_name.endswith(suffix):
- pretty_name = pretty_name[:-len(suffix)]
+ pretty_name = pretty_name[: -len(suffix)]
pretty_name = underscore(pretty_name)
- if pretty_name.startswith('n_tidy_'):
+ if pretty_name.startswith("n_tidy_"):
continue
if iskeyword(pretty_name):
- pretty_name = pretty_name + '_'
-
- print(" ", repr(pretty_name), ": <", definition['type'], "> ", valuename, ",", file=f, sep='')
+ pretty_name = pretty_name + "_"
+
+ print(
+ " ",
+ repr(pretty_name),
+ ": <",
+ definition["type"],
+ "> ",
+ valuename,
+ ",",
+ file=f,
+ sep="",
+ )
print("})", file=f)
print(file=f)
- print("_", clsname, ".for_name = ", underscore(clsname), "_for_name", file=f, sep='')
+ print(
+ "_",
+ clsname,
+ ".for_name = ",
+ underscore(clsname),
+ "_for_name",
+ file=f,
+ sep="",
+ )
print(file=f)
- print(clsname, " = _", clsname, file=f, sep='')
+ print(clsname, " = _", clsname, file=f, sep="")
diff --git a/lib/VERSION.txt b/lib/VERSION.txt
index dc5c43b..693318c 100644
--- a/lib/VERSION.txt
+++ b/lib/VERSION.txt
@@ -1 +1 @@
-"2021.8.7"
+"2024.10.24"
diff --git a/lib/_allocator.pyx b/lib/_allocator.pyx
index 44404f8..f2d4951 100644
--- a/lib/_allocator.pyx
+++ b/lib/_allocator.pyx
@@ -1,8 +1,8 @@
-cdef void *allocator_realloc_raw(TidyAllocator *self, void *block, size_t nBytes) nogil:
+cdef void *allocator_realloc_raw(TidyAllocator *self, void *block, size_t nBytes) noexcept nogil:
return PyMem_RawRealloc(block, nBytes)
-cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes) nogil:
+cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes) noexcept nogil:
cdef void *result
with gil:
result = ObjectRealloc(block, nBytes)
@@ -12,23 +12,23 @@ cdef void *allocator_realloc_gil(TidyAllocator *self, void *block, size_t nBytes
return result
-cdef void *allocator_alloc_raw(TidyAllocator *self, size_t nBytes) nogil:
+cdef void *allocator_alloc_raw(TidyAllocator *self, size_t nBytes) noexcept nogil:
return allocator_realloc_raw(self, NULL, nBytes)
-cdef void *allocator_alloc_gil(TidyAllocator *self, size_t nBytes) nogil:
+cdef void *allocator_alloc_gil(TidyAllocator *self, size_t nBytes) noexcept nogil:
return allocator_realloc_gil(self, NULL, nBytes)
-cdef void allocator_free_raw(TidyAllocator *self, void *block) nogil:
+cdef void allocator_free_raw(TidyAllocator *self, void *block) noexcept nogil:
allocator_realloc_raw(self, block, 0)
-cdef void allocator_free_gil(TidyAllocator *self, void *block) nogil:
+cdef void allocator_free_gil(TidyAllocator *self, void *block) noexcept nogil:
allocator_realloc_gil(self, block, 0)
-cdef void allocator_panic(TidyAllocator *self, ctmbstr msg) nogil:
+cdef void allocator_panic(TidyAllocator *self, ctmbstr msg) noexcept nogil:
with gil:
raise SystemError(unicode(msg, 'UTF-8', 'replace'))
diff --git a/lib/_enum_by_name.pyx b/lib/_enum_by_name.pyx
index 380fcbf..84531a7 100644
--- a/lib/_enum_by_name.pyx
+++ b/lib/_enum_by_name.pyx
@@ -1,6 +1,6 @@
cdef cppclass _GetEnum [T, V]:
@staticmethod
- inline object _do(T elem, V fn(T) nogil, EnumCls):
+ inline object _do(T elem, V fn(T) noexcept nogil, EnumCls):
cdef V value
cdef object result = None
@@ -19,7 +19,7 @@ cdef cppclass _EnsureId [V]:
@staticmethod
inline Py_ssize_t _do(
object name,
- V fn(ctmbstr) nogil,
+ V fn(ctmbstr) noexcept nogil,
Py_ssize_t min_excl,
Py_ssize_t max_excl,
) except -2:
diff --git a/lib/_import_python.pyx b/lib/_import_python.pyx
index 0cf83e6..1f4ba34 100644
--- a/lib/_import_python.pyx
+++ b/lib/_import_python.pyx
@@ -32,13 +32,13 @@ cdef extern from 'Python.h':
PyUnicode_4BYTE_KIND
int PyUnicode_READY(object o) except -1
- Py_ssize_t PyUnicode_GET_LENGTH(object o) nogil
- int PyUnicode_KIND(object o) nogil
- boolean PyUnicode_IS_ASCII(object) nogil
- Py_UCS1 *PyUnicode_1BYTE_DATA(object o) nogil
- Py_UCS2 *PyUnicode_2BYTE_DATA(object o) nogil
- Py_UCS4 *PyUnicode_4BYTE_DATA(object o) nogil
- void *PyUnicode_DATA(object o) nogil
+ Py_ssize_t PyUnicode_GET_LENGTH(object o) noexcept nogil
+ int PyUnicode_KIND(object o) noexcept nogil
+ boolean PyUnicode_IS_ASCII(object) noexcept nogil
+ Py_UCS1 *PyUnicode_1BYTE_DATA(object o) noexcept nogil
+ Py_UCS2 *PyUnicode_2BYTE_DATA(object o) noexcept nogil
+ Py_UCS4 *PyUnicode_4BYTE_DATA(object o) noexcept nogil
+ void *PyUnicode_DATA(object o) noexcept nogil
int PyDict_SetItemString(object p, const char *key, object val) except -1
@@ -50,8 +50,8 @@ cdef extern from 'Python.h':
object PyMemoryView_FromObject(object)
object PyByteArray_FromStringAndSize(const char*, Py_ssize_t)
- char *PyByteArray_AS_STRING(object) nogil
- Py_ssize_t PyByteArray_GET_SIZE(object) nogil
+ char *PyByteArray_AS_STRING(object) noexcept nogil
+ Py_ssize_t PyByteArray_GET_SIZE(object) noexcept nogil
ctypedef signed long Py_hash
ctypedef signed short wchar_t
@@ -71,14 +71,12 @@ cdef extern from 'Python.h':
ctypedef struct PyASCIIObject:
Py_ssize_t length
Py_hash hash
- wchar_t *wstr
__ascii_object_state state
ctypedef struct PyCompactUnicodeObject:
# PyASCIIObject
Py_ssize_t utf8_length
char *utf8
- Py_ssize_t wstr_length
ctypedef struct PyVarObject:
pass
@@ -99,7 +97,7 @@ cdef extern from 'Python.h':
boolean RaiseIfErrOccurred 'PyErr_Occurred'() except True
AlwaysTrue RaiseErrOccurred 'PyErr_Occurred'() except True
- void *PyMem_RawRealloc(void *p, size_t n) nogil
+ void *PyMem_RawRealloc(void *p, size_t n) noexcept nogil
PyObject *Py_None
diff --git a/lib/_import_tidy.pyx b/lib/_import_tidy.pyx
index dfb4129..6093ec6 100644
--- a/lib/_import_tidy.pyx
+++ b/lib/_import_tidy.pyx
@@ -1,10 +1,10 @@
cdef extern from 'tidy.h' nogil:
ctypedef struct TidyAllocator
- ctypedef void *__allocator_alloc(TidyAllocator *self, size_t nBytes)
- ctypedef void *__allocator_realloc(TidyAllocator *self, void *block, size_t nBytes)
- ctypedef void __allocator_free(TidyAllocator *self, void *block)
- ctypedef void __allocator_panic(TidyAllocator *self, ctmbstr msg)
+ ctypedef void *__allocator_alloc(TidyAllocator *self, size_t nBytes) noexcept
+ ctypedef void *__allocator_realloc(TidyAllocator *self, void *block, size_t nBytes) noexcept
+ ctypedef void __allocator_free(TidyAllocator *self, void *block) noexcept
+ ctypedef void __allocator_panic(TidyAllocator *self, ctmbstr msg) noexcept
ctypedef struct TidyAllocatorVtbl:
__allocator_alloc alloc
@@ -15,11 +15,11 @@ cdef extern from 'tidy.h' nogil:
ctypedef struct TidyAllocator:
const TidyAllocatorVtbl *vtbl
- ctypedef int TidyGetByteFunc(void *sourceData)
- ctypedef void TidyUngetByteFunc(void *sourceData, byte bt)
- ctypedef Bool TidyEOFFunc(void *sourceData)
+ ctypedef int TidyGetByteFunc(void *sourceData) noexcept
+ ctypedef void TidyUngetByteFunc(void *sourceData, byte bt) noexcept
+ ctypedef Bool TidyEOFFunc(void *sourceData) noexcept
- ctypedef void TidyPutByteFunc(void *sinkData, byte bt)
+ ctypedef void TidyPutByteFunc(void *sinkData, byte bt) noexcept
ctypedef struct TidyInputSource:
void *sourceData
@@ -48,14 +48,14 @@ cdef extern from 'tidy.h' nogil:
ctypedef __TidyMessage *TidyMessage
ctypedef __TidyMessageArgument *TidyMessageArgument
- ctmbstr tidyReleaseDate()
- ctmbstr tidyLibraryVersion()
- ctmbstr tidyPlatform()
+ ctmbstr tidyReleaseDate() noexcept
+ ctmbstr tidyLibraryVersion() noexcept
+ ctmbstr tidyPlatform() noexcept
- Bool tidySetLanguage(ctmbstr languageCode)
- ctmbstr tidyGetLanguage()
+ Bool tidySetLanguage(ctmbstr languageCode) noexcept
+ ctmbstr tidyGetLanguage() noexcept
- ctypedef Bool TidyMessageCallback(TidyMessage tmessage)
+ ctypedef Bool TidyMessageCallback(TidyMessage tmessage) noexcept
# Document
diff --git a/lib/_imports.pyx b/lib/_imports.pyx
index 0864d33..b21bb64 100644
--- a/lib/_imports.pyx
+++ b/lib/_imports.pyx
@@ -72,6 +72,9 @@ cdef extern from 'native.hpp' namespace 'PyTidyHtml5' nogil:
Out _reinterpret_cast[Out, In](In ptr)
int call_bool_fn_1(object fn, object arg1)
void reset_hash[T](T *obj)
+ void reset_wstr[T](T *obj)
+ void reset_wstr_length[T](T *obj)
+ void set_ready[T](T *obj)
ctypedef boolean AlwaysTrue
diff --git a/lib/_input_source.pyx b/lib/_input_source.pyx
index e50eb13..81786b9 100644
--- a/lib/_input_source.pyx
+++ b/lib/_input_source.pyx
@@ -90,25 +90,25 @@ cdef class FiledescriptorSource(InputSource):
self.close()
@staticmethod
- cdef int _get_byte(void *sourceData) nogil:
+ cdef int _get_byte(void *sourceData) noexcept nogil:
return ( sourceData).get_byte()
@staticmethod
- cdef void _unget_byte(void *sourceData, byte bt) nogil:
+ cdef void _unget_byte(void *sourceData, byte bt) noexcept nogil:
( sourceData).unget_byte(bt)
@staticmethod
- cdef Bool _eof(void *sourceData) nogil:
+ cdef Bool _eof(void *sourceData) noexcept nogil:
return ( sourceData).eof()
- cdef void unget_byte(FiledescriptorSource self, byte bt) nogil:
+ cdef void unget_byte(FiledescriptorSource self, byte bt) noexcept nogil:
cdef Py_ssize_t index
self.pushback_remaining += 1
index = self.pushback_length - self.pushback_remaining
PyByteArray_AS_STRING(self.buffer)[index] = bt
- cdef int get_byte(FiledescriptorSource self) nogil:
+ cdef int get_byte(FiledescriptorSource self) noexcept nogil:
cdef uint8_t result
cdef Py_ssize_t index
cdef Py_ssize_t pushback_remaining = self.pushback_remaining
@@ -124,7 +124,7 @@ cdef class FiledescriptorSource(InputSource):
return result
- cdef Bool eof(FiledescriptorSource self) nogil:
+ cdef Bool eof(FiledescriptorSource self) noexcept nogil:
cdef ssize_t count
if self.pushback_remaining > 0:
diff --git a/lib/_input_ucs.pyx b/lib/_input_ucs.pyx
index e96aa33..d16c872 100644
--- a/lib/_input_ucs.pyx
+++ b/lib/_input_ucs.pyx
@@ -7,7 +7,7 @@ ctypedef fused Codepoint:
uint32_t
-cdef inline int encode_utf8(SourceData *source_data, Codepoint v) nogil:
+cdef inline int encode_utf8(SourceData *source_data, Codepoint v) noexcept nogil:
cdef int32_t codepoint = v
if (Codepoint is ascii_char) or (codepoint < 0x80):
@@ -36,7 +36,7 @@ cdef inline int encode_utf8(SourceData *source_data, Codepoint v) nogil:
return b' '
-cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) nogil:
+cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) noexcept nogil:
cdef int result
result = source_data.pushback
@@ -55,22 +55,22 @@ cdef inline int ucsGetByteFunc(SourceData *source_data, Codepoint *data) nogil:
return result
-cdef int asciiGetByteFunc(void *sourceData_) nogil:
+cdef int asciiGetByteFunc(void *sourceData_) noexcept nogil:
cdef SourceData *source_data = sourceData_
return ucsGetByteFunc(source_data, source_data.string)
-cdef int ucs1GetByteFunc(void *sourceData_) nogil:
+cdef int ucs1GetByteFunc(void *sourceData_) noexcept nogil:
cdef SourceData *source_data = sourceData_
return ucsGetByteFunc(source_data, source_data.string)
-cdef int ucs2GetByteFunc(void *sourceData_) nogil:
+cdef int ucs2GetByteFunc(void *sourceData_) noexcept nogil:
cdef SourceData *source_data = sourceData_
return ucsGetByteFunc(source_data, source_data.string)
-cdef int ucs4GetByteFunc(void *sourceData_) nogil:
+cdef int ucs4GetByteFunc(void *sourceData_) noexcept nogil:
cdef SourceData *source_data = sourceData_
return ucsGetByteFunc(source_data, source_data.string)
@@ -84,12 +84,12 @@ ctypedef struct SourceData:
UChar3 sub_string
-cdef void ungetByteFunc(void *sourceData_, byte bt) nogil:
+cdef void ungetByteFunc(void *sourceData_, byte bt) noexcept nogil:
cdef SourceData *source_data = sourceData_
source_data.pushback = bt
-cdef Bool eofFunc(void *sourceData_) nogil:
+cdef Bool eofFunc(void *sourceData_) noexcept nogil:
cdef SourceData *source_data = sourceData_
if source_data.remaining != 0:
return no
diff --git a/lib/_output_buffer.pyx b/lib/_output_buffer.pyx
index a19b4a2..72b7bb2 100644
--- a/lib/_output_buffer.pyx
+++ b/lib/_output_buffer.pyx
@@ -200,17 +200,17 @@ cdef class StringBuffer(Buffer):
( result).length = length
reset_hash( result)
- ( result).wstr = NULL
+ reset_wstr( result)
( result).state.interned = SSTATE_NOT_INTERNED
( result).state.kind = PyUnicode_1BYTE_KIND
( result).state.compact = True
- ( result).state.ready = True
+ set_ready( result)
( result).state.ascii = is_ascii
if not is_ascii:
( result).utf8_length = 0
( result).utf8 = NULL
- ( result).wstr_length = 0
+ reset_wstr_length( result)
self.tidy_buffer.allocator = NULL
self.tidy_buffer.bp = NULL
diff --git a/lib/_output_sink.pyx b/lib/_output_sink.pyx
index df30181..245939e 100644
--- a/lib/_output_sink.pyx
+++ b/lib/_output_sink.pyx
@@ -89,17 +89,17 @@ cdef class CallbackSink(OutputSink):
raise
@staticmethod
- cdef void put_byte_integer(void *sinkData, byte bt) nogil:
+ cdef void put_byte_integer(void *sinkData, byte bt) noexcept nogil:
with gil:
( sinkData)._put_byte_integer(bt)
@staticmethod
- cdef void put_byte_bytes(void *sinkData, byte bt) nogil:
+ cdef void put_byte_bytes(void *sinkData, byte bt) noexcept nogil:
with gil:
( sinkData)._put_byte_bytes(bt)
@staticmethod
- cdef void put_byte_latin1(void *sinkData, byte bt) nogil:
+ cdef void put_byte_latin1(void *sinkData, byte bt) noexcept nogil:
with gil:
( sinkData)._put_byte_latin(bt)
@@ -168,7 +168,7 @@ cdef class FiledescriptorSink(OutputSink):
if result < 0:
RaiseErrOccurred()
- cdef ssize_t _flush(FiledescriptorSink self) nogil:
+ cdef ssize_t _flush(FiledescriptorSink self) noexcept nogil:
cdef ssize_t result
cdef Py_ssize_t filled = self.filled
cdef int fd = self.fd
@@ -193,7 +193,7 @@ cdef class FiledescriptorSink(OutputSink):
return 0
@staticmethod
- cdef void put_byte(void *sinkData, byte bt) nogil:
+ cdef void put_byte(void *sinkData, byte bt) noexcept nogil:
cdef Py_ssize_t *empty = &( sinkData).empty
cdef Py_ssize_t *filled = &( sinkData).filled
cdef char *buf = PyByteArray_AS_STRING(( sinkData).buffer)
@@ -234,5 +234,5 @@ cdef class VoidSink(OutputSink):
pass
@staticmethod
- cdef void put_byte(void *sinkData, byte bt) nogil:
+ cdef void put_byte(void *sinkData, byte bt) noexcept nogil:
pass
diff --git a/lib/_tidy_attr.pyx b/lib/_tidy_attr.pyx
index a31361b..1ccb445 100644
--- a/lib/_tidy_attr.pyx
+++ b/lib/_tidy_attr.pyx
@@ -28,7 +28,7 @@ cdef class Attr:
'An Attr is truthy if the attribute exists and the Node did not expire.'
)
- cdef inline boolean _nonzero(Attr self) nogil:
+ cdef inline boolean _nonzero(Attr self) noexcept nogil:
if self is None:
return False
elif self.tidy_attr is NULL:
diff --git a/lib/_tidy_document.pyx b/lib/_tidy_document.pyx
index 1304031..7f9a146 100644
--- a/lib/_tidy_document.pyx
+++ b/lib/_tidy_document.pyx
@@ -38,7 +38,7 @@ cdef class DocumentIterOptions:
self.tidy_iterator = tidy_iterator
self.document = document
- cdef boolean _nonzero__(DocumentIterOptions self) nogil:
+ cdef boolean _nonzero__(DocumentIterOptions self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -95,7 +95,7 @@ cdef class DocumentIterOptionIds:
self.tidy_iterator = tidy_iterator
self.document = document
- cdef inline boolean _nonzero(DocumentIterOptionIds self) nogil:
+ cdef inline boolean _nonzero(DocumentIterOptionIds self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -155,7 +155,7 @@ cdef class DocumentIterDeclTags:
self.document = document
self.option_id = option_id
- cdef inline boolean _nonzero(DocumentIterDeclTags self) nogil:
+ cdef inline boolean _nonzero(DocumentIterDeclTags self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -201,7 +201,7 @@ cdef class DocumentOptionsProxy:
def __cinit__(DocumentOptionsProxy self, Document document):
self.document = document
- cdef inline boolean _nonzero(DocumentOptionsProxy self) nogil:
+ cdef inline boolean _nonzero(DocumentOptionsProxy self) noexcept nogil:
if self is None:
return False
else:
@@ -263,7 +263,7 @@ cdef class DocumentOptionsProxy:
raise KeyError
-cdef document_iter_ctmbstr_init(Document document, TidyIterator *out_tidy_iterator, TidyIterator fn(TidyDoc) nogil):
+cdef document_iter_ctmbstr_init(Document document, TidyIterator *out_tidy_iterator, TidyIterator fn(TidyDoc) noexcept nogil):
cdef TidyDoc tidy_doc
cdef TidyIterator tidy_iterator
@@ -287,7 +287,7 @@ cdef _result_to_outcome(int result):
return parse_outcome
-cdef document_iter_ctmbstr_next(PyObject **document, TidyIterator *tidy_iterator, ctmbstr fn(TidyDoc, TidyIterator*) nogil):
+cdef document_iter_ctmbstr_next(PyObject **document, TidyIterator *tidy_iterator, ctmbstr fn(TidyDoc, TidyIterator*) noexcept nogil):
cdef TidyDoc tidy_doc
cdef TidyOption tidy_option
cdef ctmbstr text
@@ -321,7 +321,7 @@ cdef class DocumentIterPriorityAttrs:
def __cinit__(DocumentIterPriorityAttrs self, Document document):
self.document = document_iter_ctmbstr_init(document, &self.tidy_iterator, tidyOptGetPriorityAttrList)
- cdef inline boolean _nonzero(DocumentIterPriorityAttrs self) nogil:
+ cdef inline boolean _nonzero(DocumentIterPriorityAttrs self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is not NULL:
@@ -353,7 +353,7 @@ cdef class DocumentIterMutedMessages:
def __cinit__(DocumentIterMutedMessages self, Document document):
self.document = document_iter_ctmbstr_init(document, &self.tidy_iterator, tidyOptGetMutedMessageList)
- cdef inline boolean _nonzero(DocumentIterMutedMessages self) nogil:
+ cdef inline boolean _nonzero(DocumentIterMutedMessages self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -416,7 +416,7 @@ cdef class Document:
if tidy_doc is not NULL:
tidyRelease(tidy_doc)
- cdef inline boolean _nonzero(Document self) nogil:
+ cdef inline boolean _nonzero(Document self) noexcept nogil:
if self is None:
return False
else:
@@ -444,7 +444,7 @@ cdef class Document:
else:
return (self.tidy_doc is NULL) or (self.tidy_doc is not ( other).tidy_doc)
- cdef object __get(Document self, TidyNode fn(TidyDoc) nogil):
+ cdef object __get(Document self, TidyNode fn(TidyDoc) noexcept nogil):
cdef Node result
cdef TidyDoc tidy_doc = self.tidy_doc
cdef TidyNode tidy_node = NULL
@@ -803,7 +803,7 @@ cdef class Document:
)
cdef TidyInputSource input_source
cdef int input_kind
- cdef int (*get_fun)(void *sourceData_) nogil
+ cdef int (*get_fun)(void *sourceData_) noexcept nogil
cdef TidyDoc tidy_doc = self.tidy_doc
if tidy_doc is NULL:
@@ -958,7 +958,7 @@ cdef class Document:
self._set_message_callback(value)
@staticmethod
- cdef Bool message_callback_nogil(TidyMessage tidy_message) nogil:
+ cdef Bool message_callback_nogil(TidyMessage tidy_message) noexcept nogil:
cdef TidyDoc tidy_doc = tidyGetMessageDoc(tidy_message)
cdef void *app_data
diff --git a/lib/_tidy_message.pyx b/lib/_tidy_message.pyx
index cbfb009..39bb2b2 100644
--- a/lib/_tidy_message.pyx
+++ b/lib/_tidy_message.pyx
@@ -29,7 +29,7 @@ cdef class Message:
'''
return self.document
- cdef inline boolean _nonzero(Message self) nogil:
+ cdef inline boolean _nonzero(Message self) noexcept nogil:
if self is None:
return False
elif self.tidy_message is NULL:
@@ -322,7 +322,7 @@ cdef class MessageArg:
def __init__(MessageArg self, Message message):
self.message = message
- cdef inline boolean _nonzero(MessageArg self) nogil:
+ cdef inline boolean _nonzero(MessageArg self) noexcept nogil:
if self is None:
return False
elif self.tidy_arg is NULL:
@@ -462,7 +462,7 @@ cdef class MessageIterArgs:
self.tidy_iterator = tidy_iterator
self.message = message
- cdef inline boolean _nonzero(self) nogil:
+ cdef inline boolean _nonzero(self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -519,7 +519,7 @@ cdef class MessageIterValues:
self.tidy_iterator = tidy_iterator
self.message = message
- cdef inline boolean _nonzero(self) nogil:
+ cdef inline boolean _nonzero(self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
diff --git a/lib/_tidy_node.pyx b/lib/_tidy_node.pyx
index 3f0c96a..58f3658 100644
--- a/lib/_tidy_node.pyx
+++ b/lib/_tidy_node.pyx
@@ -8,7 +8,7 @@ cdef class NodeIterChildren:
if parent is not None:
self.node = parent.get_child()
- cdef inline boolean _nonzero(NodeIterChildren self) nogil:
+ cdef inline boolean _nonzero(NodeIterChildren self) noexcept nogil:
if self is None:
return False
else:
@@ -38,7 +38,7 @@ cdef class NodeIterAttributes:
if parent is not None:
self.attr = parent.get_attr_first()
- cdef inline boolean _nonzero(NodeIterAttributes self) nogil:
+ cdef inline boolean _nonzero(NodeIterAttributes self) noexcept nogil:
if self is None:
return False
else:
@@ -68,7 +68,7 @@ cdef class NodeIterAttributeIds:
if parent is not None:
self.attr = parent.get_attr_first()
- cdef inline boolean _nonzero(NodeIterAttributeIds self) nogil:
+ cdef inline boolean _nonzero(NodeIterAttributeIds self) noexcept nogil:
if self is None:
return False
else:
@@ -105,7 +105,7 @@ cdef class NodeAttrProxy:
def __cinit__(NodeAttrProxy self, Node node):
self.node = node
- cdef inline boolean _nonzero(NodeAttrProxy self) nogil:
+ cdef inline boolean _nonzero(NodeAttrProxy self) noexcept nogil:
if self is None:
return False
else:
@@ -176,7 +176,7 @@ cdef class Node:
'and the document has was not been released in the meantime.'
)
- cdef inline boolean _nonzero(Node self) nogil:
+ cdef inline boolean _nonzero(Node self) noexcept nogil:
if self is None:
return False
elif self.tidy_node is NULL:
@@ -216,7 +216,7 @@ cdef class Node:
else:
return (self.tidy_node is NULL) or (self.tidy_node is not ( other).tidy_node)
- cdef object __get_node(Node self, TidyNode fn(TidyNode) nogil):
+ cdef object __get_node(Node self, TidyNode fn(TidyNode) noexcept nogil):
cdef Node result
cdef TidyNode tidy_node = self.tidy_node
diff --git a/lib/_tidy_options.pyx b/lib/_tidy_options.pyx
index 0b72e11..ac8416d 100644
--- a/lib/_tidy_options.pyx
+++ b/lib/_tidy_options.pyx
@@ -19,7 +19,7 @@ cdef class OptionPicklist:
self.tidy_iterator = tidy_iterator
self.option = option
- cdef inline boolean _nonzero(OptionPicklist self) nogil:
+ cdef inline boolean _nonzero(OptionPicklist self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -80,7 +80,7 @@ cdef class OptionIterLinkedOptions:
self.tidy_iterator = tidy_iterator
self.document = document
- cdef inline boolean _nonzero(OptionIterLinkedOptions self) nogil:
+ cdef inline boolean _nonzero(OptionIterLinkedOptions self) noexcept nogil:
if self is None:
return False
elif self.tidy_iterator is NULL:
@@ -139,7 +139,7 @@ cdef class Option:
'and the document has was not been released in the meantime.'
)
- cdef inline boolean _nonzero(Option self) nogil:
+ cdef inline boolean _nonzero(Option self) noexcept nogil:
if self is None:
return False
elif self.tidy_option is NULL:
diff --git a/lib/native.hpp b/lib/native.hpp
index 444261f..6a4f8d2 100644
--- a/lib/native.hpp
+++ b/lib/native.hpp
@@ -1,6 +1,5 @@
#pragma once
-#include
#include
#include
#include "Python.h"
@@ -19,9 +18,9 @@ const char LONGDESCRIPTION[] =
const std::size_t VERSION_LENGTH = sizeof(VERSION) - 1;
const std::size_t LONGDESCRIPTION_LENGTH = sizeof(LONGDESCRIPTION) - 1;
-using UChar3 = std::array;
+using UChar3 = unsigned char[3];
-constexpr const UChar3 utf8_bom{{0xBFu, 0xBBu, 0xEFu}};
+constexpr const UChar3 utf8_bom{0xBFu, 0xBBu, 0xEFu};
template
ctmbstr _text_fn(Type elem, ctmbstr fn(Type, ArgsFn...), ArgsIn &&...args) {
@@ -133,16 +132,23 @@ int call_bool_fn_1(PyObject *fn, PyObject *arg1) {
}
+template
+struct VoidT_ {
+ using Value = void*;
+};
+
+// hash | ob_shash
+
template
struct has_ob_shash {
- template static std::uint8_t test(decltype(&C::ob_shash)) ;
+ template static std::uint8_t test(typename VoidT_().ob_shash, true))>::Value);
template static std::uint64_t test(...);
enum { value = sizeof(test(0)) == sizeof(std::uint8_t) };
};
template
struct has_hash {
- template static std::uint8_t test(decltype(&C::hash)) ;
+ template static std::uint8_t test(typename VoidT_().hash, true))>::Value);
template static std::uint64_t test(...);
enum { value = sizeof(test(0)) == sizeof(std::uint8_t) };
};
@@ -176,5 +182,98 @@ static inline void reset_hash(T *obj) {
ResetHash_::reset(obj);
}
+// wstr
+
+template
+struct has_wstr {
+ template static std::uint8_t test(typename VoidT_().wstr, true))>::Value);
+ template static std::uint64_t test(...);
+ enum { value = sizeof(test(0)) == sizeof(std::uint8_t) };
+};
+
+template::value>
+struct ResetWstr_;
+
+template
+struct ResetWstr_ {
+ static inline void reset(T *obj) {
+ obj->wstr = nullptr; // CPython >= 3.12: absent
+ }
+};
+
+template
+struct ResetWstr_ {
+ static inline void reset(T *) {
+ (void) 0;
+ }
+};
+
+template
+static inline void reset_wstr(T *obj) {
+ ResetWstr_::reset(obj);
+}
+
+// ready
+
+template
+struct has_ready {
+ template static std::uint8_t test(typename VoidT_().state.ready, true))>::Value);
+ template static std::uint64_t test(...);
+ enum { value = sizeof(test(0)) == sizeof(std::uint8_t) };
+};
+
+template::value>
+struct SetReady_;
+
+template
+struct SetReady_ {
+ static inline void set(T *obj) {
+ obj->state.ready = true; // CPython >= 3.12: absent
+ }
+};
+
+template
+struct SetReady_ {
+ static inline void set(T *) {
+ (void) 0;
+ }
+};
+
+template
+static inline void set_ready(T *obj) {
+ SetReady_::set(obj);
+}
+
+// wstr_length
+
+template
+struct has_wstr_length {
+ template static std::uint8_t test(typename VoidT_().wstr_length, true))>::Value);
+ template static std::uint64_t test(...);
+ enum { value = sizeof(test(0)) == sizeof(std::uint8_t) };
+};
+
+template::value>
+struct ResetWstrLength_;
+
+template
+struct ResetWstrLength_ {
+    static inline void reset(T *obj) {
+        obj->wstr_length = 0;  // Py_ssize_t length field (not a pointer); absent in CPython >= 3.12
+    }
+};
+
+template
+struct ResetWstrLength_ {
+ static inline void reset(T *) {
+ (void) 0;
+ }
+};
+
+template
+static inline void reset_wstr_length(T *obj) {
+ ResetWstrLength_::reset(obj);
+}
+
}
}
diff --git a/lib/pytidyhtml5/__init__.py b/lib/pytidyhtml5/__init__.py
new file mode 100644
index 0000000..f86c519
--- /dev/null
+++ b/lib/pytidyhtml5/__init__.py
@@ -0,0 +1,72 @@
+# Copyright 2019-2024 René Kijewski
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import _pytidyhtml5
+
+from _pytidyhtml5 import *
+from ._doc import BUILDING_DOC
+
+
+__all__ = _pytidyhtml5.__all__
+__version__ = _pytidyhtml5.__version__
+
+
+if BUILDING_DOC:  # flag imported from ._doc; the block below runs only when it is truthy
+ from enum import IntEnum
+
+ def _omit_signature(doc):  # returns the text after the first blank line of `doc`, stripped; None when `doc` is falsy
+ if doc:
+ return "\n\n".join(doc.split("\n\n", 1)[1:]).strip()
+
+ for name in __all__:  # every name exported by _pytidyhtml5
+ obj = getattr(_pytidyhtml5, name)
+ if isinstance(obj, type):  # exports that are not classes are left untouched
+ if issubclass(obj, IntEnum):  # enums: re-declare each member with its integer value
+ src = [f"class {name}(IntEnum):"]
+ src.extend(f" {__elem.name} = {int(__elem)}" for __elem in obj)
+ src.extend(
+ (
+ " __slots__ = ()",
+ " __name__ = __qualname__ = obj.__name__",
+ " __doc__ = obj.__doc__",
+ )
+ )
+ eval(compile("\n".join(src), __file__, "exec"))  # run the generated class statement; its source refers to `obj` by name
+ else:  # other classes: copy the attributes, then re-create properties with trimmed docstrings
+ src = [f"class {name}:"]
+ src.extend(
+ f" {__name} = obj.{__name}"
+ for __name in vars(obj)
+ if __name not in ("__new__", "__pyx_vtable__")  # these two attributes are not copied
+ )
+ src.extend(
+ f" {__name} = property(doc=_omit_signature(obj.{__name}.__doc__))"
+ for __name in vars(obj)
+ if isinstance(getattr(obj, __name), property)
+ )
+ src.extend(
+ (
+ " __slots__ = ()",
+ " __doc__ = obj.__doc__",
+ " __mro__ = obj.__mro__",
+ )
+ )
+ if hasattr(obj, "_non_zero_doc"):  # add a no-op __bool__ that carries this docstring
+ src.extend(
+ (
+ " def __bool__(self): pass",
+ " __bool__.__doc__ = obj._non_zero_doc",
+ )
+ )
+ eval(compile("\n".join(src), __file__, "exec"))  # run the generated class statement; its source refers to `obj` by name
diff --git a/lib/pytidyhtml5/_doc.py b/lib/pytidyhtml5/_doc.py
new file mode 100644
index 0000000..d0670b3
--- /dev/null
+++ b/lib/pytidyhtml5/_doc.py
@@ -0,0 +1,7 @@
+from os.path import basename, dirname
+from sys import argv
+
+
+BUILDING_DOC = (basename(argv[0]) in ("sphinx-build",)) or (  # True when the program name (argv[0]) is "sphinx-build" ...
+ basename(dirname(argv[0])) in ("sphinx",)  # ... or when argv[0] sits directly inside a directory named "sphinx"
+)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4388bd9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+ "Cython == 3.*",
+ "setuptools",
+]
+build-backend = "setuptools.build_meta"
diff --git a/pytidyhtml5/__init__.py b/pytidyhtml5/__init__.py
deleted file mode 100644
index 86c150b..0000000
--- a/pytidyhtml5/__init__.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import _pytidyhtml5
-
-from _pytidyhtml5 import *
-from ._doc import BUILDING_DOC
-
-
-__all__ = _pytidyhtml5.__all__
-__version__ = _pytidyhtml5.__version__
-
-
-if BUILDING_DOC:
- from enum import IntEnum
-
- def _omit_signature(doc):
- if doc:
- return '\n\n'.join(doc.split('\n\n', 1)[1:]).strip()
-
- for name in __all__:
- obj = getattr(_pytidyhtml5, name)
- if isinstance(obj, type):
- if issubclass(obj, IntEnum):
- src = [f'class {name}(IntEnum):']
- src.extend(
- f' {__elem.name} = {int(__elem)}'
- for __elem in obj
- )
- src.extend((
- ' __slots__ = ()',
- ' __name__ = __qualname__ = obj.__name__',
- ' __doc__ = obj.__doc__',
- ))
- eval(compile('\n'.join(src), __file__, 'exec'))
- else:
- src = [f'class {name}:']
- src.extend(
- f' {__name} = obj.{__name}'
- for __name in vars(obj)
- if __name not in ('__new__', '__pyx_vtable__')
- )
- src.extend(
- f' {__name} = property(doc=_omit_signature(obj.{__name}.__doc__))'
- for __name in vars(obj)
- if isinstance(getattr(obj, __name), property)
- )
- src.extend((
- ' __slots__ = ()',
- ' __doc__ = obj.__doc__',
- ' __mro__ = obj.__mro__',
- ))
- if hasattr(obj, '_non_zero_doc'):
- src.extend((
- ' def __bool__(self): pass',
- ' __bool__.__doc__ = obj._non_zero_doc',
- ))
- eval(compile('\n'.join(src), __file__, 'exec'))
diff --git a/pytidyhtml5/_doc.py b/pytidyhtml5/_doc.py
deleted file mode 100644
index 5b7e56e..0000000
--- a/pytidyhtml5/_doc.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from os.path import basename, dirname
-from sys import argv
-
-
-BUILDING_DOC = (basename(argv[0]) in (
- 'sphinx-build',
-)) or (basename(dirname(argv[0])) in (
- 'sphinx',
-))
diff --git a/readthedocs.yml b/readthedocs.yml
index 22bef0e..052748d 100644
--- a/readthedocs.yml
+++ b/readthedocs.yml
@@ -5,10 +5,11 @@ version: 2
sphinx:
configuration: docs/conf.py
-formats:
- - pdf
+build:
+ os: ubuntu-24.04
+ tools:
+ python: "3.12"
python:
- version: "3.8"
install:
- - requirements: requirements.readthedocs.txt
+ - requirements: requirements-readthedocs.txt
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..3dd9abc
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,12 @@
+black
+build
+clang == 17.*
+cython == 3.*
+inflection == 0.5.*
+pytest == 8.*
+wheel
+
+# keep in sync with requirements-readthedocs.txt
+sphinx == 8.*
+sphinx_autodoc_typehints == 2.*
+sphinx_rtd_theme == 3.*
diff --git a/requirements-readthedocs.txt b/requirements-readthedocs.txt
new file mode 100644
index 0000000..4549e30
--- /dev/null
+++ b/requirements-readthedocs.txt
@@ -0,0 +1,7 @@
+# keep in sync with src/VERSION.inc
+pytidyhtml5 == 2024.10.24
+
+# keep in sync with requirements-dev.txt
+sphinx == 8.*
+sphinx_autodoc_typehints == 2.*
+sphinx_rtd_theme == 3.*
diff --git a/requirements.readthedocs.txt b/requirements.readthedocs.txt
deleted file mode 100644
index ff263bf..0000000
--- a/requirements.readthedocs.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-pytidyhtml5 == 2021.8.7
-sphinx == 3.*, >= 3.4
-sphinx_autodoc_typehints == 1.*, >= 1.3, < 1.8
-sphinx_rtd_theme == 0.*, >= 0.4
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 254af0c..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-cibuildwheel == 2.*
-clang == 6.*, >= 6.0.0.1
-cython == 0.*, >= 0.28.4
-sphinx_autodoc_typehints == 1.*, >= 1.3, < 1.8
-sphinx_rtd_theme == 0.*, >= 0.4
-wheel == 0.*, >= 0.31.0
-inflection == 0.*, >= 0.3.1
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..e6e6832
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,61 @@
+[metadata]
+# keep in sync with requirements-readthedocs.txt
+# keep in sync with src/VERSION.inc
+version = 2024.10.24
+
+name = pytidyhtml5
+description = HTML clean-up and repair: Statically linked Python interface for tidy-html5.
+url = https://github.com/Kijewski/pytidyhtml5
+project_urls =
+ Code = https://github.com/Kijewski/pytidyhtml5
+ Documentation = https://pytidyhtml5.readthedocs.io/
+ Download = https://pypi.org/project/pytidyhtml5/
+ Homepage = https://github.com/Kijewski/pytidyhtml5
+ Tracker = https://github.com/Kijewski/pytidyhtml5/issues
+
+author = René Kijewski
+maintainer = René Kijewski
+author_email = pypi.org@k6i.de
+maintainer_email = pypi.org@k6i.de
+
+long_description = file: README.rst
+long_description_content_type = text/x-rst
+
+license = Apache License 2.0
+license_files = LICENSE
+
+classifiers =
+ Development Status :: 5 - Production/Stable
+ Intended Audience :: Developers
+ Intended Audience :: System Administrators
+ License :: OSI Approved :: Apache Software License
+ Operating System :: POSIX :: Linux
+ Operating System :: MacOS :: MacOS X
+ Operating System :: Microsoft :: Windows
+ Programming Language :: Cython
+ Programming Language :: Python :: 3
+ Programming Language :: Python :: 3.6
+ Programming Language :: Python :: 3.7
+ Programming Language :: Python :: 3.8
+ Programming Language :: Python :: 3.9
+ Programming Language :: Python :: 3.10
+ Programming Language :: Python :: 3.11
+ Programming Language :: Python :: 3.12
+ Programming Language :: Python :: 3.13
+ Programming Language :: Python :: 3 :: Only
+ Programming Language :: Python :: Implementation :: CPython
+ Programming Language :: Python :: Implementation :: PyPy
+ Topic :: Text Processing :: Markup :: HTML
+
+[options]
+zip_safe = False
+
+python_requires = ~= 3.6
+setup_requires =
+ Cython == 3.*
+ setuptools
+
+include_package_data = True
+packages = pytidyhtml5
+package_dir =
+ = lib
diff --git a/setup.py b/setup.py
index 07f028c..9fb2395 100644
--- a/setup.py
+++ b/setup.py
@@ -1,130 +1,33 @@
#!/usr/bin/env python3
-from os.path import dirname, join, abspath
-from os import environ
-from platform import system
from setuptools import setup, Extension
-from subprocess import check_output
-from sys import argv
-from traceback import print_last
-def get_text(name):
- root = abspath(dirname(__file__))
- with open(join(root, 'lib', name), 'rt') as f:
- return eval(f.read().strip())
-
-
-if system() == 'Linux':
- extra_compile_args = [
- '-std=c++11', '-flto',
- '-O2', '-fomit-frame-pointer', '-fPIC', '-ggdb1', '-pipe',
- '-D_FORTIFY_SOURCE=2', '-fstack-protector-strong', '--param=ssp-buffer-size=8',
- '-isystem', 'tidy-html5/include/',
- ]
- extra_link_args = [
- *extra_compile_args,
- 'tidy-html5/build/cmake/libtidy.a',
- '-fPIC',
- '-Wl,-zrelro,-znow,-zcombreloc,-znocommon,-znoexecstack',
- ]
-else:
- # OSX or Windows
- extra_compile_args = [
- '-std=c++11', '-flto',
- '-O2', '-fomit-frame-pointer', '-fPIC', '-ggdb1', '-pipe',
- '-D_FORTIFY_SOURCE=2', '-fstack-protector-strong', '--param=ssp-buffer-size=8',
- '-isystem', 'tidy-html5/include/',
- ]
- extra_link_args = [
- *extra_compile_args,
- 'tidy-html5/build/cmake/libtidy.a',
- '-fPIC',
- ]
-
-if system() == 'Windows':
- # I cannot get cibuildwheel to accept my CC + CXX overrides. :(
- # So monkey patching it is ...
-
- for line in check_output(['make', 'export-environ']).decode('UTF-8').splitlines():
- (key, value) = line.split('=', 1)
- environ[key] = value
-
- import distutils.command.build_ext
-
- def customize_compiler(compiler):
- compiler_settings = [
- environ['CXX'], '-m64',
- '-Wall', '-Wno-unused-result', '-Wformat', '-Werror=format-security', '-Wdate-time',
- '-O2', '-g', '-fwrapv', '-fstack-protector-strong',
- '-DNDEBUG', '-D_FORTIFY_SOURCE=2',
-
- '-DMS_WIN64', # https://github.com/cython/cython/issues/3405#issuecomment-596975159
- ]
-
- compiler.preprocessor = [environ['CXX'], '-m64', '-E', '-Wdate-time', '-D_FORTIFY_SOURCE=2']
- compiler.compiler = [*compiler_settings]
- compiler.compiler_cxx = [*compiler_settings]
- compiler.compiler_so = [*compiler_settings, '-fPIC']
- compiler.linker_so = [*compiler_settings, '-shared']
- compiler.linker_exe = [*compiler_settings, '-fPIC']
- compiler.archiver = [environ['AR'], environ['ARFLAGS']]
- compiler.ranlib = [environ['RANLIB']]
-
- class build_ext(distutils.command.build_ext.build_ext):
- def run(self):
- self.compiler = 'unix'
- return super().run()
-
- def build_extensions(self):
- print('self.compiler', repr(vars(self.compiler)))
- return super().build_extensions()
-
- distutils.command.build_ext.customize_compiler = customize_compiler
- distutils.command.build_ext.build_ext = build_ext
-
-
-name = 'pytidyhtml5'
+extra_compile_args = [  # flags passed to the compiler for the extension module
+ "-std=c++11",
+ "-flto",
+ "-O3",
+ "-fPIC",
+ "-g0",
+ "-pipe",
+ "-isystem",  # takes the next list element as its argument
+ "tidy-html5/include/",  # include directory inside the tidy-html5 subproject
+]
+extra_link_args = [
+ *extra_compile_args,  # the link step reuses every compile flag
+ "tidy-html5/build/cmake/libtidy.a",  # static libtidy archive; presumably built before setup.py runs -- TODO confirm
+]
+
+name = "pytidyhtml5"  # the extension module and its source file are named "_" + name
setup(
- name=name,
- version=get_text('VERSION.txt'),
- long_description=get_text('DESCRIPTION.txt'),
- description='HTML clean-up and repair: Statically linked Python interface for tidy-html5.',
- author='René Kijewski',
- author_email='pypi.org@k6i.de',
- maintainer='René Kijewski',
- maintainer_email='pypi.org@k6i.de',
- url='https://github.com/Kijewski/pytidyhtml5',
- python_requires='~= 3.6',
- zip_safe=False,
- ext_modules=[Extension(
- '_' + name,
- sources=['_' + name + '.pyx'],
- include_dirs=['lib'],
- extra_compile_args=extra_compile_args,
- extra_link_args=extra_link_args,
- language='c++',
- )],
- packages=[name],
- platforms=['any'],
- license='ISC',
- classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: Developers',
- 'Intended Audience :: System Administrators',
- 'License :: OSI Approved :: ISC License (ISCL)',
- 'Operating System :: POSIX :: Linux',
- 'Operating System :: MacOS :: MacOS X',
- 'Operating System :: Microsoft :: Windows',
- 'Programming Language :: Cython',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: 3.9',
- 'Programming Language :: Python :: 3 :: Only',
- 'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: PyPy',
- 'Topic :: Text Processing :: Markup :: HTML ',
+ ext_modules=[
+ Extension(
+ "_" + name,
+ sources=["_" + name + ".cpp"],
+ include_dirs=["lib"],
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_link_args,
+ language="c++",
+ )
],
)
diff --git a/tidy-html5 b/tidy-html5
index 1ca3747..d08ddc2 160000
--- a/tidy-html5
+++ b/tidy-html5
@@ -1 +1 @@
-Subproject commit 1ca37471b48a3498f985509828cb3cf85ea129f8
+Subproject commit d08ddc2860aa95ba8e301343a30837f157977cba