diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index ac27a8486..217ee2e78 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -6,3 +6,7 @@ updates:
schedule:
# Check for updates to GitHub Actions every week
interval: "weekly"
+ groups:
+ github-actions:
+ patterns:
+ - "*"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index dd1b9ea6e..dc32c0438 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,99 +41,115 @@ jobs:
#
os: [ubuntu-latest, macos-latest, windows-2019]
python-version:
- - "2.7"
- - "3.5"
- "3.6"
- "3.7"
- "3.8"
- "3.9"
- "3.10" # quotes to avoid being interpreted as the number 3.1
- "3.11"
- # - "3.12-dev"
+ - "3.12"
+ - "3.13-dev"
env: [{ STATIC_DEPS: true }, { STATIC_DEPS: false }]
include:
- - os: windows-2016
- python-version: 2.7
- env: { STATIC_DEPS: true } # always static
+ - os: ubuntu-latest
+ python-version: "3.13-dev"
+ allowed_failure: true
- os: ubuntu-latest
python-version: "3.9"
env: {STATIC_DEPS: true, WITH_REFNANNY: true}
extra_hash: "-refnanny"
- allowed_failure: true
- os: ubuntu-latest
- python-version: "3.11"
+ python-version: "3.12"
env: {STATIC_DEPS: true, WITH_REFNANNY: true}
extra_hash: "-refnanny"
- allowed_failure: true
+
# Coverage setup
- os: ubuntu-latest
- python-version: "3.9"
- env: { COVERAGE: true }
+ python-version: "3.10"
+ env: { COVERAGE: true, STATIC_DEPS: true }
extra_hash: "-coverage"
- allowed_failure: true # shouldn't fail but currently does...
- os: ubuntu-latest
- python-version: "3.9"
+ python-version: "3.10"
env: { STATIC_DEPS: false, EXTRA_DEPS: "docutils pygments sphinx sphinx-rtd-theme" }
extra_hash: "-docs"
- allowed_failure: true # shouldn't fail but currently does...
+
# Old library setup with minimum version requirements
- os: ubuntu-latest
- python-version: "3.9"
+ python-version: "3.10"
env: {
STATIC_DEPS: true,
LIBXML2_VERSION: 2.9.2,
LIBXSLT_VERSION: 1.1.27,
}
- extra_hash: "-oldlibs"
- allowed_failure: true # shouldn't fail but currently does...
+ extra_hash: "-oldlibs29"
+ - os: ubuntu-latest
+ python-version: "3.10"
+ env: {
+ STATIC_DEPS: true,
+ LIBXML2_VERSION: 2.10.3,
+ LIBXSLT_VERSION: 1.1.37,
+ }
+ extra_hash: "-oldlibs210"
+ - os: ubuntu-latest
+ python-version: "3.10"
+ env: {
+ STATIC_DEPS: true,
+ LIBXML2_VERSION: 2.11.7,
+ LIBXSLT_VERSION: 1.1.37,
+ }
+ extra_hash: "-oldlibs211"
+
# Ubuntu sub-jobs:
# ================
# Pypy
- os: ubuntu-latest
- python-version: pypy-2.7
+ python-version: pypy-3.8
env: { STATIC_DEPS: false }
allowed_failure: true
- os: ubuntu-latest
- python-version: pypy-3.7
+ python-version: pypy-3.9
env: { STATIC_DEPS: false }
allowed_failure: true
- os: ubuntu-latest
- python-version: pypy-3.8
+ python-version: pypy-3.10
env: { STATIC_DEPS: false }
allowed_failure: true
# MacOS sub-jobs
# ==============
- - os: macos-latest
- allowed_failure: true # Unicode parsing fails in Py3
+ #- os: macos-latest
+ # allowed_failure: true # Unicode parsing fails in Py3
+
+ - os: ubuntu-20.04
+ python-version: "3.6"
+ env: { STATIC_DEPS: true } # only static
exclude:
- os: ubuntu-latest
- python-version: "3.5"
- - os: ubuntu-latest
python-version: "3.6"
+ - os: macos-latest
+ python-version: "3.6"
+ - os: macos-latest
+ python-version: "3.7"
# Windows sub-jobs
# ==============
- - os: windows-2019
- python-version: 2.7 # needs older image
- os: windows-2019
env: { STATIC_DEPS: false } # always static
# This defaults to 360 minutes (6h) which is way too long and if a test gets stuck, it can block other pipelines.
- # From testing, the runs tend to take ~3 minutes, so a limit of 20 minutes should be enough. This can always be
- # changed in the future if needed.
- timeout-minutes: 20
+ # From testing, the runs tend to take 3-8 minutes, so a limit of 30 minutes should be enough.
+ timeout-minutes: 30
runs-on: ${{ matrix.os }}
env:
OS_NAME: ${{ matrix.os }}
PYTHON_VERSION: ${{ matrix.python-version }}
- MACOSX_DEPLOYMENT_TARGET: 10.15
- LIBXML2_VERSION: 2.9.14
- LIBXSLT_VERSION: 1.1.35
+ MACOSX_DEPLOYMENT_TARGET: "11.0" # quoted so YAML does not read it as the float 11.0
+ LIBXML2_VERSION: 2.12.6
+ LIBXSLT_VERSION: 1.1.39
COVERAGE: false
GCC_VERSION: 9
USE_CCACHE: 1
@@ -143,50 +159,61 @@ jobs:
steps:
- name: Checkout repo
- uses: actions/checkout@v3
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
fetch-depth: 1
- name: Setup Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: ${{ matrix.python-version }}
- - name: Cache [ccache]
- uses: pat-s/always-upload-cache@v3.0.11
- if: startsWith(runner.os, 'Linux')
+ - name: Install MacOS dependencies
+ if: runner.os == 'macOS'
+ run: |
+ brew install automake libtool ccache
+ ln -s /usr/local/bin/glibtoolize /usr/local/bin/libtoolize
+
+ - name: ccache
+ uses: hendrikmuhs/ccache-action@v1.2
+ if: runner.os == 'Linux' || runner.os == 'macOS'
with:
- path: ~/.ccache
- key: ${{ runner.os }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ hashFiles('.github/workflows/ci.yml', 'tools/ci-run.sh') }}
+ max-size: 100M
+ create-symlink: true
+ verbose: 1
+ key: ${{ runner.os }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ matrix.env.STATIC_DEPS }}
+
+ - name: Cache [libs]
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+ if: matrix.env.STATIC_DEPS
+ with:
+ path: |
+ libs/*.xz
+ libs/*.gz
+ libs/*.zip
+ key: libs-${{ runner.os }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }}
- name: Run CI
continue-on-error: ${{ matrix.allowed_failure || false }}
env: ${{ matrix.env }}
- run: bash ./tools/ci-run.sh
+ run: bash -c 'GITHUB_API_TOKEN="${{ secrets.GITHUB_TOKEN }}" bash ./tools/ci-run.sh'
- name: Build docs
if: contains( matrix.env.EXTRA_DEPS, 'sphinx')
run: make html
- name: Upload docs
- uses: actions/upload-artifact@v3
- if: ${{ matrix.extra_hash == '-docs' }}
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
+ if: matrix.extra_hash == '-docs'
with:
name: website_html
path: doc/html
if-no-files-found: ignore
- name: Upload Coverage Report
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
+ if: matrix.env.COVERAGE
with:
name: pycoverage_html
path: coverage*
if-no-files-found: ignore
-
- - name: Upload Wheel
- uses: actions/upload-artifact@v3
- if: ${{ matrix.env.STATIC_DEPS == 'true' && env.COVERAGE == 'false' }}
- with:
- name: wheels-${{ runner.os }}
- path: dist/*.whl
- if-no-files-found: ignore
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 89078587b..afcaa6cd7 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -3,16 +3,49 @@ name: Wheel build
on:
release:
types: [created]
+ schedule:
+ # ┌───────────── minute (0 - 59)
+ # │ ┌───────────── hour (0 - 23)
+ # │ │ ┌───────────── day of the month (1 - 31)
+ # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
+ # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+ # │ │ │ │ │
+ - cron: "42 3 * * 4"
+ push:
+ paths:
+ - .github/workflows/wheels.yml
+ - requirements.txt
+ - pyproject.toml
+ - MANIFEST.in
+ - Makefile
+ - setup*
+ - build*
+ pull_request:
+ types: [opened, synchronize, reopened]
+ paths:
+ - .github/workflows/wheels.yml
+ - requirements.txt
+ - pyproject.toml
+ - MANIFEST.in
+ - Makefile
+ - setup*
+ - build*
+ workflow_dispatch:
+
+permissions: {}
jobs:
sdist:
runs-on: ubuntu-latest
+ permissions:
+ contents: write
+
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Set up Python
- uses: actions/setup-python@v4
+ uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
with:
python-version: "3.x"
@@ -24,161 +57,145 @@ jobs:
- name: Build docs and sdist
run: make html sdist
- env: { STATIC_DEPS: false }
-
- - name: Release
- uses: softprops/action-gh-release@v1
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: dist/*.tar.gz
+ env: { STATIC_DEPS: false, CFLAGS: "-Og" } # it's run-once, so build more quickly
- name: Upload sdist
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
with:
name: sdist
path: dist/*.tar.gz
- name: Upload website
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
with:
name: website
path: doc/html
- Linux:
+ generate-wheels-matrix:
+ # Create a matrix of all architectures & versions to build.
+ # This enables the next step to run cibuildwheel in parallel.
+ # From https://iscinumpy.dev/post/cibuildwheel-2-10-0/#only-210
+ name: Generate wheels matrix
runs-on: ubuntu-latest
-
- strategy:
- # Allows for matrix sub-jobs to fail without canceling the rest
- fail-fast: false
-
- matrix:
- image:
- - manylinux1_x86_64
- - manylinux1_i686
- #- manylinux2010_x86_64
- #- manylinux2010_i686
- - manylinux_2_24_x86_64
- - manylinux_2_24_i686
- - manylinux_2_24_aarch64
- - musllinux_1_1_x86_64
- - musllinux_1_1_aarch64
- #- manylinux_2_24_ppc64le
- #- manylinux_2_24_ppc64le
- #- manylinux_2_24_s390x
- pyversion: ["*"]
-
- exclude:
- - image: manylinux_2_24_aarch64
- pyversion: "*"
- - image: musllinux_1_1_aarch64
- pyversion: "*"
- include:
- - image: manylinux2014_aarch64
- pyversion: "cp36*"
- - image: manylinux_2_24_aarch64
- pyversion: "cp37*"
- - image: manylinux_2_24_aarch64
- pyversion: "cp38*"
- - image: manylinux_2_24_aarch64
- pyversion: "cp39*"
- - image: manylinux_2_24_aarch64
- pyversion: "cp310*"
- - image: manylinux_2_24_aarch64
- pyversion: "cp311*"
-
- - image: musllinux_1_1_aarch64
- pyversion: "cp36*"
- - image: musllinux_1_1_aarch64
- pyversion: "cp37*"
- - image: musllinux_1_1_aarch64
- pyversion: "cp38*"
- - image: musllinux_1_1_aarch64
- pyversion: "cp39*"
- - image: musllinux_1_1_aarch64
- pyversion: "cp310*"
- - image: musllinux_1_1_aarch64
- pyversion: "cp311*"
-
+ outputs:
+ include: ${{ steps.set-matrix.outputs.include }}
steps:
- - uses: actions/checkout@v3
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: "3.x"
-
- - name: Install dependencies
- run: python -m pip install -r requirements.txt
-
- - name: Build Linux wheels
- run: make sdist wheel_${{ matrix.image }}
- env: { STATIC_DEPS: true, PYTHON_BUILD_VERSION: "${{ matrix.pyversion }}" }
-
- - name: Release
- uses: softprops/action-gh-release@v1
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: wheelhouse/*/*-m*linux*.whl # manylinux / musllinux
-
- - name: Upload wheels
- uses: actions/upload-artifact@v3
- with:
- name: wheels-${{ matrix.image }}
- path: wheelhouse/*/*-m*linux*.whl # manylinux / musllinux
- if-no-files-found: ignore
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+ - name: Install cibuildwheel
+ # Nb. keep this pin identical to the pypa/cibuildwheel action version used in build_wheels
+ run: pipx install cibuildwheel==2.17.0
+ - id: set-matrix
+ run: |
+ MATRIX=$(
+ {
+ cibuildwheel --print-build-identifiers --platform linux \
+ | jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' \
+ && cibuildwheel --print-build-identifiers --platform macos \
+ | jq -nRc '{"only": inputs, "os": "macos-latest"}' \
+ && cibuildwheel --print-build-identifiers --platform windows \
+ | jq -nRc '{"only": inputs, "os": "windows-2019"}'
+ } | jq -sc
+ )
+ echo "include=$MATRIX"
+ echo "include=$MATRIX" >> $GITHUB_OUTPUT
+
+ build_wheels:
+ name: Build for ${{ matrix.only }}
+ needs: generate-wheels-matrix
+ runs-on: ${{ matrix.os }}
- non-Linux:
strategy:
- # Allows for matrix sub-jobs to fail without canceling the rest
fail-fast: false
-
matrix:
- os: [macos-latest, windows-2019]
- #os: [macos-10.15, windows-latest]
- #os: [macos-10.15, macOS-M1]
- #os: [macos-10.15]
- python-version: ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "pypy-3.8-v7.3.7", "pypy-3.9-v7.3.9"]
-
- include:
- - os: windows-2016
- python-version: 2.7
- exclude:
- - os: windows-2019
- python-version: 2.7 # needs older image
+ include: ${{ fromJson(needs.generate-wheels-matrix.outputs.include) }}
- runs-on: ${{ matrix.os }}
- env: { LIBXML2_VERSION: 2.9.14, LIBXSLT_VERSION: 1.1.35, MACOSX_DEPLOYMENT_TARGET: 10.15 }
+ env:
+ LIBXML2_VERSION: 2.12.6
+ LIBXSLT_VERSION: 1.1.39
steps:
- - uses: actions/checkout@v3
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install MacOS dependencies
- if: startsWith(matrix.os, 'mac')
- run: |
- brew install automake libtool
- ln -s /usr/local/bin/glibtoolize /usr/local/bin/libtoolize
-
- - name: Install dependencies
- run: python -m pip install setuptools wheel -r requirements.txt
-
- - name: Build wheels
- run: make sdist wheel
- env: { STATIC_DEPS: true, RUN_TESTS: true }
+ - name: Check out the repo
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: Cache [libs]
+ uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+ with:
+ path: |
+ libs/*.xz
+ libs/*.gz
+ libs/*.zip
+ key: libs-${{ runner.os }}-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }}
+
+ - name: Set up QEMU
+ if: runner.os == 'Linux'
+ uses: docker/setup-qemu-action@v3
+ with:
+ platforms: all
+
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.17.0
+ with:
+ only: ${{ matrix.only }}
+
+ - name: Build old Linux wheels
+ if: contains(matrix.only, '-manylinux_') && startsWith(matrix.only, 'cp36-') && (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64'))
+ uses: pypa/cibuildwheel@v2.17.0
+ env:
+ CIBW_MANYLINUX_i686_IMAGE: manylinux1
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux1
+ with:
+ only: ${{ matrix.only }}
+
+ - name: Build faster Linux wheels
+ # also build wheels with the most recent manylinux images and gcc
+ if: runner.os == 'Linux' && !contains(matrix.only, 'i686')
+ uses: pypa/cibuildwheel@v2.17.0
+ env:
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
+ CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28
+ CIBW_MANYLINUX_PPC64LE_IMAGE: manylinux_2_28
+ CIBW_MANYLINUX_S390X_IMAGE: manylinux_2_28
+ CIBW_MANYLINUX_PYPY_X86_64_IMAGE: manylinux_2_28
+ CIBW_MANYLINUX_PYPY_AARCH64_IMAGE: manylinux_2_28
+ CIBW_MUSLLINUX_X86_64_IMAGE: musllinux_1_2
+ CIBW_MUSLLINUX_AARCH64_IMAGE: musllinux_1_2
+ CIBW_MUSLLINUX_PPC64LE_IMAGE: musllinux_1_2
+ CIBW_MUSLLINUX_S390X_IMAGE: musllinux_1_2
+ with:
+ only: ${{ matrix.only }}
+
+ - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
+ with:
+ path: ./wheelhouse/*.whl
+ name: lxml-wheel-${{ matrix.only }}
+
+ upload_release_assets:
+ name: Upload Release Assets
+ needs: [ sdist, build_wheels ]
+ runs-on: ubuntu-latest
- - name: Release
- uses: softprops/action-gh-release@v1
- if: startsWith(github.ref, 'refs/tags/')
- with:
- files: dist/lxml-*.whl
+ permissions:
+ contents: write
- - name: Upload wheels
- uses: actions/upload-artifact@v3
- with:
- name: wheels-${{ matrix.os }}
- path: dist/lxml-*.whl
- if-no-files-found: ignore
+ steps:
+ - name: Download artifacts
+ uses: actions/download-artifact@c850b930e6ba138125429b7e5c93fc707a7f8427 # v4.1.4
+ with:
+ path: ./release_upload
+ merge-multiple: true
+
+ - name: List downloaded artifacts
+ run: ls -la ./release_upload
+
+ - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
+ with:
+ path: ./release_upload/*.whl
+ name: all_wheels
+
+ - name: Release
+ uses: softprops/action-gh-release@v2
+ if: github.ref_type == 'tag'
+ with:
+ files: |
+ ./release_upload/*.whl
+ ./release_upload/*.tar.gz
diff --git a/.gitignore b/.gitignore
index 66a48a6e4..30164c48a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,37 +1,62 @@
-*.pyc
.tox
.idea
.vscode
+.hg
+.cache
+.coverage
+.ipynb_checkpoints/
build
+doc/_build
+doc/pdf
+doc/html
+doc/sphinx
dist
wheelhouse
wheels
venvs
-venv
-doc/html
+*venv
+*dump
+cython_debug/
+py[0-9][0-9]
+lxml-*/
libs
*.egg-info
+*.pickle
+*.pyc
*.pdb
*.so
*.o
*.pyd
+*.whl
+*.log
+*.patch
+*.orig
+*.rej
+*.gz
+*.xz
+*.bz2
+*.zip
+*.tgz
+*~
+callgrind.out.*
+coverty
+coverage
+coverage.xml
+coverage.html
MANIFEST
+TEST
doc/api/lxml*.rst
doc/api/_build/
doc/s5/lxml-ep2008.html
src/lxml/includes/*/
src/lxml/includes/lxml-version.h
-src/lxml/*.html
+src/lxml/html/*.html
src/lxml/html/*.c
-src/lxml/_elementpath.c
-src/lxml/builder.c
-src/lxml/etree.c
+src/lxml/*.html
+src/lxml/*.c
src/lxml/etree.h
src/lxml/etree_api.h
src/lxml/lxml.etree.c
src/lxml/lxml.etree.h
src/lxml/lxml.etree_api.h
-src/lxml/objectify.c
-src/lxml/lxml.objectify.c
-src/lxml/sax.c
diff --git a/CHANGES.txt b/CHANGES.txt
index c684ad5e1..eac13980a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,285 @@
lxml changelog
==============
+5.2.2 (2024-05-12)
+==================
+
+Bugs fixed
+----------
+
+* GH#417: The ``test_feed_parser`` test could fail if ``lxml_html_clean`` was not installed.
+ It is now skipped in that case.
+
+* LP#2059910: The minimum CPU architecture for the Linux x86 binary wheels was set back to
+ "core2", without SSE 4.2.
+
+* If libxml2 uses iconv, the compile time version is available as ``etree.ICONV_COMPILED_VERSION``.
+
+
+5.2.1 (2024-04-02)
+==================
+
+Bugs fixed
+----------
+
+* LP#2059910: The minimum CPU architecture for the Linux x86 binary wheels was set back to
+ "core2", but with SSE 4.2 enabled.
+
+* LP#2059977: ``Element.iterfind("//absolute_path")`` failed with a ``SyntaxError``
+ where it should have issued a warning.
+
+* GH#416: The documentation build was using the non-standard ``which`` command.
+ Patch by Michał Górny.
+
+
+5.2.0 (2024-03-30)
+==================
+
+Other changes
+-------------
+
+* LP#1958539: The ``lxml.html.clean`` implementation suffered from several security issues
+ in the past (relevant only if it was used) and has now been extracted into a separate library:
+
+ https://github.com/fedora-python/lxml_html_clean
+
+ Projects that use lxml without "lxml.html.clean" will not notice any difference,
+ except that they won't have potentially vulnerable code installed.
+ The module is available as an "extra" setuptools dependency "lxml[html_clean]",
+ so projects that need "lxml.html.clean" will need to switch their requirements
+ from "lxml" to "lxml[html_clean]", or install the new library themselves.
+
+* The minimum CPU architecture for the Linux x86 binary wheels was upgraded to
+ "sandybridge" (launched 2011), and glibc 2.28 / gcc 12 (manylinux_2_28) wheels were added.
+
+* Built with Cython 3.0.10.
+
+
+5.1.2 (2024-??-??)
+==================
+
+Bugs fixed
+----------
+
+* LP#2059977: ``Element.iterfind("//absolute_path")`` failed with a ``SyntaxError``
+ where it should have issued a warning.
+
+
+5.1.1 (2024-03-28)
+==================
+
+Bugs fixed
+----------
+
+* LP#2048920: ``iterlinks()`` in ``lxml.html`` rejected ``bytes`` input in 5.1.0.
+
+* High source line numbers from the parser are no longer truncated
+ (up to a C ``long``) when using libxml2 2.11 or later.
+
+Other changes
+-------------
+
+* GH#407: A compatibility test was adapted to recent expat versions.
+ Patch by Miro Hrončok.
+
+* Binary wheels use the library versions libxml2 2.12.6 and libxslt 1.1.39.
+
+* Windows binary wheels use the library versions libxml2 2.11.7 and libxslt 1.1.39.
+
+* Built with Cython 3.0.9.
+
+
+5.1.0 (2024-01-05)
+==================
+
+Features added
+--------------
+
+* Parsing ASCII strings is slightly faster.
+
+Bugs fixed
+----------
+
+* GH#349: The HTML ``Cleaner()`` interpreted an accidentally provided string parameter
+ for the ``host_whitelist`` as list of characters and silently failed to reject any hosts.
+ Passing a non-collection is now rejected.
+
+Other changes
+-------------
+
+* Support for Python 2.7 and Python versions < 3.6 was removed.
+
+* The wheel build was migrated to use ``cibuildwheel``.
+ Patch by Primož Godec.
+
+
+5.0.2 (2024-03-28)
+==================
+
+Other changes
+-------------
+
+* GH#407: A compatibility test was adapted to recent expat versions.
+ Patch by Miro Hrončok.
+
+* Binary wheels use the library versions libxml2 2.12.6 and libxslt 1.1.39.
+
+* Built with Cython 3.0.9.
+
+
+5.0.1 (2024-01-05)
+==================
+
+Bugs fixed
+----------
+
+* LP#2046208: Parsing non-BMP Python Unicode strings could fail on macOS.
+
+* LP#2044225: When incrementally parsing broken HTML, reporting start events on
+ missing structural tags failed and could lead to subsequent exceptions.
+
+* LP#2045435: Some (not all) issues with stricter C compilers were resolved.
+
+* The binary wheels in the 5.0.0 release did not validate cleanly (but installed ok).
+
+
+.. _latest_release:
+
+5.0.0 (2023-12-29)
+==================
+
+Features added
+--------------
+
+* Character escaping in ``C14N2`` serialisation now uses a single pass over the text
+ instead of searching for each unescaped character separately.
+
+* Early support for Python 3.13a2 was added.
+
+Bugs fixed
+----------
+
+* LP#1976304: The ``Element.addnext()`` method previously inserted the new element
+ before existing tail text. The tail text of both sibling elements now stays on
+ the respective elements.
+
+* LP#1980767, GH#379: ``TreeBuilder.close()`` could fail with a ``TypeError`` after
+ parsing incorrect input. Original patch by Enrico Minack.
+
+* ``Element.itertext(with_tail=False)`` returned the tail text of comments and
+ processing instructions, despite the explicit option.
+
+* GH#370: A crash with recent libxml2 2.11.x versions was resolved.
+ Patch by Michael Schlenker.
+
+* A compile problem with recent libxml2 2.12.x versions was resolved.
+
+* The internal exception handling in C callbacks was improved for Cython 3.0.
+
+* The exception declarations of ``xmlInputReadCallback``, ``xmlInputCloseCallback``,
+ ``xmlOutputWriteCallback`` and ``xmlOutputCloseCallback`` in ``tree.pxd`` were
+ corrected to prevent running Python code or calling into the C-API with a live
+ exception set.
+
+* GH#385: The long deprecated ``unittest.makeSuite()`` function is no longer used.
+ Patch by Miro Hrončok.
+
+* LP#1522052: A file-system specific test is now optional and should no longer fail
+ on systems that don't support it.
+
+* GH#392: Some tests were adapted for libxml2 2.13.
+ Patch by Nick Wellnhofer.
+
+* Contains all fixes from lxml 4.9.4.
+
+Other changes
+-------------
+
+* LP#1742885: lxml no longer expands external entities (XXE) by default to prevent
+ the security risk of loading arbitrary files and URLs. If this feature is needed,
+ it can be enabled in a backwards compatible way by using a parser with the option
+ ``resolve_entities=True``. The new default is ``resolve_entities='internal'``.
+
+* With libxml2 2.10.4 and later (as provided by the lxml 5.0 binary wheels),
+ parsing HTML tags with "prefixes" no longer builds a namespace dictionary
+ in ``nsmap`` but considers the ``prefix:name`` string the actual tag name.
+ With older libxml2 versions, since 2.9.11, the prefix was removed. Before
+ that, the prefix was parsed as XML prefix.
+
+ lxml 5.0 does not try to hide this difference but now changes the ElementPath
+ implementation to let ``element.find("part1:part2")`` search for the tag
+ ``part1:part2`` in documents parsed as HTML, instead of looking only for ``part2``.
+
+* LP#2024343: The validation of the schema file itself is now optional in the
+ ISO-Schematron implementation. This was done because some lxml distributions
+ discard the RNG validation schema file due to licensing issues. The validation
+ can now always be disabled with ``Schematron(..., validate_schema=False)``.
+ It is enabled by default if available and disabled otherwise. The module
+ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used
+ to detect whether schema file validation is available.
+
+* Some redundant and long deprecated methods were removed:
+ ``parser.setElementClassLookup()``,
+ ``xslt_transform.apply()``,
+ ``xpath.evaluate()``.
+
+* Some incorrect declarations were removed from ``python.pxd``. In general, this file
+ should not be used by external Cython code. Use the C-API declarations provided by
+ Cython itself instead.
+
+* Binary wheels use the library versions libxml2 2.12.3 and libxslt 1.1.39.
+
+* Built with Cython 3.0.7, updated to follow recent changes in Cython 3.1-dev.
+
+
+4.9.4 (2023-12-19)
+==================
+
+Bugs fixed
+----------
+
+* LP#2046398: Inserting/replacing an ancestor into a node's children could loop indefinitely.
+
+* LP#1980767, GH#379: ``TreeBuilder.close()`` could fail with a ``TypeError`` after
+ parsing incorrect input. Original patch by Enrico Minack.
+
+* LP#1522052: A file-system specific test is now optional and should no longer fail
+ on systems that don't support it.
+
+Other changes
+-------------
+
+* Wheels include zlib 1.3, libxml2 2.10.3 and libxslt 1.1.39
+ (zlib 1.2.12, libxml2 2.10.3 and libxslt 1.1.37 on Windows).
+
+* Built with Cython 0.29.37.
+
+
+4.9.3 (2023-07-05)
+==================
+
+Bugs fixed
+----------
+
+* LP#2008911: ``lxml.objectify`` accepted non-decimal numbers like ``²²²`` as integers.
+
+* A memory leak in ``lxml.html.clean`` was resolved by switching to Cython 0.29.34+.
+
+* GH#348: URL checking in the HTML cleaner was improved.
+ Patch by Tim McCormack.
+
+* GH#371, GH#373: Some regex strings were changed to raw strings to fix Python warnings.
+ Patches by Jakub Wilk and Anthony Sottile.
+
+Other changes
+-------------
+
+* Wheels include zlib 1.2.13, libxml2 2.10.3 and libxslt 1.1.38
+ (zlib 1.2.12, libxml2 2.10.3 and libxslt 1.1.37 on Windows).
+
+* Built with Cython 0.29.36 to adapt to changes in Python 3.12.
+
+
4.9.2 (2022-12-13)
==================
diff --git a/INSTALL.txt b/INSTALL.txt
index 94d6a3ecb..b0d691655 100644
--- a/INSTALL.txt
+++ b/INSTALL.txt
@@ -41,7 +41,8 @@ see below.
Requirements
------------
-You need Python 2.7 or 3.4+.
+You need Python 3.6+ for lxml 5.0 and later.
+lxml versions before 5.0 support Python 2.7 and 3.6+.
Unless you are using a static binary distribution (e.g. from a
Windows binary installer), lxml requires libxml2 and libxslt to
@@ -90,7 +91,7 @@ To install a specific version, either download the distribution
manually and let pip install that, or pass the desired version
to pip::
- pip install lxml==3.4.2
+ pip install lxml==5.0.0
.. _pip: http://pypi.python.org/pypi/pip
@@ -105,14 +106,15 @@ the ``CFLAGS`` environment variable::
MS Windows
..........
-For MS Windows, recent lxml releases feature community donated
-binary distributions, although you might still want to take a look
-at the related `FAQ entry '
>>> body = fromstring(tag_soup).find('.//body')
>>> body.text
- u'\xa9\u20ac-\xf5\u01bd'
+ '\xa9\u20ac-\xf5\u01bd'
If you want them back on the way out, you can just serialise with the
default encoding, which is 'US-ASCII'.
@@ -139,10 +139,10 @@ Any other encoding will output the respective byte sequences.
' a paragraph a paragraph a paragraph
-
-
-
- >>> cleaner = Cleaner(style=True, links=True, add_nofollow=True,
- ... page_structure=False, safe_attrs_only=False)
-
- >>> print cleaner.clean_html(html)
-
-
-
-
- a link
- another link
-
-
-
-
-You can also whitelist some otherwise dangerous content with
-``Cleaner(host_whitelist=['www.youtube.com'])``, which would allow
-embedded media from YouTube, while still filtering out embedded media
-from other sites.
-
-See the docstring of ``Cleaner`` for the details of what can be
-cleaned.
-
-
-autolink
---------
-
-In addition to cleaning up malicious HTML, ``lxml.html.clean``
-contains functions to do other things to your HTML. This includes
-autolinking::
-
- autolink(doc, ...)
-
- autolink_html(html, ...)
-
-This finds anything that looks like a link (e.g.,
-``http://example.com``) in the *text* of an HTML document, and
-turns it into an anchor. It avoids making bad links.
-
-Links in the elements ``
\\U00026007
' - ).decode('unicode_escape')) + element = self.etree.HTML('\U00026007
') p_text = element.findtext('.//p') self.assertEqual(1, len(p_text)) - self.assertEqual(_bytes('\\U00026007').decode('unicode_escape'), + self.assertEqual('\U00026007', p_text) def test_html_ids(self): @@ -91,7 +84,7 @@ def test_module_HTML_pretty_print(self): def test_module_parse_html_error(self): parser = self.etree.HTMLParser(recover=False) parse = self.etree.parse - f = BytesIO("") + f = BytesIO(b"") self.assertRaises(self.etree.XMLSyntaxError, parse, f, parser) @@ -205,23 +198,23 @@ def test_module_parse_html_default_doctype(self): self.assertEqual(d.getroottree().docinfo.doctype, '') def test_parse_encoding_8bit_explicit(self): - text = _str('Søk på nettet') - html_latin1 = (_str('%s
') % text).encode('iso-8859-1') + text = 'Søk på nettet' + html_latin1 = ('%s
' % text).encode('iso-8859-1') tree = self.etree.parse( BytesIO(html_latin1), self.etree.HTMLParser(encoding="iso-8859-1")) - p = tree.find("//p") + p = tree.find(".//p") self.assertEqual(p.text, text) def test_parse_encoding_8bit_override(self): - text = _str('Søk på nettet') - wrong_head = _str(''' + text = 'Søk på nettet' + wrong_head = ''' - ''') - html_latin1 = (_str('%s%s
') % (wrong_head, + ''' + html_latin1 = ('%s%s
' % (wrong_head, text) ).encode('iso-8859-1') @@ -232,7 +225,7 @@ def test_parse_encoding_8bit_override(self): tree = self.etree.parse( BytesIO(html_latin1), self.etree.HTMLParser(encoding="iso-8859-1")) - p = tree.find("//p") + p = tree.find(".//p") self.assertEqual(p.text, text) def test_module_HTML_broken(self): @@ -242,7 +235,7 @@ def test_module_HTML_broken(self): def test_module_HTML_cdata(self): # by default, libxml2 generates CDATA nodes for