diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8cadf23b4..d1322b974 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -173,9 +173,11 @@ jobs: env: OS_NAME: ${{ matrix.os }} PYTHON_VERSION: ${{ matrix.python-version }} - MACOSX_DEPLOYMENT_TARGET: 11.0 - LIBXML2_VERSION: 2.14.4 - LIBXSLT_VERSION: 1.1.43 + MACOSX_DEPLOYMENT_TARGET: "11.0" + ZLIB_VERSION: "1.3.1" + LIBICONV_VERSION: "1.18" + LIBXML2_VERSION: "2.14.4" + LIBXSLT_VERSION: "1.1.43" COVERAGE: false GCC_VERSION: 9 USE_CCACHE: 1 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index e812ea5d4..b2713afdf 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -113,6 +113,8 @@ jobs: include: ${{ fromJson(needs.generate-wheels-matrix.outputs.include) }} env: + ZLIB_VERSION: "1.3.1" + LIBICONV_VERSION: "1.18" LIBXML2_VERSION: 2.14.4 LIBXSLT_VERSION: 1.1.43 @@ -136,13 +138,13 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@v3.0.0 + uses: pypa/cibuildwheel@v3.0.1 with: only: ${{ matrix.only }} - name: Build old Linux wheels if: contains(matrix.only, '-manylinux_') && (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64') || contains(matrix.only, 'aarch64')) - uses: pypa/cibuildwheel@v3.0.0 + uses: pypa/cibuildwheel@v3.0.1 env: CIBW_MANYLINUX_i686_IMAGE: manylinux2014 CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 @@ -153,7 +155,7 @@ jobs: - name: Build faster Linux wheels # also build wheels with the most recent manylinux images and gcc if: runner.os == 'Linux' && !contains(matrix.only, 'i686') - uses: pypa/cibuildwheel@v3.0.0 + uses: pypa/cibuildwheel@v3.0.1 env: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 diff --git a/CHANGES.txt b/CHANGES.txt index ab0f253ed..5f832e9cd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,25 @@ lxml changelog ============== +7.0.0 (2025-??-??) 
+================== + +Features added +-------------- + +* The shared parser name dict is now local to a parser (as opposed to global), + which makes it easier to control its lifetime and cross-document usage. + It is now also unbounded in size if the ``huge_tree=True`` option is provided. + +* The default chunk size for reading from file-like objects in ``iterparse()`` was increased + from 32 KiB to 64 KiB and is now configurable with a new ``chunk_size`` argument. + +Other changes +------------- + +* Some internal adaptations were made for libxml2 2.14.x and 2.15.x. + + 6.0.0 (2025-06-26) ================== diff --git a/pyproject.toml b/pyproject.toml index a7ef4d7e3..9ebac4a04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["Cython>=3.1.2", "setuptools", "wheel"] +requires = ["Cython>=3.1.2", "setuptools"] [tool.cibuildwheel] build-verbosity = 1 diff --git a/setup.py b/setup.py index c63225644..7375f0222 100644 --- a/setup.py +++ b/setup.py @@ -230,7 +230,6 @@ def build_packages(files): versioninfo.dev_status(), 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', - 'License :: OSI Approved :: BSD License', 'Programming Language :: Cython', # NOTE: keep in sync with 'python_requires' list above. 
'Programming Language :: Python :: 3', diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 8443a3498..2673b6542 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "6.0.0" +__version__ = "7.0.0a0" def get_include(): diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index f683e70db..d52cff30a 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -79,6 +79,7 @@ cdef bint _isAncestorOrSame(xmlNode* c_ancestor, xmlNode* c_node) noexcept: c_node = c_node.parent return False + cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, _BaseParser parser, text, tail, attrib, nsmap, dict extra_attrs): @@ -96,11 +97,17 @@ cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, If 'c_doc' is also NULL, a new xmlDoc will be created. """ - cdef xmlNode* c_node + cdef bint is_html = False + cdef bint is_new_doc = doc is None + if doc is not None: c_doc = doc._c_doc + is_html = doc.ishtml() + elif parser is not None: + is_html = parser._flags.for_html + ns_utf, name_utf = _getNsTag(tag) - if parser is not None and parser._for_html: + if is_html: _htmlTagValidOrRaise(name_utf) if c_doc is NULL: c_doc = _newHTMLDoc() @@ -108,34 +115,30 @@ cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, _tagValidOrRaise(name_utf) if c_doc is NULL: c_doc = _newXMLDoc() - c_node = _createElement(c_doc, name_utf) + + if doc is None: + doc = _documentFactory(c_doc, parser) + if is_new_doc: + doc.initDict() + + cdef xmlNode* c_node = _createElement(c_doc, name_utf) if c_node is NULL: - if doc is None and c_doc is not NULL: - tree.xmlFreeDoc(c_doc) raise MemoryError() - try: - if doc is None: - tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, parser) - if text is not None: - _setNodeText(c_node, text) - if tail is not None: - _setTailText(c_node, tail) - # add namespaces to node if necessary - _setNodeNamespaces(c_node, doc, ns_utf, nsmap) - 
_initNodeAttributes(c_node, doc, attrib, extra_attrs) - return _elementFactory(doc, c_node) - except: - # free allocated c_node/c_doc unless Python does it for us - if c_node.doc is not c_doc: - # node not yet in document => will not be freed by document - if tail is not None: - _removeText(c_node.next) # tail - tree.xmlFreeNode(c_node) - if doc is None: - # c_doc will not be freed by doc - tree.xmlFreeDoc(c_doc) - raise + if is_new_doc: + tree.xmlDocSetRootElement(c_doc, c_node) + + # add namespaces to node if necessary + _setNodeNamespaces(c_node, doc, ns_utf, nsmap) + + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + + return _elementFactory(doc, c_node) + cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf, _BaseParser parser, attrib, nsmap, dict extra_attrs) except -1: @@ -153,13 +156,14 @@ cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf, else: _tagValidOrRaise(name_utf) c_doc = _newXMLDoc() + + doc = _documentFactory(c_doc, parser) + doc.initDict() + c_node = _createElement(c_doc, name_utf) if c_node is NULL: - if c_doc is not NULL: - tree.xmlFreeDoc(c_doc) raise MemoryError() tree.xmlDocSetRootElement(c_doc, c_node) - doc = _documentFactory(c_doc, parser) # add namespaces to node if necessary _setNodeNamespaces(c_node, doc, ns_utf, nsmap) _initNodeAttributes(c_node, doc, attrib, extra_attrs) @@ -167,6 +171,7 @@ cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf, element._init() return 0 + cdef _Element _makeSubElement(_Element parent, tag, text, tail, attrib, nsmap, dict extra_attrs): """Create a new child element and initialize text content, namespaces and @@ -180,7 +185,7 @@ cdef _Element _makeSubElement(_Element parent, tag, text, tail, ns_utf, name_utf = _getNsTag(tag) c_doc = parent._doc._c_doc - if parent._doc._parser is not None and parent._doc._parser._for_html: + if 
parent._doc.ishtml(): _htmlTagValidOrRaise(name_utf) else: _tagValidOrRaise(name_utf) @@ -313,14 +318,18 @@ cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, dict extra): cdef xmlNs* c_ns if attrib is not None and not hasattr(attrib, 'items'): raise TypeError, f"Invalid attribute dictionary: {python._fqtypename(attrib).decode('utf8')}" - if not attrib and not extra: + + has_attrib = bool(attrib) + has_extra = bool(extra) + if not has_attrib and not has_extra: return # nothing to do - is_html = doc._parser._for_html + + is_html = doc.ishtml() seen = set() - if extra: + if has_extra: for name, value in extra.items(): _addAttributeToNode(c_node, doc, is_html, name, value, seen) - if attrib: + if has_attrib: for name, value in _iter_attrib(attrib): _addAttributeToNode(c_node, doc, is_html, name, value, seen) @@ -582,7 +591,7 @@ cdef int _setAttributeValue(_Element element, key, value) except -1: cdef const_xmlChar* c_value cdef xmlNs* c_ns ns, tag = _getNsTag(key) - is_html = element._doc._parser._for_html + is_html = element._doc.ishtml() if not is_html: _attributeValidOrRaise(tag) c_tag = _xcstr(tag) @@ -624,8 +633,7 @@ cdef list _collectAttributes(xmlNode* c_node, int collecttype): c_attr = c_node.properties count = 0 while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - count += 1 + count += (c_attr.type == tree.XML_ATTRIBUTE_NODE) c_attr = c_attr.next if not count: diff --git a/src/lxml/classlookup.pxi b/src/lxml/classlookup.pxi index 92d1d47a5..49cf85ae5 100644 --- a/src/lxml/classlookup.pxi +++ b/src/lxml/classlookup.pxi @@ -108,14 +108,15 @@ cdef class CommentBase(_Comment): """ def __init__(self, text): # copied from Comment() factory - cdef _Document doc - cdef xmlDoc* c_doc if text is None: text = b'' else: text = _utf8(text) + c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + self._c_node = _createComment(c_doc, _xcstr(text)) if self._c_node is NULL: raise MemoryError() @@ -138,15 +139,16 @@ cdef 
class PIBase(_ProcessingInstruction): """ def __init__(self, target, text=None): # copied from PI() factory - cdef _Document doc - cdef xmlDoc* c_doc target = _utf8(target) if text is None: text = b'' else: text = _utf8(text) + c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) if self._c_node is NULL: raise MemoryError() @@ -167,8 +169,6 @@ cdef class EntityBase(_Entity): called after object creation. """ def __init__(self, name): - cdef _Document doc - cdef xmlDoc* c_doc name_utf = _utf8(name) c_name = _xcstr(name_utf) if c_name[0] == c'#': @@ -176,8 +176,11 @@ cdef class EntityBase(_Entity): raise ValueError, f"Invalid character reference: '{name}'" elif not _xmlNameIsValid(c_name): raise ValueError, f"Invalid entity reference: '{name}'" + c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + self._c_node = _createEntity(c_doc, c_name) if self._c_node is NULL: raise MemoryError() @@ -446,12 +449,10 @@ cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): element_type = "entity" else: element_type = "element" - if c_node.name is NULL: - name = None - else: - name = funicode(c_node.name) + + name = funicodeOrNone(c_node.name) c_str = tree._getNs(c_node) - ns = funicode(c_str) if c_str is not NULL else None + ns = funicodeOrNone(c_str) cls = lookup.lookup(element_type, doc, ns, name) if cls is not None: @@ -553,7 +554,7 @@ def set_element_class_lookup(ElementClassLookup lookup = None): This defines the main entry point for looking up element implementations. The standard implementation uses the :class:`ParserBasedElementClassLookup` - to delegate to different lookup schemes for each parser. + to delegate to different lookup schemes for each parser. .. 
warning:: diff --git a/src/lxml/debug.pxi b/src/lxml/debug.pxi index d728e8419..b6c1c1f31 100644 --- a/src/lxml/debug.pxi +++ b/src/lxml/debug.pxi @@ -24,13 +24,9 @@ cdef class _MemDebug: def dict_size(self): """dict_size(self) - Returns the current size of the global name dictionary used by libxml2 - for the current thread. Each thread has its own dictionary. + Returns the current size of the default parser's name dictionary used by libxml2. """ - c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL) - if c_dict is NULL: - raise MemoryError() - return tree.xmlDictSize(c_dict) + return __GLOBAL_PARSER_CONTEXT.getDefaultParser().dict_size memory_debugger = _MemDebug() diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx index 562d95ed1..f6545d1aa 100644 --- a/src/lxml/etree.pyx +++ b/src/lxml/etree.pyx @@ -481,6 +481,9 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # the document tree.xmlFreeDoc(self._c_doc) + cdef void initDict(self) noexcept: + self._parser.initDocDict(self._c_doc) + @cython.final cdef getroot(self): # return an element proxy for the document root @@ -544,6 +547,10 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: else: return (self._c_doc.standalone == 1) + @cython.final + cdef bint ishtml(self): + return self._c_doc.type == tree.XML_HTML_DOCUMENT_NODE + @cython.final cdef bytes buildNewPrefix(self): # get a new unique prefix ("nsX") for this document @@ -762,6 +769,10 @@ cdef class DocInfo: """ return self._doc.isstandalone() + @property + def is_html(self): + return self._doc.ishtml() + property URL: "The source URL of the document (or None if unknown)." 
def __get__(self): @@ -1130,7 +1141,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: _assertValidNode(self) ns, name = _getNsTag(value) parser = self._doc._parser - if parser is not None and parser._for_html: + if self._doc.ishtml(): _htmlTagValidOrRaise(name) else: _tagValidOrRaise(name) @@ -2658,8 +2669,7 @@ cdef class _Attrib: cdef xmlAttr* c_attr = self._element._c_node.properties cdef Py_ssize_t c = 0 while c_attr is not NULL: - if c_attr.type == tree.XML_ATTRIBUTE_NODE: - c += 1 + c += (c_attr.type == tree.XML_ATTRIBUTE_NODE) c_attr = c_attr.next return c @@ -3214,10 +3224,6 @@ def Comment(text=None): Comment element factory. This factory function creates a special element that will be serialized as an XML comment. """ - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - if text is None: text = b'' else: @@ -3227,6 +3233,8 @@ def Comment(text=None): c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + c_node = _createComment(c_doc, _xcstr(text)) tree.xmlAddChild(c_doc, c_node) return _elementFactory(doc, c_node) @@ -3238,10 +3246,6 @@ def ProcessingInstruction(target, text=None): ProcessingInstruction element factory. This factory function creates a special element that will be serialized as an XML processing instruction. """ - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc - target = _utf8(target) _tagValidOrRaise(target) if target.lower() == b'xml': @@ -3256,6 +3260,8 @@ def ProcessingInstruction(target, text=None): c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) tree.xmlAddChild(c_doc, c_node) return _elementFactory(doc, c_node) @@ -3291,9 +3297,6 @@ def Entity(name): declared in the document. A document that uses entity references requires a DTD to define the entities. 
""" - cdef _Document doc - cdef xmlNode* c_node - cdef xmlDoc* c_doc name_utf = _utf8(name) c_name = _xcstr(name_utf) if c_name[0] == c'#': @@ -3301,8 +3304,11 @@ def Entity(name): raise ValueError, f"Invalid character reference: '{name}'" elif not _xmlNameIsValid(c_name): raise ValueError, f"Invalid entity reference: '{name}'" + c_doc = _newXMLDoc() doc = _documentFactory(c_doc, None) + doc.initDict() + c_node = _createEntity(c_doc, c_name) tree.xmlAddChild(c_doc, c_node) return _elementFactory(doc, c_node) @@ -3317,6 +3323,7 @@ def SubElement(_Element _parent not None, _tag, """ return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) + from typing import Generic, TypeVar T = TypeVar("T") @@ -3327,11 +3334,7 @@ class ElementTree(ABC, Generic[T]): ElementTree wrapper class. """ - cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlNode* c_node_copy cdef xmlDoc* c_doc - cdef _ElementTree etree cdef _Document doc if element is not None: @@ -3344,15 +3347,18 @@ class ElementTree(ABC, Generic[T]): else: c_doc = _newXMLDoc() doc = _documentFactory(c_doc, parser) + doc.initDict() return _elementTreeFactory(doc, element) + # Register _ElementTree as a virtual subclass of ElementTree ElementTree.register(_ElementTree) # Remove "ABC" and typing helpers from module dict del ABC, Generic, TypeVar, T + def HTML(text, _BaseParser parser=None, *, base_url=None): """HTML(text, parser=None, base_url=None) diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd index 43a52e647..19ff6da4c 100644 --- a/src/lxml/includes/tree.pxd +++ b/src/lxml/includes/tree.pxd @@ -77,13 +77,22 @@ cdef extern from "libxml/hash.h": cdef int xmlHashSize(xmlHashTable* table) nogil cdef void xmlHashFree(xmlHashTable* table, xmlHashDeallocator f) nogil + cdef extern from * nogil: # actually "libxml/dict.h" # libxml/dict.h appears to be broken to include in C ctypedef struct xmlDict + + cdef xmlDict* xmlDictCreate() + cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) + cdef 
void xmlDictFree(xmlDict* sub) + cdef int xmlDictReference(xmlDict* dict) cdef const_xmlChar* xmlDictLookup(xmlDict* dict, const_xmlChar* name, int len) cdef const_xmlChar* xmlDictExists(xmlDict* dict, const_xmlChar* name, int len) cdef int xmlDictOwns(xmlDict* dict, const_xmlChar* name) cdef size_t xmlDictSize(xmlDict* dict) + cdef size_t xmlDictSetLimit(xmlDict* dict, size_t limit) + cdef size_t xmlDictGetUsage(xmlDict* dict) + cdef extern from "libxml/tree.h" nogil: ctypedef struct xmlDoc diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd index 3a721c1dc..ca905c6ec 100644 --- a/src/lxml/includes/xmlparser.pxd +++ b/src/lxml/includes/xmlparser.pxd @@ -136,10 +136,12 @@ cdef extern from "libxml/parser.h" nogil: cdef bint xmlHasFeature(xmlFeature feature) + # START: Legacy, moved to tree.pxd cdef xmlDict* xmlDictCreate() cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) cdef void xmlDictFree(xmlDict* sub) cdef int xmlDictReference(xmlDict* dict) + # END: Legacy, moved to tree.pxd cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes cdef int XML_SKIP_IDS # SAX option for not building an XML ID dict diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 42b752499..ad7d70926 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -1,13 +1,12 @@ # iterparse -- event-driven parsing -DEF __ITERPARSE_CHUNK_SIZE = 32768 - cdef class iterparse: """iterparse(self, source, events=("end",), tag=None, \ attribute_defaults=False, dtd_validation=False, \ load_dtd=False, no_network=True, remove_blank_text=False, \ remove_comments=False, remove_pis=False, encoding=None, \ - html=False, recover=None, huge_tree=False, schema=None) + html=False, recover=None, huge_tree=False, schema=None, \ + chunk_size=65536) Incremental parser. 
@@ -42,7 +41,7 @@ cdef class iterparse: - remove_blank_text: discard blank text nodes - remove_comments: discard comments - remove_pis: discard processing instructions - - strip_cdata: replace CDATA sections by normal text content (default: + - strip_cdata: replace CDATA sections by normal text content (default: True for XML, ignored otherwise) - compact: safe memory for short text content (default: True) - resolve_entities: replace entities by their text value (default: True) @@ -55,6 +54,8 @@ cdef class iterparse: Other keyword arguments: - encoding: override the document encoding - schema: an XMLSchema to validate against + - chunk_size: the number of bytes to read from the 'source' in one chunk + (default: 65536) """ cdef _FeedParser _parser cdef object _tag @@ -63,6 +64,7 @@ cdef class iterparse: cdef object _source cdef object _filename cdef object _error + cdef object _chunk_size cdef bint _close_source_after_read def __init__(self, source, events=("end",), *, tag=None, @@ -71,7 +73,7 @@ cdef class iterparse: compact=True, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, encoding=None, html=False, recover=None, huge_tree=False, collect_ids=True, - XMLSchema schema=None): + XMLSchema schema=None, int chunk_size=65536): if not hasattr(source, 'read'): source = _getFSPathOrObject(source) self._filename = source @@ -124,6 +126,7 @@ cdef class iterparse: target=None, # TODO compact=compact) + self._chunk_size = chunk_size self._events = parser.read_events() self._parser = parser @@ -215,7 +218,7 @@ cdef class iterparse: @cython.final cdef bint _read_more_events(self, _SaxParserContext context) except -123: - data = self._source.read(__ITERPARSE_CHUNK_SIZE) + data = self._source.read(self._chunk_size) if not isinstance(data, bytes): self._close_source() raise TypeError("reading file objects must return bytes objects") diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index a3fbef399..f4d3ce68e 100644 --- a/src/lxml/parser.pxi 
+++ b/src/lxml/parser.pxi @@ -44,6 +44,56 @@ cdef class ParserError(LxmlError): """ +@cython.final +@cython.internal +cdef class _ParserDictionary: + # The string dictionary of a parser, shared by all of its parsed documents. + + cdef tree.xmlDict* _c_dict + + def __cinit__(self): + self._c_dict = tree.xmlDictCreate() + + def __dealloc__(self): + tree.xmlDictFree(self._c_dict) + self._c_dict = NULL + + cdef void disableSizeLimit(self): + tree.xmlDictSetLimit(self._c_dict, 0) + + cdef tree.xmlDict *getDict(self): + return self._c_dict + + cdef tree.xmlDict *getDictRef(self): + c_dict = self._c_dict + tree.xmlDictReference(c_dict) + return c_dict + + cdef size_t getDictSize(self): + return tree.xmlDictSize(self._c_dict) + + cdef void initDictRef(self, tree.xmlDict** c_dict_ref) noexcept: + c_dict = c_dict_ref[0] + if c_dict is self._c_dict: + return + + c_dict_ref[0] = self.getDictRef() + if c_dict is not NULL: + tree.xmlDictFree(c_dict) + + cdef void initParserDict(self, xmlparser.xmlParserCtxt* pctxt) noexcept: + "Assure we always use the same string dictionary." + self.initDictRef(&pctxt.dict) + pctxt.dictNames = 1 + + #cdef void initXPathParserDict(self, xpath.xmlXPathContext* pctxt) noexcept: + # "Assure we always use the same string dictionary." + # self.initDictRef(&pctxt.dict) + + cdef void initDocDict(self, xmlDoc *c_doc) noexcept: + self.initDictRef(&c_doc.dict) + + @cython.final @cython.internal cdef class _ParserDictionaryContext: @@ -56,17 +106,12 @@ cdef class _ParserDictionaryContext: # __GLOBAL_PARSER_CONTEXT as defined below the class. # - cdef tree.xmlDict* _c_dict cdef _BaseParser _default_parser cdef list _implied_parser_contexts def __cinit__(self): self._implied_parser_contexts = [] - def __dealloc__(self): - if self._c_dict is not NULL: - xmlparser.xmlDictFree(self._c_dict) - cdef int initMainParserContext(self) except -1: """Put the global context into the thread dictionary of the main thread. 
To be called once and only in the main thread.""" @@ -105,48 +150,6 @@ cdef class _ParserDictionaryContext: context._default_parser = self._default_parser._copy() return context._default_parser - cdef tree.xmlDict* _getThreadDict(self, tree.xmlDict* default): - "Return the thread-local dict or create a new one if necessary." - cdef _ParserDictionaryContext context - context = self._findThreadParserContext() - if context._c_dict is NULL: - # thread dict not yet set up => use default or create a new one - if default is not NULL: - context._c_dict = default - xmlparser.xmlDictReference(default) - return default - if self._c_dict is NULL: - self._c_dict = xmlparser.xmlDictCreate() - if context is not self: - context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict) - return context._c_dict - - cdef int initThreadDictRef(self, tree.xmlDict** c_dict_ref) except -1: - c_dict = c_dict_ref[0] - c_thread_dict = self._getThreadDict(c_dict) - if c_dict is c_thread_dict: - return 0 - if c_dict is not NULL: - xmlparser.xmlDictFree(c_dict) - c_dict_ref[0] = c_thread_dict - xmlparser.xmlDictReference(c_thread_dict) - - cdef int initParserDict(self, xmlparser.xmlParserCtxt* pctxt) except -1: - "Assure we always use the same string dictionary." - self.initThreadDictRef(&pctxt.dict) - pctxt.dictNames = 1 - - cdef int initXPathParserDict(self, xpath.xmlXPathContext* pctxt) except -1: - "Assure we always use the same string dictionary." - self.initThreadDictRef(&pctxt.dict) - - cdef int initDocDict(self, xmlDoc* result) except -1: - "Store dict of last object parsed if no shared dict yet" - # XXX We also free the result dict here if there already was one. - # This case should only occur for new documents with empty dicts, - # otherwise we'd free data that's in use => segfault - self.initThreadDictRef(&result.dict) - cdef _ParserContext findImpliedContext(self): """Return any current implied xml parser context for the current thread. 
This is used when the resolver functions are called @@ -542,6 +545,7 @@ cdef void _reset_document_loader(xmlparser.xmlExternalEntityLoader old) noexcept @cython.internal cdef class _ParserContext(_ResolverContext): cdef _ErrorLog _error_log + cdef _ParserDictionary _dict cdef _ParserSchemaValidationContext _validator cdef xmlparser.xmlParserCtxt* _c_ctxt cdef xmlparser.xmlExternalEntityLoader _orig_loader @@ -554,6 +558,7 @@ cdef class _ParserContext(_ResolverContext): if config.ENABLE_THREADING: self._lock = python.PyThread_allocate_lock() self._error_log = _ErrorLog() + self._dict = _ParserDictionary() def __dealloc__(self): if config.ENABLE_THREADING and self._lock is not NULL: @@ -583,6 +588,7 @@ cdef class _ParserContext(_ResolverContext): """ self._c_ctxt = c_ctxt c_ctxt._private = self + self._dict.initParserDict(c_ctxt) cdef void _resetParserContext(self) noexcept: if self._c_ctxt is not NULL: @@ -708,11 +714,11 @@ cdef xmlDoc* _handleParseResult(_ParserContext context, # to parse the document. 
cdef bint well_formed if result is not NULL: - __GLOBAL_PARSER_CONTEXT.initDocDict(result) + context._dict.initDocDict(result) if c_ctxt.myDoc is not NULL: if c_ctxt.myDoc is not result: - __GLOBAL_PARSER_CONTEXT.initDocDict(c_ctxt.myDoc) + context._dict.initDocDict(c_ctxt.myDoc) tree.xmlFreeDoc(c_ctxt.myDoc) c_ctxt.myDoc = NULL @@ -832,6 +838,26 @@ cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict, return 0 +cdef extern from *: + """ + typedef struct { + unsigned int for_html: 1; + unsigned int remove_comments: 1; + unsigned int remove_pis: 1; + unsigned int strip_cdata: 1; + unsigned int collect_ids: 1; + unsigned int resolve_external_entities: 1; + } __lxml_ParserFlags; + """ + ctypedef struct ParserFlags "__lxml_ParserFlags": + bint for_html + bint remove_comments + bint remove_pis + bint strip_cdata + bint collect_ids + bint resolve_external_entities + + @cython.internal cdef class _BaseParser: cdef ElementClassLookup _class_lookup @@ -839,12 +865,7 @@ cdef class _BaseParser: cdef _ParserContext _parser_context cdef _ParserContext _push_parser_context cdef int _parse_options - cdef bint _for_html - cdef bint _remove_comments - cdef bint _remove_pis - cdef bint _strip_cdata - cdef bint _collect_ids - cdef bint _resolve_external_entities + cdef ParserFlags _flags cdef XMLSchema _schema cdef bytes _filename cdef readonly object target @@ -860,15 +881,17 @@ cdef class _BaseParser: raise TypeError, "This class cannot be instantiated" self._parse_options = parse_options + self._flags = ParserFlags( + for_html=for_html, + remove_comments=remove_comments, + remove_pis=remove_pis, + strip_cdata=strip_cdata, + collect_ids=collect_ids, + resolve_external_entities=resolve_external_entities, + ) + self.target = target - self._for_html = for_html - self._remove_comments = remove_comments - self._remove_pis = remove_pis - self._strip_cdata = strip_cdata - self._collect_ids = collect_ids - self._resolve_external_entities = resolve_external_entities self._schema = 
schema - self._resolvers = _ResolverRegistry() if encoding is None: @@ -896,11 +919,16 @@ cdef class _BaseParser: cdef xmlparser.xmlParserCtxt* pctxt if self._parser_context is None: self._parser_context = self._createContext(self.target, None) - self._parser_context._collect_ids = self._collect_ids + + self._parser_context._collect_ids = self._flags.collect_ids + if self._parse_options & xmlparser.XML_PARSE_HUGE: + self._parser_context._dict.disableSizeLimit() + if self._schema is not None: self._parser_context._validator = \ self._schema._newSaxValidator( self._parse_options & xmlparser.XML_PARSE_DTDATTR) + pctxt = self._newParserCtxt() _initParserContext(self._parser_context, self._resolvers, pctxt) self._configureSaxContext(pctxt) @@ -911,11 +939,16 @@ cdef class _BaseParser: if self._push_parser_context is None: self._push_parser_context = self._createContext( self.target, self._events_to_collect) - self._push_parser_context._collect_ids = self._collect_ids + + self._push_parser_context._collect_ids = self._flags.collect_ids + if self._parse_options & xmlparser.XML_PARSE_HUGE: + self._push_parser_context._dict.disableSizeLimit() + if self._schema is not None: self._push_parser_context._validator = \ self._schema._newSaxValidator( self._parse_options & xmlparser.XML_PARSE_DTDATTR) + pctxt = self._newPushParserCtxt() _initParserContext( self._push_parser_context, self._resolvers, pctxt) @@ -942,14 +975,14 @@ cdef class _BaseParser: @cython.final cdef int _configureSaxContext(self, xmlparser.xmlParserCtxt* pctxt) except -1: - if self._remove_comments: + if self._flags.remove_comments: pctxt.sax.comment = NULL - if self._remove_pis: + if self._flags.remove_pis: pctxt.sax.processingInstruction = NULL - if self._strip_cdata: + if self._flags.strip_cdata: # hard switch-off for CDATA nodes => makes them plain text pctxt.sax.cdataBlock = NULL - if not self._resolve_external_entities: + if not self._flags.resolve_external_entities: pctxt.sax.getEntity = 
_getInternalEntityOnly cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1: @@ -976,7 +1009,7 @@ cdef class _BaseParser: Create and initialise a libxml2-level parser context. """ cdef xmlparser.xmlParserCtxt* c_ctxt - if self._for_html: + if self._flags.for_html: c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) if c_ctxt is not NULL: self._registerHtmlErrorHandler(c_ctxt) @@ -990,7 +1023,7 @@ cdef class _BaseParser: cdef xmlparser.xmlParserCtxt* _newPushParserCtxt(self) except NULL: cdef xmlparser.xmlParserCtxt* c_ctxt cdef char* c_filename = _cstr(self._filename) if self._filename is not None else NULL - if self._for_html: + if self._flags.for_html: c_ctxt = htmlparser.htmlCreatePushParserCtxt( NULL, NULL, NULL, 0, c_filename, tree.XML_CHAR_ENCODING_NONE) if c_ctxt is not NULL: @@ -1006,6 +1039,14 @@ cdef class _BaseParser: c_ctxt.sax.startDocument = _initSaxDocument return c_ctxt + @cython.final + cdef void initDocDict(self, tree.xmlDoc *c_doc) noexcept: + self._getParserContext()._dict.initDocDict(c_doc) + + @cython.final + cdef tree.xmlDict* getDict(self) noexcept: + return self._getParserContext()._dict.getDict() + @property def error_log(self): """The error log of the last parser run. 
@@ -1024,6 +1065,15 @@ cdef class _BaseParser: """The version of the underlying XML parser.""" return "libxml2 %d.%d.%d" % LIBXML_VERSION + @property + def dict_size(self): + cdef size_t size = 0 + if self._parser_context is not None: + size += self._parser_context._dict.getDictSize() + if self._push_parser_context is not None: + size += self._push_parser_context._dict.getDictSize() + return size + def set_element_class_lookup(self, ElementClassLookup lookup = None): """set_element_class_lookup(self, lookup = None) @@ -1038,10 +1088,7 @@ cdef class _BaseParser: cdef _BaseParser parser parser = self.__class__() parser._parse_options = self._parse_options - parser._for_html = self._for_html - parser._remove_comments = self._remove_comments - parser._remove_pis = self._remove_pis - parser._strip_cdata = self._strip_cdata + parser._flags = self._flags parser._filename = self._filename parser._resolvers = self._resolvers parser.target = self.target @@ -1113,10 +1160,9 @@ cdef class _BaseParser: context.prepare() try: pctxt = context._c_ctxt - __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) orig_options = pctxt.options with nogil: - if self._for_html: + if self._flags.for_html: result = htmlparser.htmlCtxtReadMemory( pctxt, c_text, buffer_len, c_filename, c_encoding, self._parse_options) @@ -1145,9 +1191,6 @@ cdef class _BaseParser: context = self._getParserContext() context.prepare() try: - pctxt = context._c_ctxt - __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) - if self._default_encoding is None: c_encoding = NULL # libxml2 (at least 2.9.3) does not recognise UTF-32 BOMs @@ -1172,9 +1215,10 @@ cdef class _BaseParser: else: c_encoding = _cstr(self._default_encoding) + pctxt = context._c_ctxt orig_options = pctxt.options with nogil: - if self._for_html: + if self._flags.for_html: result = htmlparser.htmlCtxtReadMemory( pctxt, c_text, c_len, c_filename, c_encoding, self._parse_options) @@ -1202,17 +1246,15 @@ cdef class _BaseParser: context = self._getParserContext() 
context.prepare() try: - pctxt = context._c_ctxt - __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) - if self._default_encoding is None: c_encoding = NULL else: c_encoding = _cstr(self._default_encoding) + pctxt = context._c_ctxt orig_options = pctxt.options with nogil: - if self._for_html: + if self._flags.for_html: result = htmlparser.htmlCtxtReadFile( pctxt, c_filename, c_encoding, self._parse_options) if result is not NULL: @@ -1233,7 +1275,6 @@ cdef class _BaseParser: cdef _ParserContext context cdef _FileReaderContext file_context cdef xmlDoc* result - cdef xmlparser.xmlParserCtxt* pctxt cdef char* c_filename if not filename: filename = None @@ -1241,12 +1282,10 @@ cdef class _BaseParser: context = self._getParserContext() context.prepare() try: - pctxt = context._c_ctxt - __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) file_context = _FileReaderContext( filelike, context, filename, encoding or self._default_encoding) - result = file_context._readDoc(pctxt, self._parse_options) + result = file_context._readDoc(context._c_ctxt, self._parse_options) return context._handleParseResultDoc( self, result, filename) @@ -1315,8 +1354,8 @@ cdef void _initSaxDocument(void* ctxt) noexcept with gil: if c_doc and c_ctxt.dict and not c_doc.dict: # I have no idea why libxml2 disables this - we need it c_ctxt.dictNames = 1 + tree.xmlDictReference(c_ctxt.dict) c_doc.dict = c_ctxt.dict - xmlparser.xmlDictReference(c_ctxt.dict) # set up XML ID hash table if c_ctxt._private: @@ -1325,10 +1364,11 @@ cdef void _initSaxDocument(void* ctxt) noexcept with gil: # keep the global parser dict from filling up with XML IDs if c_doc and not c_doc.ids: # memory errors are not fatal here - c_dict = xmlparser.xmlDictCreate() + c_dict = tree.xmlDictCreate() if c_dict: + tree.xmlDictSetLimit(c_dict, 0) c_doc.ids = tree.xmlHashCreateDict(0, c_dict) - xmlparser.xmlDictFree(c_dict) + tree.xmlDictFree(c_dict) else: c_doc.ids = tree.xmlHashCreate(0) else: @@ -1416,7 +1456,7 @@ cdef class 
_FeedParser(_BaseParser): if char_data is not NULL: buffer_len = 4 if py_buffer_len > 4 else py_buffer_len orig_loader = _register_document_loader() - if self._for_html: + if self._flags.for_html: error = _htmlCtxtResetPush( pctxt, char_data, buffer_len, c_filename, c_encoding, self._parse_options) @@ -1429,7 +1469,6 @@ cdef class _FeedParser(_BaseParser): char_data += buffer_len if error: raise MemoryError() - __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) #print pctxt.charset, 'NONE' if c_encoding is NULL else c_encoding @@ -1499,7 +1538,7 @@ cdef class _FeedParser(_BaseParser): pctxt = context._c_ctxt self._feed_parser_running = 0 - if self._for_html: + if self._flags.for_html: htmlparser.htmlParseChunk(pctxt, NULL, 0, 1) else: xmlparser.xmlParseChunk(pctxt, NULL, 0, 1) @@ -1534,9 +1573,9 @@ cdef (int, int) _parse_data_chunk(xmlparser.xmlParserCtxt* c_ctxt, fixup_error = _fixHtmlDictSubtreeNames( c_ctxt.dict, c_ctxt.myDoc, c_node) if c_ctxt.myDoc.dict and c_ctxt.myDoc.dict is not c_ctxt.dict: - xmlparser.xmlDictFree(c_ctxt.myDoc.dict) + tree.xmlDictReference(c_ctxt.dict) + tree.xmlDictFree(c_ctxt.myDoc.dict) c_ctxt.myDoc.dict = c_ctxt.dict - xmlparser.xmlDictReference(c_ctxt.dict) else: orig_loader = _register_document_loader() error = xmlparser.xmlParseChunk(c_ctxt, char_data, buffer_len, 0) @@ -1965,7 +2004,6 @@ cdef xmlDoc* _newXMLDoc() except NULL: raise MemoryError() if result.encoding is NULL: result.encoding = tree.xmlStrdup("UTF-8") - __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result cdef xmlDoc* _newHTMLDoc() except NULL: @@ -1973,7 +2011,6 @@ cdef xmlDoc* _newHTMLDoc() except NULL: result = tree.htmlNewDoc(NULL, NULL) if result is NULL: raise MemoryError() - __GLOBAL_PARSER_CONTEXT.initDocDict(result) return result cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL: @@ -1985,7 +2022,8 @@ cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL: result = tree.xmlCopyDoc(c_doc, 0) if result is NULL: raise MemoryError() - 
__GLOBAL_PARSER_CONTEXT.initDocDict(result) + tree.xmlDictReference(c_doc.dict) + result.dict = c_doc.dict return result cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL: @@ -1993,7 +2031,9 @@ cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL: cdef xmlDoc* result cdef xmlNode* c_node result = tree.xmlCopyDoc(c_doc, 0) # non recursive - __GLOBAL_PARSER_CONTEXT.initDocDict(result) + assert result.dict is NULL + tree.xmlDictReference(c_doc.dict) + result.dict = c_doc.dict with nogil: c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive if c_node is NULL: diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py index 784dbfc18..bf92ffa16 100644 --- a/src/lxml/tests/test_elementtree.py +++ b/src/lxml/tests/test_elementtree.py @@ -14,6 +14,7 @@ import re import sys import textwrap +import types import unittest from contextlib import contextmanager from functools import wraps, partial @@ -4978,6 +4979,35 @@ def setUpClass(cls): r'This method will be removed.*\.iter\(\).*instead', PendingDeprecationWarning) + def test_elementtree_serialises_lxml_tree(self): + # Parse tree with lxml.etree. + root = etree.XML(""" + + A + + + + """) + + # Sanity checks. + self.assertNotIsInstance(etree.tostring, types.FunctionType) + self.assertIsInstance(self.etree.tostring, types.FunctionType) + + # Serialised with xml.etree.ElementTree.tostring() + xml_tostring = self.etree.tostring(root, encoding='utf8') + self.assertIn(b'', xml_tostring) + self.assertIn(b'', xml_tostring) + + # ET.write() + out = io.BytesIO() + self.etree.ElementTree(root).write(out, encoding='utf8') + xml_write = out.getvalue() + self.assertIn(b'', xml_write) + self.assertIn(b'', xml_write) + + # Both should be identical because they used the same serialiser. 
+ self.assertEqual(xml_tostring, xml_write) + filter_by_version( ElementTreeTestCase, ElementTreeTestCase.required_versions_ET, ET_VERSION) diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py index 03f387454..81c4a1e72 100644 --- a/src/lxml/tests/test_etree.py +++ b/src/lxml/tests/test_etree.py @@ -722,6 +722,14 @@ def test_parse_parser_type_error(self): parse = self.etree.parse self.assertRaises(TypeError, parse, 'notthere.xml', object()) + def test_parse_huge_tree(self): + fromstring = self.etree.fromstring + XMLParser = self.etree.XMLParser + + xml = b'' + parser = XMLParser(huge_tree=True) + self.assertEqual(2, len(fromstring(xml, parser=parser))) + def test_parse_premature_end(self): fromstring = self.etree.fromstring XMLParser = self.etree.XMLParser @@ -750,6 +758,17 @@ def test_iterparse_getiterator(self): [1,2,1,4], counts) + def test_iterparse_huge_tree(self): + iterparse = self.etree.iterparse + f = BytesIO(b'') + + counts = [] + for _, elem in iterparse(f, huge_tree=True): + counts.append(len(elem)) + self.assertEqual( + [0,1,0,2], + counts) + def test_iterparse_tree_comments(self): # ET removes comments iterparse = self.etree.iterparse diff --git a/src/lxml/tests/test_threading.py b/src/lxml/tests/test_threading.py index 3b0e3fb2a..50dbd9f7f 100644 --- a/src/lxml/tests/test_threading.py +++ b/src/lxml/tests/test_threading.py @@ -8,7 +8,7 @@ import unittest import threading -from .common_imports import etree, HelperTestCase, BytesIO, _bytes +from .common_imports import etree, HelperTestCase, BytesIO try: from Queue import Queue diff --git a/src/lxml/tests/test_xmlschema.py b/src/lxml/tests/test_xmlschema.py index 0e7e03ccc..9341d2a0a 100644 --- a/src/lxml/tests/test_xmlschema.py +++ b/src/lxml/tests/test_xmlschema.py @@ -436,10 +436,13 @@ def test_xmlschema_resolvers_root(self): # test that the default resolver will get called if there's no # specific parser resolver. 
root_resolver = self.simple_resolver(self.resolver_schema_ext) - etree.get_default_parser().resolvers.add(root_resolver) - schema_doc = etree.parse(self.resolver_schema_int) - schema = etree.XMLSchema(schema_doc) - etree.get_default_parser().resolvers.remove(root_resolver) + default_resolvers = etree.get_default_parser().resolvers + default_resolvers.add(root_resolver) + try: + schema_doc = etree.parse(self.resolver_schema_int) + schema = etree.XMLSchema(schema_doc) + finally: + default_resolvers.remove(root_resolver) def test_xmlschema_resolvers_noroot(self): # test that the default resolver will not get called when a @@ -451,14 +454,16 @@ def resolve(self, url, id, context): return None root_resolver = res_root() - etree.get_default_parser().resolvers.add(root_resolver) - - parser = etree.XMLParser() - parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) + default_resolvers = etree.get_default_parser().resolvers + default_resolvers.add(root_resolver) + try: + parser = etree.XMLParser() + parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext)) - schema_doc = etree.parse(self.resolver_schema_int, parser = parser) - schema = etree.XMLSchema(schema_doc) - etree.get_default_parser().resolvers.remove(root_resolver) + schema_doc = etree.parse(self.resolver_schema_int, parser = parser) + schema = etree.XMLSchema(schema_doc) + finally: + default_resolvers.remove(root_resolver) def test_xmlschema_nested_resolvers(self): # test that resolvers work in a nested fashion. 
diff --git a/src/lxml/xslt.pxi b/src/lxml/xslt.pxi index 659d7054c..c050155b6 100644 --- a/src/lxml/xslt.pxi +++ b/src/lxml/xslt.pxi @@ -62,6 +62,7 @@ cdef _initXSLTResolverContext(_XSLTResolverContext context, context._parser = parser context._c_style_doc = NULL + cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context, int parse_options, int* error) with gil: # call the Python document loaders @@ -101,6 +102,7 @@ cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context, doc_ref._file, doc_ref._filename, context._parser) elif doc_ref._type == PARSER_DATA_EMPTY: c_return_doc = _newXMLDoc() + context._parser.initDocDict(c_return_doc) if c_return_doc is not NULL and c_return_doc.URL is NULL: c_return_doc.URL = tree.xmlStrdup(c_uri) except: @@ -521,17 +523,17 @@ cdef class XSLT: # non-input tag/attr names will come from the stylesheet # anyway. if transform_ctxt.dict is not NULL: - xmlparser.xmlDictFree(transform_ctxt.dict) + tree.xmlDictFree(transform_ctxt.dict) if kw: # parameter values are stored in the dict # => avoid unnecessarily cluttering the global dict - transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict) + transform_ctxt.dict = tree.xmlDictCreateSub(self._c_style.doc.dict) if transform_ctxt.dict is NULL: xslt.xsltFreeTransformContext(transform_ctxt) raise MemoryError() else: transform_ctxt.dict = self._c_style.doc.dict - xmlparser.xmlDictReference(transform_ctxt.dict) + tree.xmlDictReference(transform_ctxt.dict) xslt.xsltSetCtxtParseOptions( transform_ctxt, input_doc._parser._parse_options) @@ -598,11 +600,12 @@ cdef class XSLT: if resolver_context is not None: resolver_context.clear() + c_dict = c_result.dict + tree.xmlDictReference(c_dict) + result_doc = _documentFactory(c_result, input_doc._parser) + result_doc.initDict() - c_dict = c_result.dict - xmlparser.xmlDictReference(c_dict) - __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict) if c_dict is not c_result.dict or \ 
self._c_style.doc.dict is not c_result.dict or \ input_doc._c_doc.dict is not c_result.dict: @@ -616,7 +619,8 @@ cdef class XSLT: if input_doc._c_doc.dict is not c_result.dict: fixThreadDictNames(c_result, input_doc._c_doc.dict, c_result.dict) - xmlparser.xmlDictFree(c_dict) + + tree.xmlDictFree(c_dict) return _xsltResultTreeFactory(result_doc, self, profile_doc) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy