From 9b9517ec7dfac674052d41ec96e4c85e197f3228 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Thu, 22 Dec 2016 12:38:03 -0800 Subject: [PATCH] Fully fix iterparse() defusing on Python 3.6 Python 3.3 did a very thorough job of hiding the pure-Python iterparse() from defusedxml, so we had to not use iterparse() directly, but find and use the pure-Python _IterParseIterator instead. This trick breaks with Python 3.6, though, because _IterParseIterator is no longer accessible externally at all. However, it turns out Python 3.3's approach to iterparse() was a one-off: the implementation of the C accelerator stuff was changed again in 3.4, and from 3.4 onwards we should be getting the pure-Python iterparse() again. So we can make the private iterator access dodge specific to Python 3.3, and just use the simple code which uses iterparse() directly - which we were only using for Python 2.7 until now - for Python 3.2 and 3.4+. --- defusedxml/ElementTree.py | 16 ++++++++++------ defusedxml/common.py | 7 +++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/defusedxml/ElementTree.py b/defusedxml/ElementTree.py index 8c46064..28ffce0 100644 --- a/defusedxml/ElementTree.py +++ b/defusedxml/ElementTree.py @@ -8,7 +8,7 @@ from __future__ import print_function, absolute_import import sys -from .common import PY3, PY26, PY31 +from .common import PY3, PY26, PY31, PY33 if PY3: import importlib else: @@ -29,7 +29,7 @@ from .common import (DTDForbidden, EntitiesForbidden, __origin__ = "xml.etree.ElementTree" def _get_py3_cls(): - """Python 3.3 hides the pure Python code but defusedxml requires it. + """Python 3.3+ hide the pure Python code but defusedxml requires it. The code is based on test.support.import_fresh_module(). 
""" @@ -49,12 +49,16 @@ def _get_py3_cls(): _XMLParser = pure_pymod.XMLParser _iterparse = pure_pymod.iterparse - if PY31 or sys.version_info >= (3, 6): - _IterParseIterator = None + ParseError = pure_pymod.ParseError + _IterParseIterator = None + if PY31: from xml.parsers.expat import ExpatError as ParseError - else: + if PY33: + # Python 3.3 specifically did some shenanigans to hide the + # pure-Python iterparse() entirely, so we need to use + # this private iterator instead. All other Pythons don't have + # this problem _IterParseIterator = pure_pymod._IterParseIterator - ParseError = pure_pymod.ParseError return _XMLParser, _iterparse, _IterParseIterator, ParseError diff --git a/defusedxml/common.py b/defusedxml/common.py index 5e5f8a2..53a5326 100644 --- a/defusedxml/common.py +++ b/defusedxml/common.py @@ -11,6 +11,7 @@ from types import MethodType PY3 = sys.version_info[0] == 3 PY26 = sys.version_info[:2] == (2, 6) PY31 = sys.version_info[:2] == (3, 1) +PY33 = sys.version_info[:2] == (3, 3) class DefusedXmlException(ValueError): @@ -126,7 +127,9 @@ def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, bind(xmlparser, "defused_external_entity_ref_handler", "ExternalEntityRefHandler") return it - elif PY3: + elif PY33: + # pure-Python iterparse() is completely hidden on Python 3.3, + # we have to use the backing _IterParseIterator def iterparse(source, events=None, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True): close_source = False @@ -140,7 +143,7 @@ def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, forbid_external=forbid_external) return _IterParseIterator(source, events, parser, close_source) else: - # Python 2.7 + # Python 2.7, Python 3.2, Python 3.4+ def iterparse(source, events=None, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True): if parser is None: -- 2.11.0