aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Lib/xml/etree/ElementTree.py
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:39 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:39 +0300
commite9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/python3/src/Lib/xml/etree/ElementTree.py
parent2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
downloadydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Lib/xml/etree/ElementTree.py')
-rw-r--r--contrib/tools/python3/src/Lib/xml/etree/ElementTree.py1050
1 files changed, 525 insertions, 525 deletions
diff --git a/contrib/tools/python3/src/Lib/xml/etree/ElementTree.py b/contrib/tools/python3/src/Lib/xml/etree/ElementTree.py
index 6f964670a3..fde303c875 100644
--- a/contrib/tools/python3/src/Lib/xml/etree/ElementTree.py
+++ b/contrib/tools/python3/src/Lib/xml/etree/ElementTree.py
@@ -35,7 +35,7 @@
#---------------------------------------------------------------------
# Licensed to PSF under a Contributor Agreement.
-# See https://www.python.org/psf/license for licensing details.
+# See https://www.python.org/psf/license for licensing details.
#
# ElementTree
# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
@@ -76,7 +76,7 @@ __all__ = [
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
- "indent", "iselement", "iterparse",
+ "indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
@@ -87,7 +87,7 @@ __all__ = [
"XML", "XMLID",
"XMLParser", "XMLPullParser",
"register_namespace",
- "canonicalize", "C14NWriterTarget",
+ "canonicalize", "C14NWriterTarget",
]
VERSION = "1.3.0"
@@ -171,7 +171,7 @@ class Element:
raise TypeError("attrib must be dict, not %s" % (
attrib.__class__.__name__,))
self.tag = tag
- self.attrib = {**attrib, **extra}
+ self.attrib = {**attrib, **extra}
self._children = []
def __repr__(self):
@@ -195,13 +195,13 @@ class Element:
original tree.
"""
- warnings.warn(
- "elem.copy() is deprecated. Use copy.copy(elem) instead.",
- DeprecationWarning
- )
- return self.__copy__()
-
- def __copy__(self):
+ warnings.warn(
+ "elem.copy() is deprecated. Use copy.copy(elem) instead.",
+ DeprecationWarning
+ )
+ return self.__copy__()
+
+ def __copy__(self):
elem = self.makeelement(self.tag, self.attrib)
elem.text = self.text
elem.tail = self.tail
@@ -223,11 +223,11 @@ class Element:
return self._children[index]
def __setitem__(self, index, element):
- if isinstance(index, slice):
- for elt in element:
- self._assert_is_element(elt)
- else:
- self._assert_is_element(element)
+ if isinstance(index, slice):
+ for elt in element:
+ self._assert_is_element(elt)
+ else:
+ self._assert_is_element(element)
self._children[index] = element
def __delitem__(self, index):
@@ -252,7 +252,7 @@ class Element:
"""
for element in elements:
self._assert_is_element(element)
- self._children.append(element)
+ self._children.append(element)
def insert(self, index, subelement):
"""Insert *subelement* at position *index*."""
@@ -435,7 +435,7 @@ def SubElement(parent, tag, attrib={}, **extra):
additional attributes given as keyword arguments.
"""
- attrib = {**attrib, **extra}
+ attrib = {**attrib, **extra}
element = parent.makeelement(tag, attrib)
parent.append(element)
return element
@@ -897,7 +897,7 @@ def _serialize_xml(write, elem, qnames, namespaces,
k,
_escape_attrib(v)
))
- for k, v in items:
+ for k, v in items:
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
@@ -953,7 +953,7 @@ def _serialize_html(write, elem, qnames, namespaces, **kwargs):
k,
_escape_attrib(v)
))
- for k, v in items:
+ for k, v in items:
if isinstance(k, QName):
k = k.text
if isinstance(v, QName):
@@ -1057,15 +1057,15 @@ def _escape_attrib(text):
text = text.replace(">", "&gt;")
if "\"" in text:
text = text.replace("\"", "&quot;")
- # Although section 2.11 of the XML specification states that CR or
- # CR LN should be replaced with just LN, it applies only to EOLNs
- # which take part of organizing file into lines. Within attributes,
- # we are replacing these with entity numbers, so they do not count.
+ # Although section 2.11 of the XML specification states that CR or
+ # CR LN should be replaced with just LN, it applies only to EOLNs
+ # which take part of organizing file into lines. Within attributes,
+ # we are replacing these with entity numbers, so they do not count.
# http://www.w3.org/TR/REC-xml/#sec-line-ends
- # The current solution, contained in following six lines, was
- # discussed in issue 17582 and 39011.
+ # The current solution, contained in following six lines, was
+ # discussed in issue 17582 and 39011.
if "\r" in text:
- text = text.replace("\r", "&#13;")
+ text = text.replace("\r", "&#13;")
if "\n" in text:
text = text.replace("\n", "&#10;")
if "\t" in text:
@@ -1090,7 +1090,7 @@ def _escape_attrib_html(text):
# --------------------------------------------------------------------
def tostring(element, encoding=None, method=None, *,
- xml_declaration=None, default_namespace=None,
+ xml_declaration=None, default_namespace=None,
short_empty_elements=True):
"""Generate string representation of XML element.
@@ -1099,17 +1099,17 @@ def tostring(element, encoding=None, method=None, *,
*element* is an Element instance, *encoding* is an optional output
encoding defaulting to US-ASCII, *method* is an optional output which can
- be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
- sets the default XML namespace (for "xmlns").
+ be one of "xml" (default), "html", "text" or "c14n", *default_namespace*
+ sets the default XML namespace (for "xmlns").
Returns an (optionally) encoded string containing the XML data.
"""
stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
- ElementTree(element).write(stream, encoding,
- xml_declaration=xml_declaration,
- default_namespace=default_namespace,
- method=method,
+ ElementTree(element).write(stream, encoding,
+ xml_declaration=xml_declaration,
+ default_namespace=default_namespace,
+ method=method,
short_empty_elements=short_empty_elements)
return stream.getvalue()
@@ -1131,14 +1131,14 @@ class _ListDataStream(io.BufferedIOBase):
return len(self.lst)
def tostringlist(element, encoding=None, method=None, *,
- xml_declaration=None, default_namespace=None,
+ xml_declaration=None, default_namespace=None,
short_empty_elements=True):
lst = []
stream = _ListDataStream(lst)
- ElementTree(element).write(stream, encoding,
- xml_declaration=xml_declaration,
- default_namespace=default_namespace,
- method=method,
+ ElementTree(element).write(stream, encoding,
+ xml_declaration=xml_declaration,
+ default_namespace=default_namespace,
+ method=method,
short_empty_elements=short_empty_elements)
return lst
@@ -1161,57 +1161,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")
-
-def indent(tree, space=" ", level=0):
- """Indent an XML document by inserting newlines and indentation space
- after elements.
-
- *tree* is the ElementTree or Element to modify. The (root) element
- itself will not be changed, but the tail text of all elements in its
- subtree will be adapted.
-
- *space* is the whitespace to insert for each indentation level, two
- space characters by default.
-
- *level* is the initial indentation level. Setting this to a higher
- value than 0 can be used for indenting subtrees that are more deeply
- nested inside of a document.
- """
- if isinstance(tree, ElementTree):
- tree = tree.getroot()
- if level < 0:
- raise ValueError(f"Initial indentation level must be >= 0, got {level}")
- if not len(tree):
- return
-
- # Reduce the memory consumption by reusing indentation strings.
- indentations = ["\n" + level * space]
-
- def _indent_children(elem, level):
- # Start a new indentation level for the first child.
- child_level = level + 1
- try:
- child_indentation = indentations[child_level]
- except IndexError:
- child_indentation = indentations[level] + space
- indentations.append(child_indentation)
-
- if not elem.text or not elem.text.strip():
- elem.text = child_indentation
-
- for child in elem:
- if len(child):
- _indent_children(child, child_level)
- if not child.tail or not child.tail.strip():
- child.tail = child_indentation
-
- # Dedent after the last child by overwriting the previous indentation.
- if not child.tail.strip():
- child.tail = indentations[level]
-
- _indent_children(tree, 0)
-
-
+
+def indent(tree, space=" ", level=0):
+ """Indent an XML document by inserting newlines and indentation space
+ after elements.
+
+ *tree* is the ElementTree or Element to modify. The (root) element
+ itself will not be changed, but the tail text of all elements in its
+ subtree will be adapted.
+
+ *space* is the whitespace to insert for each indentation level, two
+ space characters by default.
+
+ *level* is the initial indentation level. Setting this to a higher
+ value than 0 can be used for indenting subtrees that are more deeply
+ nested inside of a document.
+ """
+ if isinstance(tree, ElementTree):
+ tree = tree.getroot()
+ if level < 0:
+ raise ValueError(f"Initial indentation level must be >= 0, got {level}")
+ if not len(tree):
+ return
+
+ # Reduce the memory consumption by reusing indentation strings.
+ indentations = ["\n" + level * space]
+
+ def _indent_children(elem, level):
+ # Start a new indentation level for the first child.
+ child_level = level + 1
+ try:
+ child_indentation = indentations[child_level]
+ except IndexError:
+ child_indentation = indentations[level] + space
+ indentations.append(child_indentation)
+
+ if not elem.text or not elem.text.strip():
+ elem.text = child_indentation
+
+ for child in elem:
+ if len(child):
+ _indent_children(child, child_level)
+ if not child.tail or not child.tail.strip():
+ child.tail = child_indentation
+
+ # Dedent after the last child by overwriting the previous indentation.
+ if not child.tail.strip():
+ child.tail = indentations[level]
+
+ _indent_children(tree, 0)
+
+
# --------------------------------------------------------------------
# parsing
@@ -1283,7 +1283,7 @@ class XMLPullParser:
def __init__(self, events=None, *, _parser=None):
# The _parser argument is for internal use only and must not be relied
# upon in user code. It will be removed in a future release.
- # See https://bugs.python.org/issue17741 for more details.
+ # See https://bugs.python.org/issue17741 for more details.
self._events_queue = collections.deque()
self._parser = _parser or XMLParser(target=TreeBuilder())
@@ -1402,30 +1402,30 @@ class TreeBuilder:
*element_factory* is an optional element factory which is called
to create new Element instances, as necessary.
- *comment_factory* is a factory to create comments to be used instead of
- the standard factory. If *insert_comments* is false (the default),
- comments will not be inserted into the tree.
-
- *pi_factory* is a factory to create processing instructions to be used
- instead of the standard factory. If *insert_pis* is false (the default),
- processing instructions will not be inserted into the tree.
+ *comment_factory* is a factory to create comments to be used instead of
+ the standard factory. If *insert_comments* is false (the default),
+ comments will not be inserted into the tree.
+
+ *pi_factory* is a factory to create processing instructions to be used
+ instead of the standard factory. If *insert_pis* is false (the default),
+ processing instructions will not be inserted into the tree.
"""
- def __init__(self, element_factory=None, *,
- comment_factory=None, pi_factory=None,
- insert_comments=False, insert_pis=False):
+ def __init__(self, element_factory=None, *,
+ comment_factory=None, pi_factory=None,
+ insert_comments=False, insert_pis=False):
self._data = [] # data collector
self._elem = [] # element stack
self._last = None # last element
- self._root = None # root element
+ self._root = None # root element
self._tail = None # true if we're after an end tag
- if comment_factory is None:
- comment_factory = Comment
- self._comment_factory = comment_factory
- self.insert_comments = insert_comments
- if pi_factory is None:
- pi_factory = ProcessingInstruction
- self._pi_factory = pi_factory
- self.insert_pis = insert_pis
+ if comment_factory is None:
+ comment_factory = Comment
+ self._comment_factory = comment_factory
+ self.insert_comments = insert_comments
+ if pi_factory is None:
+ pi_factory = ProcessingInstruction
+ self._pi_factory = pi_factory
+ self.insert_pis = insert_pis
if element_factory is None:
element_factory = Element
self._factory = element_factory
@@ -1433,8 +1433,8 @@ class TreeBuilder:
def close(self):
"""Flush builder buffers and return toplevel document Element."""
assert len(self._elem) == 0, "missing end tags"
- assert self._root is not None, "missing toplevel element"
- return self._root
+ assert self._root is not None, "missing toplevel element"
+ return self._root
def _flush(self):
if self._data:
@@ -1463,8 +1463,8 @@ class TreeBuilder:
self._last = elem = self._factory(tag, attrs)
if self._elem:
self._elem[-1].append(elem)
- elif self._root is None:
- self._root = elem
+ elif self._root is None:
+ self._root = elem
self._elem.append(elem)
self._tail = 0
return elem
@@ -1483,34 +1483,34 @@ class TreeBuilder:
self._tail = 1
return self._last
- def comment(self, text):
- """Create a comment using the comment_factory.
-
- *text* is the text of the comment.
- """
- return self._handle_single(
- self._comment_factory, self.insert_comments, text)
-
- def pi(self, target, text=None):
- """Create a processing instruction using the pi_factory.
-
- *target* is the target name of the processing instruction.
- *text* is the data of the processing instruction, or ''.
- """
- return self._handle_single(
- self._pi_factory, self.insert_pis, target, text)
-
- def _handle_single(self, factory, insert, *args):
- elem = factory(*args)
- if insert:
- self._flush()
- self._last = elem
- if self._elem:
- self._elem[-1].append(elem)
- self._tail = 1
- return elem
-
-
+ def comment(self, text):
+ """Create a comment using the comment_factory.
+
+ *text* is the text of the comment.
+ """
+ return self._handle_single(
+ self._comment_factory, self.insert_comments, text)
+
+ def pi(self, target, text=None):
+ """Create a processing instruction using the pi_factory.
+
+ *target* is the target name of the processing instruction.
+ *text* is the data of the processing instruction, or ''.
+ """
+ return self._handle_single(
+ self._pi_factory, self.insert_pis, target, text)
+
+ def _handle_single(self, factory, insert, *args):
+ elem = factory(*args)
+ if insert:
+ self._flush()
+ self._last = elem
+ if self._elem:
+ self._elem[-1].append(elem)
+ self._tail = 1
+ return elem
+
+
# also see ElementTree and TreeBuilder
class XMLParser:
"""Element structure builder for XML source data based on the expat parser.
@@ -1522,7 +1522,7 @@ class XMLParser:
"""
- def __init__(self, *, target=None, encoding=None):
+ def __init__(self, *, target=None, encoding=None):
try:
from xml.parsers import expat
except ImportError:
@@ -1546,10 +1546,10 @@ class XMLParser:
parser.StartElementHandler = self._start
if hasattr(target, 'end'):
parser.EndElementHandler = self._end
- if hasattr(target, 'start_ns'):
- parser.StartNamespaceDeclHandler = self._start_ns
- if hasattr(target, 'end_ns'):
- parser.EndNamespaceDeclHandler = self._end_ns
+ if hasattr(target, 'start_ns'):
+ parser.StartNamespaceDeclHandler = self._start_ns
+ if hasattr(target, 'end_ns'):
+ parser.EndNamespaceDeclHandler = self._end_ns
if hasattr(target, 'data'):
parser.CharacterDataHandler = target.data
# miscellaneous callbacks
@@ -1591,34 +1591,34 @@ class XMLParser:
append((event, end(tag)))
parser.EndElementHandler = handler
elif event_name == "start-ns":
- # TreeBuilder does not implement .start_ns()
- if hasattr(self.target, "start_ns"):
- def handler(prefix, uri, event=event_name, append=append,
- start_ns=self._start_ns):
- append((event, start_ns(prefix, uri)))
- else:
- def handler(prefix, uri, event=event_name, append=append):
- append((event, (prefix or '', uri or '')))
+ # TreeBuilder does not implement .start_ns()
+ if hasattr(self.target, "start_ns"):
+ def handler(prefix, uri, event=event_name, append=append,
+ start_ns=self._start_ns):
+ append((event, start_ns(prefix, uri)))
+ else:
+ def handler(prefix, uri, event=event_name, append=append):
+ append((event, (prefix or '', uri or '')))
parser.StartNamespaceDeclHandler = handler
elif event_name == "end-ns":
- # TreeBuilder does not implement .end_ns()
- if hasattr(self.target, "end_ns"):
- def handler(prefix, event=event_name, append=append,
- end_ns=self._end_ns):
- append((event, end_ns(prefix)))
- else:
- def handler(prefix, event=event_name, append=append):
- append((event, None))
+ # TreeBuilder does not implement .end_ns()
+ if hasattr(self.target, "end_ns"):
+ def handler(prefix, event=event_name, append=append,
+ end_ns=self._end_ns):
+ append((event, end_ns(prefix)))
+ else:
+ def handler(prefix, event=event_name, append=append):
+ append((event, None))
parser.EndNamespaceDeclHandler = handler
- elif event_name == 'comment':
- def handler(text, event=event_name, append=append, self=self):
- append((event, self.target.comment(text)))
- parser.CommentHandler = handler
- elif event_name == 'pi':
- def handler(pi_target, data, event=event_name, append=append,
- self=self):
- append((event, self.target.pi(pi_target, data)))
- parser.ProcessingInstructionHandler = handler
+ elif event_name == 'comment':
+ def handler(text, event=event_name, append=append, self=self):
+ append((event, self.target.comment(text)))
+ parser.CommentHandler = handler
+ elif event_name == 'pi':
+ def handler(pi_target, data, event=event_name, append=append,
+ self=self):
+ append((event, self.target.pi(pi_target, data)))
+ parser.ProcessingInstructionHandler = handler
else:
raise ValueError("unknown event %r" % event_name)
@@ -1639,12 +1639,12 @@ class XMLParser:
self._names[key] = name
return name
- def _start_ns(self, prefix, uri):
- return self.target.start_ns(prefix or '', uri or '')
-
- def _end_ns(self, prefix):
- return self.target.end_ns(prefix or '')
-
+ def _start_ns(self, prefix, uri):
+ return self.target.start_ns(prefix or '', uri or '')
+
+ def _end_ns(self, prefix):
+ return self.target.end_ns(prefix or '')
+
def _start(self, tag, attr_list):
# Handler for expat's StartElementHandler. Since ordered_attributes
# is set, the attributes are reported as a list of alternating
@@ -1706,25 +1706,25 @@ class XMLParser:
return
if hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
- elif hasattr(self, "doctype"):
- warnings.warn(
- "The doctype() method of XMLParser is ignored. "
- "Define doctype() method on the TreeBuilder target.",
- RuntimeWarning)
-
+ elif hasattr(self, "doctype"):
+ warnings.warn(
+ "The doctype() method of XMLParser is ignored. "
+ "Define doctype() method on the TreeBuilder target.",
+ RuntimeWarning)
+
self._doctype = None
def feed(self, data):
"""Feed encoded data to parser."""
try:
- self.parser.Parse(data, False)
+ self.parser.Parse(data, False)
except self._error as v:
self._raiseerror(v)
def close(self):
"""Finish feeding data to parser and return element structure."""
try:
- self.parser.Parse(b"", True) # end of data
+ self.parser.Parse(b"", True) # end of data
except self._error as v:
self._raiseerror(v)
try:
@@ -1739,341 +1739,341 @@ class XMLParser:
del self.target, self._target
-# --------------------------------------------------------------------
-# C14N 2.0
-
-def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
- """Convert XML to its C14N 2.0 serialised form.
-
- If *out* is provided, it must be a file or file-like object that receives
- the serialised canonical XML output (text, not bytes) through its ``.write()``
- method. To write to a file, open it in text mode with encoding "utf-8".
- If *out* is not provided, this function returns the output as text string.
-
- Either *xml_data* (an XML string) or *from_file* (a file path or
- file-like object) must be provided as input.
-
- The configuration options are the same as for the ``C14NWriterTarget``.
- """
- if xml_data is None and from_file is None:
- raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
- sio = None
- if out is None:
- sio = out = io.StringIO()
-
- parser = XMLParser(target=C14NWriterTarget(out.write, **options))
-
- if xml_data is not None:
- parser.feed(xml_data)
- parser.close()
- elif from_file is not None:
- parse(from_file, parser=parser)
-
- return sio.getvalue() if sio is not None else None
-
-
-_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
-
-
-class C14NWriterTarget:
- """
- Canonicalization writer target for the XMLParser.
-
- Serialises parse events to XML C14N 2.0.
-
- The *write* function is used for writing out the resulting data stream
- as text (not bytes). To write to a file, open it in text mode with encoding
- "utf-8" and pass its ``.write`` method.
-
- Configuration options:
-
- - *with_comments*: set to true to include comments
- - *strip_text*: set to true to strip whitespace before and after text content
- - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
- - *qname_aware_tags*: a set of qname aware tag names in which prefixes
- should be replaced in text content
- - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
- should be replaced in text content
- - *exclude_attrs*: a set of attribute names that should not be serialised
- - *exclude_tags*: a set of tag names that should not be serialised
- """
- def __init__(self, write, *,
- with_comments=False, strip_text=False, rewrite_prefixes=False,
- qname_aware_tags=None, qname_aware_attrs=None,
- exclude_attrs=None, exclude_tags=None):
- self._write = write
- self._data = []
- self._with_comments = with_comments
- self._strip_text = strip_text
- self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
- self._exclude_tags = set(exclude_tags) if exclude_tags else None
-
- self._rewrite_prefixes = rewrite_prefixes
- if qname_aware_tags:
- self._qname_aware_tags = set(qname_aware_tags)
- else:
- self._qname_aware_tags = None
- if qname_aware_attrs:
- self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
- else:
- self._find_qname_aware_attrs = None
-
- # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
- self._declared_ns_stack = [[
- ("http://www.w3.org/XML/1998/namespace", "xml"),
- ]]
- # Stack with user declared namespace prefixes as (uri, prefix) pairs.
- self._ns_stack = []
- if not rewrite_prefixes:
- self._ns_stack.append(list(_namespace_map.items()))
- self._ns_stack.append([])
- self._prefix_map = {}
- self._preserve_space = [False]
- self._pending_start = None
- self._root_seen = False
- self._root_done = False
- self._ignored_depth = 0
-
- def _iter_namespaces(self, ns_stack, _reversed=reversed):
- for namespaces in _reversed(ns_stack):
- if namespaces: # almost no element declares new namespaces
- yield from namespaces
-
- def _resolve_prefix_name(self, prefixed_name):
- prefix, name = prefixed_name.split(':', 1)
- for uri, p in self._iter_namespaces(self._ns_stack):
- if p == prefix:
- return f'{{{uri}}}{name}'
- raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
-
- def _qname(self, qname, uri=None):
- if uri is None:
- uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
- else:
- tag = qname
-
- prefixes_seen = set()
- for u, prefix in self._iter_namespaces(self._declared_ns_stack):
- if u == uri and prefix not in prefixes_seen:
- return f'{prefix}:{tag}' if prefix else tag, tag, uri
- prefixes_seen.add(prefix)
-
- # Not declared yet => add new declaration.
- if self._rewrite_prefixes:
- if uri in self._prefix_map:
- prefix = self._prefix_map[uri]
- else:
- prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
- self._declared_ns_stack[-1].append((uri, prefix))
- return f'{prefix}:{tag}', tag, uri
-
- if not uri and '' not in prefixes_seen:
- # No default namespace declared => no prefix needed.
- return tag, tag, uri
-
- for u, prefix in self._iter_namespaces(self._ns_stack):
- if u == uri:
- self._declared_ns_stack[-1].append((uri, prefix))
- return f'{prefix}:{tag}' if prefix else tag, tag, uri
-
- if not uri:
- # As soon as a default namespace is defined,
- # anything that has no namespace (and thus, no prefix) goes there.
- return tag, tag, uri
-
- raise ValueError(f'Namespace "{uri}" is not declared in scope')
-
- def data(self, data):
- if not self._ignored_depth:
- self._data.append(data)
-
- def _flush(self, _join_text=''.join):
- data = _join_text(self._data)
- del self._data[:]
- if self._strip_text and not self._preserve_space[-1]:
- data = data.strip()
- if self._pending_start is not None:
- args, self._pending_start = self._pending_start, None
- qname_text = data if data and _looks_like_prefix_name(data) else None
- self._start(*args, qname_text)
- if qname_text is not None:
- return
- if data and self._root_seen:
- self._write(_escape_cdata_c14n(data))
-
- def start_ns(self, prefix, uri):
- if self._ignored_depth:
- return
- # we may have to resolve qnames in text content
- if self._data:
- self._flush()
- self._ns_stack[-1].append((uri, prefix))
-
- def start(self, tag, attrs):
- if self._exclude_tags is not None and (
- self._ignored_depth or tag in self._exclude_tags):
- self._ignored_depth += 1
- return
- if self._data:
- self._flush()
-
- new_namespaces = []
- self._declared_ns_stack.append(new_namespaces)
-
- if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
- # Need to parse text first to see if it requires a prefix declaration.
- self._pending_start = (tag, attrs, new_namespaces)
- return
- self._start(tag, attrs, new_namespaces)
-
- def _start(self, tag, attrs, new_namespaces, qname_text=None):
- if self._exclude_attrs is not None and attrs:
- attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}
-
- qnames = {tag, *attrs}
- resolved_names = {}
-
- # Resolve prefixes in attribute and tag text.
- if qname_text is not None:
- qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
- qnames.add(qname)
- if self._find_qname_aware_attrs is not None and attrs:
- qattrs = self._find_qname_aware_attrs(attrs)
- if qattrs:
- for attr_name in qattrs:
- value = attrs[attr_name]
- if _looks_like_prefix_name(value):
- qname = resolved_names[value] = self._resolve_prefix_name(value)
- qnames.add(qname)
- else:
- qattrs = None
- else:
- qattrs = None
-
- # Assign prefixes in lexicographical order of used URIs.
- parse_qname = self._qname
- parsed_qnames = {n: parse_qname(n) for n in sorted(
- qnames, key=lambda n: n.split('}', 1))}
-
- # Write namespace declarations in prefix order ...
- if new_namespaces:
- attr_list = [
- ('xmlns:' + prefix if prefix else 'xmlns', uri)
- for uri, prefix in new_namespaces
- ]
- attr_list.sort()
- else:
- # almost always empty
- attr_list = []
-
- # ... followed by attributes in URI+name order
- if attrs:
- for k, v in sorted(attrs.items()):
- if qattrs is not None and k in qattrs and v in resolved_names:
- v = parsed_qnames[resolved_names[v]][0]
- attr_qname, attr_name, uri = parsed_qnames[k]
- # No prefix for attributes in default ('') namespace.
- attr_list.append((attr_qname if uri else attr_name, v))
-
- # Honour xml:space attributes.
- space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
- self._preserve_space.append(
- space_behaviour == 'preserve' if space_behaviour
- else self._preserve_space[-1])
-
- # Write the tag.
- write = self._write
- write('<' + parsed_qnames[tag][0])
- if attr_list:
- write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
- write('>')
-
- # Write the resolved qname text content.
- if qname_text is not None:
- write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
-
- self._root_seen = True
- self._ns_stack.append([])
-
- def end(self, tag):
- if self._ignored_depth:
- self._ignored_depth -= 1
- return
- if self._data:
- self._flush()
- self._write(f'</{self._qname(tag)[0]}>')
- self._preserve_space.pop()
- self._root_done = len(self._preserve_space) == 1
- self._declared_ns_stack.pop()
- self._ns_stack.pop()
-
- def comment(self, text):
- if not self._with_comments:
- return
- if self._ignored_depth:
- return
- if self._root_done:
- self._write('\n')
- elif self._root_seen and self._data:
- self._flush()
- self._write(f'<!--{_escape_cdata_c14n(text)}-->')
- if not self._root_seen:
- self._write('\n')
-
- def pi(self, target, data):
- if self._ignored_depth:
- return
- if self._root_done:
- self._write('\n')
- elif self._root_seen and self._data:
- self._flush()
- self._write(
- f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
- if not self._root_seen:
- self._write('\n')
-
-
-def _escape_cdata_c14n(text):
- # escape character data
- try:
- # it's worth avoiding do-nothing calls for strings that are
- # shorter than 500 character, or so. assume that's, by far,
- # the most common case in most applications.
- if '&' in text:
- text = text.replace('&', '&amp;')
- if '<' in text:
- text = text.replace('<', '&lt;')
- if '>' in text:
- text = text.replace('>', '&gt;')
- if '\r' in text:
- text = text.replace('\r', '&#xD;')
- return text
- except (TypeError, AttributeError):
- _raise_serialization_error(text)
-
-
-def _escape_attrib_c14n(text):
- # escape attribute value
- try:
- if '&' in text:
- text = text.replace('&', '&amp;')
- if '<' in text:
- text = text.replace('<', '&lt;')
- if '"' in text:
- text = text.replace('"', '&quot;')
- if '\t' in text:
- text = text.replace('\t', '&#x9;')
- if '\n' in text:
- text = text.replace('\n', '&#xA;')
- if '\r' in text:
- text = text.replace('\r', '&#xD;')
- return text
- except (TypeError, AttributeError):
- _raise_serialization_error(text)
-
-
-# --------------------------------------------------------------------
-
+# --------------------------------------------------------------------
+# C14N 2.0
+
+def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
+ """Convert XML to its C14N 2.0 serialised form.
+
+ If *out* is provided, it must be a file or file-like object that receives
+ the serialised canonical XML output (text, not bytes) through its ``.write()``
+ method. To write to a file, open it in text mode with encoding "utf-8".
+ If *out* is not provided, this function returns the output as text string.
+
+ Either *xml_data* (an XML string) or *from_file* (a file path or
+ file-like object) must be provided as input.
+
+ The configuration options are the same as for the ``C14NWriterTarget``.
+ """
+ if xml_data is None and from_file is None:
+ raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
+ sio = None
+ if out is None:
+ sio = out = io.StringIO()
+
+ parser = XMLParser(target=C14NWriterTarget(out.write, **options))
+
+ if xml_data is not None:
+ parser.feed(xml_data)
+ parser.close()
+ elif from_file is not None:
+ parse(from_file, parser=parser)
+
+ return sio.getvalue() if sio is not None else None
+
+
+_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
+
+
+class C14NWriterTarget:
+ """
+ Canonicalization writer target for the XMLParser.
+
+ Serialises parse events to XML C14N 2.0.
+
+ The *write* function is used for writing out the resulting data stream
+ as text (not bytes). To write to a file, open it in text mode with encoding
+ "utf-8" and pass its ``.write`` method.
+
+ Configuration options:
+
+ - *with_comments*: set to true to include comments
+ - *strip_text*: set to true to strip whitespace before and after text content
+ - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
+ - *qname_aware_tags*: a set of qname aware tag names in which prefixes
+ should be replaced in text content
+ - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
+ should be replaced in text content
+ - *exclude_attrs*: a set of attribute names that should not be serialised
+ - *exclude_tags*: a set of tag names that should not be serialised
+ """
+ def __init__(self, write, *,
+ with_comments=False, strip_text=False, rewrite_prefixes=False,
+ qname_aware_tags=None, qname_aware_attrs=None,
+ exclude_attrs=None, exclude_tags=None):
+ self._write = write
+ self._data = []
+ self._with_comments = with_comments
+ self._strip_text = strip_text
+ self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
+ self._exclude_tags = set(exclude_tags) if exclude_tags else None
+
+ self._rewrite_prefixes = rewrite_prefixes
+ if qname_aware_tags:
+ self._qname_aware_tags = set(qname_aware_tags)
+ else:
+ self._qname_aware_tags = None
+ if qname_aware_attrs:
+ self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
+ else:
+ self._find_qname_aware_attrs = None
+
+ # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
+ self._declared_ns_stack = [[
+ ("http://www.w3.org/XML/1998/namespace", "xml"),
+ ]]
+ # Stack with user declared namespace prefixes as (uri, prefix) pairs.
+ self._ns_stack = []
+ if not rewrite_prefixes:
+ self._ns_stack.append(list(_namespace_map.items()))
+ self._ns_stack.append([])
+ self._prefix_map = {}
+ self._preserve_space = [False]
+ self._pending_start = None
+ self._root_seen = False
+ self._root_done = False
+ self._ignored_depth = 0
+
+ def _iter_namespaces(self, ns_stack, _reversed=reversed):
+ for namespaces in _reversed(ns_stack):
+ if namespaces: # almost no element declares new namespaces
+ yield from namespaces
+
+ def _resolve_prefix_name(self, prefixed_name):
+ prefix, name = prefixed_name.split(':', 1)
+ for uri, p in self._iter_namespaces(self._ns_stack):
+ if p == prefix:
+ return f'{{{uri}}}{name}'
+ raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
+
+ def _qname(self, qname, uri=None):
+ if uri is None:
+ uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
+ else:
+ tag = qname
+
+ prefixes_seen = set()
+ for u, prefix in self._iter_namespaces(self._declared_ns_stack):
+ if u == uri and prefix not in prefixes_seen:
+ return f'{prefix}:{tag}' if prefix else tag, tag, uri
+ prefixes_seen.add(prefix)
+
+ # Not declared yet => add new declaration.
+ if self._rewrite_prefixes:
+ if uri in self._prefix_map:
+ prefix = self._prefix_map[uri]
+ else:
+ prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
+ self._declared_ns_stack[-1].append((uri, prefix))
+ return f'{prefix}:{tag}', tag, uri
+
+ if not uri and '' not in prefixes_seen:
+ # No default namespace declared => no prefix needed.
+ return tag, tag, uri
+
+ for u, prefix in self._iter_namespaces(self._ns_stack):
+ if u == uri:
+ self._declared_ns_stack[-1].append((uri, prefix))
+ return f'{prefix}:{tag}' if prefix else tag, tag, uri
+
+ if not uri:
+ # As soon as a default namespace is defined,
+ # anything that has no namespace (and thus, no prefix) goes there.
+ return tag, tag, uri
+
+ raise ValueError(f'Namespace "{uri}" is not declared in scope')
+
+ def data(self, data):
+ if not self._ignored_depth:
+ self._data.append(data)
+
+ def _flush(self, _join_text=''.join):
+ data = _join_text(self._data)
+ del self._data[:]
+ if self._strip_text and not self._preserve_space[-1]:
+ data = data.strip()
+ if self._pending_start is not None:
+ args, self._pending_start = self._pending_start, None
+ qname_text = data if data and _looks_like_prefix_name(data) else None
+ self._start(*args, qname_text)
+ if qname_text is not None:
+ return
+ if data and self._root_seen:
+ self._write(_escape_cdata_c14n(data))
+
+ def start_ns(self, prefix, uri):
+ if self._ignored_depth:
+ return
+ # we may have to resolve qnames in text content
+ if self._data:
+ self._flush()
+ self._ns_stack[-1].append((uri, prefix))
+
+ def start(self, tag, attrs):
+ if self._exclude_tags is not None and (
+ self._ignored_depth or tag in self._exclude_tags):
+ self._ignored_depth += 1
+ return
+ if self._data:
+ self._flush()
+
+ new_namespaces = []
+ self._declared_ns_stack.append(new_namespaces)
+
+ if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
+ # Need to parse text first to see if it requires a prefix declaration.
+ self._pending_start = (tag, attrs, new_namespaces)
+ return
+ self._start(tag, attrs, new_namespaces)
+
+ def _start(self, tag, attrs, new_namespaces, qname_text=None):
+ if self._exclude_attrs is not None and attrs:
+ attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}
+
+ qnames = {tag, *attrs}
+ resolved_names = {}
+
+ # Resolve prefixes in attribute and tag text.
+ if qname_text is not None:
+ qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
+ qnames.add(qname)
+ if self._find_qname_aware_attrs is not None and attrs:
+ qattrs = self._find_qname_aware_attrs(attrs)
+ if qattrs:
+ for attr_name in qattrs:
+ value = attrs[attr_name]
+ if _looks_like_prefix_name(value):
+ qname = resolved_names[value] = self._resolve_prefix_name(value)
+ qnames.add(qname)
+ else:
+ qattrs = None
+ else:
+ qattrs = None
+
+ # Assign prefixes in lexicographical order of used URIs.
+ parse_qname = self._qname
+ parsed_qnames = {n: parse_qname(n) for n in sorted(
+ qnames, key=lambda n: n.split('}', 1))}
+
+ # Write namespace declarations in prefix order ...
+ if new_namespaces:
+ attr_list = [
+ ('xmlns:' + prefix if prefix else 'xmlns', uri)
+ for uri, prefix in new_namespaces
+ ]
+ attr_list.sort()
+ else:
+ # almost always empty
+ attr_list = []
+
+ # ... followed by attributes in URI+name order
+ if attrs:
+ for k, v in sorted(attrs.items()):
+ if qattrs is not None and k in qattrs and v in resolved_names:
+ v = parsed_qnames[resolved_names[v]][0]
+ attr_qname, attr_name, uri = parsed_qnames[k]
+ # No prefix for attributes in default ('') namespace.
+ attr_list.append((attr_qname if uri else attr_name, v))
+
+ # Honour xml:space attributes.
+ space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
+ self._preserve_space.append(
+ space_behaviour == 'preserve' if space_behaviour
+ else self._preserve_space[-1])
+
+ # Write the tag.
+ write = self._write
+ write('<' + parsed_qnames[tag][0])
+ if attr_list:
+ write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
+ write('>')
+
+ # Write the resolved qname text content.
+ if qname_text is not None:
+ write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
+
+ self._root_seen = True
+ self._ns_stack.append([])
+
+ def end(self, tag):
+ if self._ignored_depth:
+ self._ignored_depth -= 1
+ return
+ if self._data:
+ self._flush()
+ self._write(f'</{self._qname(tag)[0]}>')
+ self._preserve_space.pop()
+ self._root_done = len(self._preserve_space) == 1
+ self._declared_ns_stack.pop()
+ self._ns_stack.pop()
+
+ def comment(self, text):
+ if not self._with_comments:
+ return
+ if self._ignored_depth:
+ return
+ if self._root_done:
+ self._write('\n')
+ elif self._root_seen and self._data:
+ self._flush()
+ self._write(f'<!--{_escape_cdata_c14n(text)}-->')
+ if not self._root_seen:
+ self._write('\n')
+
+ def pi(self, target, data):
+ if self._ignored_depth:
+ return
+ if self._root_done:
+ self._write('\n')
+ elif self._root_seen and self._data:
+ self._flush()
+ self._write(
+ f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
+ if not self._root_seen:
+ self._write('\n')
+
+
+def _escape_cdata_c14n(text):
+ # escape character data
+ try:
+ # it's worth avoiding do-nothing calls for strings that are
+ # shorter than 500 character, or so. assume that's, by far,
+ # the most common case in most applications.
+ if '&' in text:
+ text = text.replace('&', '&amp;')
+ if '<' in text:
+ text = text.replace('<', '&lt;')
+ if '>' in text:
+ text = text.replace('>', '&gt;')
+ if '\r' in text:
+ text = text.replace('\r', '&#xD;')
+ return text
+ except (TypeError, AttributeError):
+ _raise_serialization_error(text)
+
+
+def _escape_attrib_c14n(text):
+ # escape attribute value
+ try:
+ if '&' in text:
+ text = text.replace('&', '&amp;')
+ if '<' in text:
+ text = text.replace('<', '&lt;')
+ if '"' in text:
+ text = text.replace('"', '&quot;')
+ if '\t' in text:
+ text = text.replace('\t', '&#x9;')
+ if '\n' in text:
+ text = text.replace('\n', '&#xA;')
+ if '\r' in text:
+ text = text.replace('\r', '&#xD;')
+ return text
+ except (TypeError, AttributeError):
+ _raise_serialization_error(text)
+
+
+# --------------------------------------------------------------------
+
# Import the C accelerators
try:
# Element is going to be shadowed by the C implementation. We need to keep
@@ -2081,10 +2081,10 @@ try:
# (see tests)
_Element_Py = Element
- # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
+ # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
from _elementtree import *
- from _elementtree import _set_factories
+ from _elementtree import _set_factories
except ImportError:
pass
-else:
- _set_factories(Comment, ProcessingInstruction)
+else:
+ _set_factories(Comment, ProcessingInstruction)