diff options
author | AlexSm <alex@ydb.tech> | 2024-03-05 10:40:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 12:40:59 +0300 |
commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Modules/_elementtree.c | |
parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
download | ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz |
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com>
Co-authored-by: deshevoy <deshevoy@yandex-team.com>
Co-authored-by: robot-contrib <robot-contrib@yandex-team.com>
Co-authored-by: thegeorg <thegeorg@yandex-team.com>
Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com>
Co-authored-by: svidyuk <svidyuk@yandex-team.com>
Co-authored-by: shadchin <shadchin@yandex-team.com>
Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com>
Co-authored-by: innokentii <innokentii@yandex-team.com>
Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com>
Co-authored-by: snermolaev <snermolaev@yandex-team.com>
Co-authored-by: dimdim11 <dimdim11@yandex-team.com>
Co-authored-by: kickbutt <kickbutt@yandex-team.com>
Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com>
Co-authored-by: korsunandrei <korsunandrei@yandex-team.com>
Co-authored-by: petrk <petrk@yandex-team.com>
Co-authored-by: miroslav2 <miroslav2@yandex-team.com>
Co-authored-by: serjflint <serjflint@yandex-team.com>
Co-authored-by: akhropov <akhropov@yandex-team.com>
Co-authored-by: prettyboy <prettyboy@yandex-team.com>
Co-authored-by: ilikepugs <ilikepugs@yandex-team.com>
Co-authored-by: hiddenpath <hiddenpath@yandex-team.com>
Co-authored-by: mikhnenko <mikhnenko@yandex-team.com>
Co-authored-by: spreis <spreis@yandex-team.com>
Co-authored-by: andreyshspb <andreyshspb@yandex-team.com>
Co-authored-by: dimaandreev <dimaandreev@yandex-team.com>
Co-authored-by: rashid <rashid@yandex-team.com>
Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com>
Co-authored-by: r-vetrov <r-vetrov@yandex-team.com>
Co-authored-by: ypodlesov <ypodlesov@yandex-team.com>
Co-authored-by: zaverden <zaverden@yandex-team.com>
Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com>
Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com>
Co-authored-by: v-korovin <v-korovin@yandex-team.com>
Co-authored-by: arikon <arikon@yandex-team.com>
Co-authored-by: khoden <khoden@yandex-team.com>
Co-authored-by: psydmm <psydmm@yandex-team.com>
Co-authored-by: robot-javacom <robot-javacom@yandex-team.com>
Co-authored-by: dtorilov <dtorilov@yandex-team.com>
Co-authored-by: sennikovmv <sennikovmv@yandex-team.com>
Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Modules/_elementtree.c')
-rw-r--r-- | contrib/tools/python3/Modules/_elementtree.c | 4458 |
1 files changed, 4458 insertions, 0 deletions
diff --git a/contrib/tools/python3/Modules/_elementtree.c b/contrib/tools/python3/Modules/_elementtree.c new file mode 100644 index 0000000000..620de8bb4c --- /dev/null +++ b/contrib/tools/python3/Modules/_elementtree.c @@ -0,0 +1,4458 @@ +/*-------------------------------------------------------------------- + * Licensed to PSF under a Contributor Agreement. + * See https://www.python.org/psf/license for licensing details. + * + * _elementtree - C accelerator for xml.etree.ElementTree + * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. + * Copyright (c) 1999-2009 by Fredrik Lundh. + * + * info@pythonware.com + * http://www.pythonware.com + *-------------------------------------------------------------------- + */ + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "structmember.h" // PyMemberDef +#include "expat.h" +#include "pyexpat.h" + +/* -------------------------------------------------------------------- */ +/* configuration */ + +/* An element can hold this many children without extra memory + allocations. */ +#define STATIC_CHILDREN 4 + +/* For best performance, chose a value so that 80-90% of all nodes + have no more than the given number of children. Set this to zero + to minimize the size of the element structure itself (this only + helps if you have lots of leaf nodes with attributes). */ + +/* Also note that pymalloc always allocates blocks in multiples of + eight bytes. For the current C version of ElementTree, this means + that the number of children should be an even number, at least on + 32-bit platforms. */ + +/* -------------------------------------------------------------------- */ + +/* compiler tweaks */ +#if defined(_MSC_VER) +#define LOCAL(type) static __inline type __fastcall +#else +#define LOCAL(type) static type +#endif + +/* macros used to store 'join' flags in string object pointers. note + that all use of text and tail as object pointers must be wrapped in + JOIN_OBJ. see comments in the ElementObject definition for more + info. */ +#define JOIN_GET(p) ((uintptr_t) (p) & 1) +#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag))) +#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1)) + +/* Py_SETREF for a PyObject* that uses a join flag. */ +Py_LOCAL_INLINE(void) +_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr) +{ + PyObject *tmp = JOIN_OBJ(*p); + *p = new_joined_ptr; + Py_DECREF(tmp); +} + +/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by + * reference since this function sets it to NULL. +*/ +static void _clear_joined_ptr(PyObject **p) +{ + if (*p) { + _set_joined_ptr(p, NULL); + } +} + +/* Per-module state; PEP 3121 */ +typedef struct { + PyObject *parseerror_obj; + PyObject *deepcopy_obj; + PyObject *elementpath_obj; + PyObject *comment_factory; + PyObject *pi_factory; + /* Interned strings */ + PyObject *str_text; + PyObject *str_tail; + PyObject *str_append; + PyObject *str_find; + PyObject *str_findtext; + PyObject *str_findall; + PyObject *str_iterfind; + PyObject *str_doctype; + /* Types defined by this extension */ + PyTypeObject *Element_Type; + PyTypeObject *ElementIter_Type; + PyTypeObject *TreeBuilder_Type; + PyTypeObject *XMLParser_Type; + + PyObject *expat_capsule; + struct PyExpat_CAPI *expat_capi; +} elementtreestate; + +static struct PyModuleDef elementtreemodule; + +/* Given a module object (assumed to be _elementtree), get its per-module + * state. + */ +static inline elementtreestate* +get_elementtree_state(PyObject *module) +{ + void *state = PyModule_GetState(module); + assert(state != NULL); + return (elementtreestate *)state; +} + +static inline elementtreestate * +get_elementtree_state_by_cls(PyTypeObject *cls) +{ + void *state = PyType_GetModuleState(cls); + assert(state != NULL); + return (elementtreestate *)state; +} + +static inline elementtreestate * +get_elementtree_state_by_type(PyTypeObject *tp) +{ + PyObject *mod = PyType_GetModuleByDef(tp, &elementtreemodule); + assert(mod != NULL); + return get_elementtree_state(mod); +} + +static int +elementtree_clear(PyObject *m) +{ + elementtreestate *st = get_elementtree_state(m); + Py_CLEAR(st->parseerror_obj); + Py_CLEAR(st->deepcopy_obj); + Py_CLEAR(st->elementpath_obj); + Py_CLEAR(st->comment_factory); + Py_CLEAR(st->pi_factory); + + // Interned strings + Py_CLEAR(st->str_append); + Py_CLEAR(st->str_find); + Py_CLEAR(st->str_findall); + Py_CLEAR(st->str_findtext); + Py_CLEAR(st->str_iterfind); + Py_CLEAR(st->str_tail); + Py_CLEAR(st->str_text); + Py_CLEAR(st->str_doctype); + + // Heap types + Py_CLEAR(st->Element_Type); + Py_CLEAR(st->ElementIter_Type); + Py_CLEAR(st->TreeBuilder_Type); + Py_CLEAR(st->XMLParser_Type); + Py_CLEAR(st->expat_capsule); + + st->expat_capi = NULL; + return 0; +} + +static int +elementtree_traverse(PyObject *m, visitproc visit, void *arg) +{ + elementtreestate *st = get_elementtree_state(m); + Py_VISIT(st->parseerror_obj); + Py_VISIT(st->deepcopy_obj); + Py_VISIT(st->elementpath_obj); + Py_VISIT(st->comment_factory); + Py_VISIT(st->pi_factory); + + // Heap types + Py_VISIT(st->Element_Type); + Py_VISIT(st->ElementIter_Type); + Py_VISIT(st->TreeBuilder_Type); + Py_VISIT(st->XMLParser_Type); + Py_VISIT(st->expat_capsule); + return 0; +} + +static void +elementtree_free(void *m) +{ + elementtree_clear((PyObject *)m); +} + +/* helpers */ + +LOCAL(PyObject*) +list_join(PyObject* list) +{ + /* join list elements */ + PyObject* joiner; + PyObject* result; + + joiner = PyUnicode_FromStringAndSize("", 0); + if (!joiner) + return NULL; + result = PyUnicode_Join(joiner, list); + Py_DECREF(joiner); + return result; +} + +/* Is the given object an empty dictionary? +*/ +static int +is_empty_dict(PyObject *obj) +{ + return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0; +} + + +/* -------------------------------------------------------------------- */ +/* the Element type */ + +typedef struct { + + /* attributes (a dictionary object), or NULL if no attributes */ + PyObject* attrib; + + /* child elements */ + Py_ssize_t length; /* actual number of items */ + Py_ssize_t allocated; /* allocated items */ + + /* this either points to _children or to a malloced buffer */ + PyObject* *children; + + PyObject* _children[STATIC_CHILDREN]; + +} ElementObjectExtra; + +typedef struct { + PyObject_HEAD + + /* element tag (a string). */ + PyObject* tag; + + /* text before first child. note that this is a tagged pointer; + use JOIN_OBJ to get the object pointer. the join flag is used + to distinguish lists created by the tree builder from lists + assigned to the attribute by application code; the former + should be joined before being returned to the user, the latter + should be left intact. */ + PyObject* text; + + /* text after this element, in parent. note that this is a tagged + pointer; use JOIN_OBJ to get the object pointer. */ + PyObject* tail; + + ElementObjectExtra* extra; + + PyObject *weakreflist; /* For tp_weaklistoffset */ + +} ElementObject; + + +#define Element_CheckExact(st, op) Py_IS_TYPE(op, (st)->Element_Type) +#define Element_Check(st, op) PyObject_TypeCheck(op, (st)->Element_Type) + + +/* -------------------------------------------------------------------- */ +/* Element constructors and destructor */ + +LOCAL(int) +create_extra(ElementObject* self, PyObject* attrib) +{ + self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); + if (!self->extra) { + PyErr_NoMemory(); + return -1; + } + + self->extra->attrib = Py_XNewRef(attrib); + + self->extra->length = 0; + self->extra->allocated = STATIC_CHILDREN; + self->extra->children = self->extra->_children; + + return 0; +} + +LOCAL(void) +dealloc_extra(ElementObjectExtra *extra) +{ + Py_ssize_t i; + + if (!extra) + return; + + Py_XDECREF(extra->attrib); + + for (i = 0; i < extra->length; i++) + Py_DECREF(extra->children[i]); + + if (extra->children != extra->_children) + PyObject_Free(extra->children); + + PyObject_Free(extra); +} + +LOCAL(void) +clear_extra(ElementObject* self) +{ + ElementObjectExtra *myextra; + + if (!self->extra) + return; + + /* Avoid DECREFs calling into this code again (cycles, etc.) + */ + myextra = self->extra; + self->extra = NULL; + + dealloc_extra(myextra); +} + +/* Convenience internal function to create new Element objects with the given + * tag and attributes. +*/ +LOCAL(PyObject*) +create_new_element(elementtreestate *st, PyObject *tag, PyObject *attrib) +{ + ElementObject* self; + + self = PyObject_GC_New(ElementObject, st->Element_Type); + if (self == NULL) + return NULL; + self->extra = NULL; + self->tag = Py_NewRef(tag); + self->text = Py_NewRef(Py_None); + self->tail = Py_NewRef(Py_None); + self->weakreflist = NULL; + + PyObject_GC_Track(self); + + if (attrib != NULL && !is_empty_dict(attrib)) { + if (create_extra(self, attrib) < 0) { + Py_DECREF(self); + return NULL; + } + } + + return (PyObject*) self; +} + +static PyObject * +element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + ElementObject *e = (ElementObject *)type->tp_alloc(type, 0); + if (e != NULL) { + e->tag = Py_NewRef(Py_None); + e->text = Py_NewRef(Py_None); + e->tail = Py_NewRef(Py_None); + e->extra = NULL; + e->weakreflist = NULL; + } + return (PyObject *)e; +} + +/* Helper function for extracting the attrib dictionary from a keywords dict. + * This is required by some constructors/functions in this module that can + * either accept attrib as a keyword argument or all attributes splashed + * directly into *kwds. + * + * Return a dictionary with the content of kwds merged into the content of + * attrib. If there is no attrib keyword, return a copy of kwds. + */ +static PyObject* +get_attrib_from_keywords(PyObject *kwds) +{ + PyObject *attrib_str = PyUnicode_FromString("attrib"); + if (attrib_str == NULL) { + return NULL; + } + PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str); + + if (attrib) { + /* If attrib was found in kwds, copy its value and remove it from + * kwds + */ + if (!PyDict_Check(attrib)) { + Py_DECREF(attrib_str); + PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", + Py_TYPE(attrib)->tp_name); + return NULL; + } + attrib = PyDict_Copy(attrib); + if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) { + Py_SETREF(attrib, NULL); + } + } + else if (!PyErr_Occurred()) { + attrib = PyDict_New(); + } + + Py_DECREF(attrib_str); + + if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) { + Py_DECREF(attrib); + return NULL; + } + return attrib; +} + +/*[clinic input] +module _elementtree +class _elementtree.Element "ElementObject *" "clinic_state()->Element_Type" +class _elementtree.TreeBuilder "TreeBuilderObject *" "clinic_state()->TreeBuilder_Type" +class _elementtree.XMLParser "XMLParserObject *" "clinic_state()->XMLParser_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6c83ea832d2b0ef1]*/ + +static int +element_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *tag; + PyObject *attrib = NULL; + ElementObject *self_elem; + + if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) + return -1; + + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return -1; + if (kwds) { + if (PyDict_Update(attrib, kwds) < 0) { + Py_DECREF(attrib); + return -1; + } + } + } else if (kwds) { + /* have keywords args */ + attrib = get_attrib_from_keywords(kwds); + if (!attrib) + return -1; + } + + self_elem = (ElementObject *)self; + + if (attrib != NULL && !is_empty_dict(attrib)) { + if (create_extra(self_elem, attrib) < 0) { + Py_DECREF(attrib); + return -1; + } + } + + /* We own a reference to attrib here and it's no longer needed. */ + Py_XDECREF(attrib); + + /* Replace the objects already pointed to by tag, text and tail. */ + Py_XSETREF(self_elem->tag, Py_NewRef(tag)); + + _set_joined_ptr(&self_elem->text, Py_NewRef(Py_None)); + _set_joined_ptr(&self_elem->tail, Py_NewRef(Py_None)); + + return 0; +} + +LOCAL(int) +element_resize(ElementObject* self, Py_ssize_t extra) +{ + Py_ssize_t size; + PyObject* *children; + + assert(extra >= 0); + /* make sure self->children can hold the given number of extra + elements. set an exception and return -1 if allocation failed */ + + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return -1; + } + + size = self->extra->length + extra; /* never overflows */ + + if (size > self->extra->allocated) { + /* use Python 2.4's list growth strategy */ + size = (size >> 3) + (size < 9 ? 3 : 6) + size; + /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" + * which needs at least 4 bytes. + * Although it's a false alarm always assume at least one child to + * be safe. + */ + size = size ? size : 1; + if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*)) + goto nomemory; + if (self->extra->children != self->extra->_children) { + /* Coverity CID #182 size_error: Allocating 1 bytes to pointer + * "children", which needs at least 4 bytes. Although it's a + * false alarm always assume at least one child to be safe. + */ + children = PyObject_Realloc(self->extra->children, + size * sizeof(PyObject*)); + if (!children) + goto nomemory; + } else { + children = PyObject_Malloc(size * sizeof(PyObject*)); + if (!children) + goto nomemory; + /* copy existing children from static area to malloc buffer */ + memcpy(children, self->extra->children, + self->extra->length * sizeof(PyObject*)); + } + self->extra->children = children; + self->extra->allocated = size; + } + + return 0; + + nomemory: + PyErr_NoMemory(); + return -1; +} + +LOCAL(void) +raise_type_error(PyObject *element) +{ + PyErr_Format(PyExc_TypeError, + "expected an Element, not \"%.200s\"", + Py_TYPE(element)->tp_name); +} + +LOCAL(int) +element_add_subelement(elementtreestate *st, ElementObject *self, + PyObject *element) +{ + /* add a child element to a parent */ + if (!Element_Check(st, element)) { + raise_type_error(element); + return -1; + } + + if (element_resize(self, 1) < 0) + return -1; + + self->extra->children[self->extra->length] = Py_NewRef(element); + + self->extra->length++; + + return 0; +} + +LOCAL(PyObject*) +element_get_attrib(ElementObject* self) +{ + /* return borrowed reference to attrib dictionary */ + /* note: this function assumes that the extra section exists */ + + PyObject* res = self->extra->attrib; + + if (!res) { + /* create missing dictionary */ + res = self->extra->attrib = PyDict_New(); + } + + return res; +} + +LOCAL(PyObject*) +element_get_text(ElementObject* self) +{ + /* return borrowed reference to text attribute */ + + PyObject *res = self->text; + + if (JOIN_GET(res)) { + res = JOIN_OBJ(res); + if (PyList_CheckExact(res)) { + PyObject *tmp = list_join(res); + if (!tmp) + return NULL; + self->text = tmp; + Py_SETREF(res, tmp); + } + } + + return res; +} + +LOCAL(PyObject*) +element_get_tail(ElementObject* self) +{ + /* return borrowed reference to text attribute */ + + PyObject *res = self->tail; + + if (JOIN_GET(res)) { + res = JOIN_OBJ(res); + if (PyList_CheckExact(res)) { + PyObject *tmp = list_join(res); + if (!tmp) + return NULL; + self->tail = tmp; + Py_SETREF(res, tmp); + } + } + + return res; +} + +static PyObject* +subelement(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject* elem; + + elementtreestate *st = get_elementtree_state(self); + ElementObject* parent; + PyObject* tag; + PyObject* attrib = NULL; + if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", + st->Element_Type, &parent, &tag, + &PyDict_Type, &attrib)) { + return NULL; + } + + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return NULL; + if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) { + Py_DECREF(attrib); + return NULL; + } + } else if (kwds) { + /* have keyword args */ + attrib = get_attrib_from_keywords(kwds); + if (!attrib) + return NULL; + } else { + /* no attrib arg, no kwds, so no attribute */ + } + + elem = create_new_element(st, tag, attrib); + Py_XDECREF(attrib); + if (elem == NULL) + return NULL; + + if (element_add_subelement(st, parent, elem) < 0) { + Py_DECREF(elem); + return NULL; + } + + return elem; +} + +static int +element_gc_traverse(ElementObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->tag); + Py_VISIT(JOIN_OBJ(self->text)); + Py_VISIT(JOIN_OBJ(self->tail)); + + if (self->extra) { + Py_ssize_t i; + Py_VISIT(self->extra->attrib); + + for (i = 0; i < self->extra->length; ++i) + Py_VISIT(self->extra->children[i]); + } + return 0; +} + +static int +element_gc_clear(ElementObject *self) +{ + Py_CLEAR(self->tag); + _clear_joined_ptr(&self->text); + _clear_joined_ptr(&self->tail); + + /* After dropping all references from extra, it's no longer valid anyway, + * so fully deallocate it. + */ + clear_extra(self); + return 0; +} + +static void +element_dealloc(ElementObject* self) +{ + PyTypeObject *tp = Py_TYPE(self); + + /* bpo-31095: UnTrack is needed before calling any callbacks */ + PyObject_GC_UnTrack(self); + Py_TRASHCAN_BEGIN(self, element_dealloc) + + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); + + /* element_gc_clear clears all references and deallocates extra + */ + element_gc_clear(self); + + tp->tp_free((PyObject *)self); + Py_DECREF(tp); + Py_TRASHCAN_END +} + +/* -------------------------------------------------------------------- */ + +/*[clinic input] +_elementtree.Element.append + + cls: defining_class + subelement: object(subclass_of='clinic_state()->Element_Type') + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_append_impl(ElementObject *self, PyTypeObject *cls, + PyObject *subelement) +/*[clinic end generated code: output=d00923711ea317fc input=8baf92679f9717b8]*/ +{ + elementtreestate *st = get_elementtree_state_by_cls(cls); + if (element_add_subelement(st, self, subelement) < 0) + return NULL; + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.Element.clear + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_clear_impl(ElementObject *self) +/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/ +{ + clear_extra(self); + + _set_joined_ptr(&self->text, Py_NewRef(Py_None)); + _set_joined_ptr(&self->tail, Py_NewRef(Py_None)); + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.Element.__copy__ + + cls: defining_class + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element___copy___impl(ElementObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=da22894421ff2b36 input=91edb92d9f441213]*/ +{ + Py_ssize_t i; + ElementObject* element; + elementtreestate *st = get_elementtree_state_by_cls(cls); + + element = (ElementObject*) create_new_element( + st, self->tag, self->extra ? self->extra->attrib : NULL); + if (!element) + return NULL; + + Py_INCREF(JOIN_OBJ(self->text)); + _set_joined_ptr(&element->text, self->text); + + Py_INCREF(JOIN_OBJ(self->tail)); + _set_joined_ptr(&element->tail, self->tail); + + assert(!element->extra || !element->extra->length); + if (self->extra) { + if (element_resize(element, self->extra->length) < 0) { + Py_DECREF(element); + return NULL; + } + + for (i = 0; i < self->extra->length; i++) { + element->extra->children[i] = Py_NewRef(self->extra->children[i]); + } + + assert(!element->extra->length); + element->extra->length = self->extra->length; + } + + return (PyObject*) element; +} + +/* Helper for a deep copy. */ +LOCAL(PyObject *) deepcopy(elementtreestate *, PyObject *, PyObject *); + +/*[clinic input] +_elementtree.Element.__deepcopy__ + + memo: object(subclass_of="&PyDict_Type") + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) +/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/ +{ + Py_ssize_t i; + ElementObject* element; + PyObject* tag; + PyObject* attrib; + PyObject* text; + PyObject* tail; + PyObject* id; + + PyTypeObject *tp = Py_TYPE(self); + elementtreestate *st = get_elementtree_state_by_type(tp); + tag = deepcopy(st, self->tag, memo); + if (!tag) + return NULL; + + if (self->extra && self->extra->attrib) { + attrib = deepcopy(st, self->extra->attrib, memo); + if (!attrib) { + Py_DECREF(tag); + return NULL; + } + } else { + attrib = NULL; + } + + element = (ElementObject*) create_new_element(st, tag, attrib); + + Py_DECREF(tag); + Py_XDECREF(attrib); + + if (!element) + return NULL; + + text = deepcopy(st, JOIN_OBJ(self->text), memo); + if (!text) + goto error; + _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); + + tail = deepcopy(st, JOIN_OBJ(self->tail), memo); + if (!tail) + goto error; + _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); + + assert(!element->extra || !element->extra->length); + if (self->extra) { + if (element_resize(element, self->extra->length) < 0) + goto error; + + for (i = 0; i < self->extra->length; i++) { + PyObject* child = deepcopy(st, self->extra->children[i], memo); + if (!child || !Element_Check(st, child)) { + if (child) { + raise_type_error(child); + Py_DECREF(child); + } + element->extra->length = i; + goto error; + } + element->extra->children[i] = child; + } + + assert(!element->extra->length); + element->extra->length = self->extra->length; + } + + /* add object to memo dictionary (so deepcopy won't visit it again) */ + id = PyLong_FromSsize_t((uintptr_t) self); + if (!id) + goto error; + + i = PyDict_SetItem(memo, id, (PyObject*) element); + + Py_DECREF(id); + + if (i < 0) + goto error; + + return (PyObject*) element; + + error: + Py_DECREF(element); + return NULL; +} + +LOCAL(PyObject *) +deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) +{ + /* do a deep copy of the given object */ + PyObject *stack[2]; + + /* Fast paths */ + if (object == Py_None || PyUnicode_CheckExact(object)) { + return Py_NewRef(object); + } + + if (Py_REFCNT(object) == 1) { + if (PyDict_CheckExact(object)) { + PyObject *key, *value; + Py_ssize_t pos = 0; + int simple = 1; + while (PyDict_Next(object, &pos, &key, &value)) { + if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) { + simple = 0; + break; + } + } + if (simple) + return PyDict_Copy(object); + /* Fall through to general case */ + } + else if (Element_CheckExact(st, object)) { + return _elementtree_Element___deepcopy___impl( + (ElementObject *)object, memo); + } + } + + /* General case */ + if (!st->deepcopy_obj) { + PyErr_SetString(PyExc_RuntimeError, + "deepcopy helper not found"); + return NULL; + } + + stack[0] = object; + stack[1] = memo; + return _PyObject_FastCall(st->deepcopy_obj, stack, 2); +} + + +/*[clinic input] +_elementtree.Element.__sizeof__ -> size_t + +[clinic start generated code]*/ + +static size_t +_elementtree_Element___sizeof___impl(ElementObject *self) +/*[clinic end generated code: output=baae4e7ae9fe04ec input=54e298c501f3e0d0]*/ +{ + size_t result = _PyObject_SIZE(Py_TYPE(self)); + if (self->extra) { + result += sizeof(ElementObjectExtra); + if (self->extra->children != self->extra->_children) { + result += (size_t)self->extra->allocated * sizeof(PyObject*); + } + } + return result; +} + +/* dict keys for getstate/setstate. */ +#define PICKLED_TAG "tag" +#define PICKLED_CHILDREN "_children" +#define PICKLED_ATTRIB "attrib" +#define PICKLED_TAIL "tail" +#define PICKLED_TEXT "text" + +/* __getstate__ returns a fabricated instance dict as in the pure-Python + * Element implementation, for interoperability/interchangeability. This + * makes the pure-Python implementation details an API, but (a) there aren't + * any unnecessary structures there; and (b) it buys compatibility with 3.2 + * pickles. See issue #16076. + */ +/*[clinic input] +_elementtree.Element.__getstate__ + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element___getstate___impl(ElementObject *self) +/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/ +{ + Py_ssize_t i; + PyObject *children, *attrib; + + /* Build a list of children. */ + children = PyList_New(self->extra ? self->extra->length : 0); + if (!children) + return NULL; + for (i = 0; i < PyList_GET_SIZE(children); i++) { + PyObject *child = Py_NewRef(self->extra->children[i]); + PyList_SET_ITEM(children, i, child); + } + + if (self->extra && self->extra->attrib) { + attrib = Py_NewRef(self->extra->attrib); + } + else { + attrib = PyDict_New(); + if (!attrib) { + Py_DECREF(children); + return NULL; + } + } + + return Py_BuildValue("{sOsNsNsOsO}", + PICKLED_TAG, self->tag, + PICKLED_CHILDREN, children, + PICKLED_ATTRIB, attrib, + PICKLED_TEXT, JOIN_OBJ(self->text), + PICKLED_TAIL, JOIN_OBJ(self->tail)); +} + +static PyObject * +element_setstate_from_attributes(elementtreestate *st, + ElementObject *self, + PyObject *tag, + PyObject *attrib, + PyObject *text, + PyObject *tail, + PyObject *children) +{ + Py_ssize_t i, nchildren; + ElementObjectExtra *oldextra = NULL; + + if (!tag) { + PyErr_SetString(PyExc_TypeError, "tag may not be NULL"); + return NULL; + } + + Py_XSETREF(self->tag, Py_NewRef(tag)); + + text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; + Py_INCREF(JOIN_OBJ(text)); + _set_joined_ptr(&self->text, text); + + tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; + Py_INCREF(JOIN_OBJ(tail)); + _set_joined_ptr(&self->tail, tail); + + /* Handle ATTRIB and CHILDREN. */ + if (!children && !attrib) { + Py_RETURN_NONE; + } + + /* Compute 'nchildren'. */ + if (children) { + if (!PyList_Check(children)) { + PyErr_SetString(PyExc_TypeError, "'_children' is not a list"); + return NULL; + } + nchildren = PyList_GET_SIZE(children); + + /* (Re-)allocate 'extra'. + Avoid DECREFs calling into this code again (cycles, etc.) + */ + oldextra = self->extra; + self->extra = NULL; + if (element_resize(self, nchildren)) { + assert(!self->extra || !self->extra->length); + clear_extra(self); + self->extra = oldextra; + return NULL; + } + assert(self->extra); + assert(self->extra->allocated >= nchildren); + if (oldextra) { + assert(self->extra->attrib == NULL); + self->extra->attrib = oldextra->attrib; + oldextra->attrib = NULL; + } + + /* Copy children */ + for (i = 0; i < nchildren; i++) { + PyObject *child = PyList_GET_ITEM(children, i); + if (!Element_Check(st, child)) { + raise_type_error(child); + self->extra->length = i; + dealloc_extra(oldextra); + return NULL; + } + self->extra->children[i] = Py_NewRef(child); + } + + assert(!self->extra->length); + self->extra->length = nchildren; + } + else { + if (element_resize(self, 0)) { + return NULL; + } + } + + /* Stash attrib. */ + Py_XSETREF(self->extra->attrib, Py_XNewRef(attrib)); + dealloc_extra(oldextra); + + Py_RETURN_NONE; +} + +/* __setstate__ for Element instance from the Python implementation. + * 'state' should be the instance dict. + */ + +static PyObject * +element_setstate_from_Python(elementtreestate *st, ElementObject *self, + PyObject *state) +{ + static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT, + PICKLED_TAIL, PICKLED_CHILDREN, 0}; + PyObject *args; + PyObject *tag, *attrib, *text, *tail, *children; + PyObject *retval; + + tag = attrib = text = tail = children = NULL; + args = PyTuple_New(0); + if (!args) + return NULL; + + if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag, + &attrib, &text, &tail, &children)) + retval = element_setstate_from_attributes(st, self, tag, attrib, text, + tail, children); + else + retval = NULL; + + Py_DECREF(args); + return retval; +} + +/*[clinic input] +_elementtree.Element.__setstate__ + + cls: defining_class + state: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element___setstate___impl(ElementObject *self, + PyTypeObject *cls, PyObject *state) +/*[clinic end generated code: output=598bfb5730f71509 input=13830488d35d51f7]*/ +{ + if (!PyDict_CheckExact(state)) { + PyErr_Format(PyExc_TypeError, + "Don't know how to unpickle \"%.200R\" as an Element", + state); + return NULL; + } + else { + elementtreestate *st = get_elementtree_state_by_cls(cls); + return element_setstate_from_Python(st, self, state); + } +} + +LOCAL(int) +checkpath(PyObject* tag) +{ + Py_ssize_t i; + int check = 1; + + /* check if a tag contains an xpath character */ + +#define PATHCHAR(ch) \ + (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') + + if (PyUnicode_Check(tag)) { + const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); + const void *data = PyUnicode_DATA(tag); + int kind = PyUnicode_KIND(tag); + if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && ( + PyUnicode_READ(kind, data, 1) == '}' || ( + PyUnicode_READ(kind, data, 1) == '*' && + PyUnicode_READ(kind, data, 2) == '}'))) { + /* wildcard: '{}tag' or '{*}tag' */ + return 1; + } + for (i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch == '{') + check = 0; + else if (ch == '}') + check = 1; + else if (check && PATHCHAR(ch)) + return 1; + } + return 0; + } + if (PyBytes_Check(tag)) { + const char *p = PyBytes_AS_STRING(tag); + const Py_ssize_t len = PyBytes_GET_SIZE(tag); + if (len >= 3 && p[0] == '{' && ( + p[1] == '}' || (p[1] == '*' && p[2] == '}'))) { + /* wildcard: '{}tag' or '{*}tag' */ + return 1; + } + for (i = 0; i < len; i++) { + if (p[i] == '{') + check = 0; + else if (p[i] == '}') + check = 1; + else if (check && PATHCHAR(p[i])) + return 1; + } + return 0; + } + + return 1; /* unknown type; might be path expression */ +} + +/*[clinic input] +_elementtree.Element.extend + + cls: defining_class + elements: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_extend_impl(ElementObject *self, PyTypeObject *cls, + PyObject *elements) +/*[clinic end generated code: output=3e86d37fac542216 input=6479b1b5379d09ae]*/ +{ + PyObject* seq; + Py_ssize_t i; + + seq = PySequence_Fast(elements, ""); + if (!seq) { + PyErr_Format( + PyExc_TypeError, + "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name + ); + return NULL; + } + + elementtreestate *st = get_elementtree_state_by_cls(cls); + for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) { + PyObject* element = Py_NewRef(PySequence_Fast_GET_ITEM(seq, i)); + if (element_add_subelement(st, self, element) < 0) { + Py_DECREF(seq); + Py_DECREF(element); + return NULL; + } + Py_DECREF(element); + } + + Py_DECREF(seq); + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.Element.find + + cls: defining_class + / + path: object + namespaces: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_find_impl(ElementObject *self, PyTypeObject *cls, + PyObject *path, PyObject *namespaces) +/*[clinic end generated code: output=18f77d393c9fef1b input=94df8a83f956acc6]*/ +{ + Py_ssize_t i; + elementtreestate *st = get_elementtree_state_by_cls(cls); + + if (checkpath(path) || namespaces != Py_None) { + return PyObject_CallMethodObjArgs( + st->elementpath_obj, st->str_find, self, path, namespaces, NULL + ); + } + + if (!self->extra) + Py_RETURN_NONE; + + for (i = 0; i < self->extra->length; i++) { + PyObject* item = self->extra->children[i]; + int rc; + assert(Element_Check(st, item)); + Py_INCREF(item); + rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); + if (rc > 0) + return item; + Py_DECREF(item); + if (rc < 0) + return NULL; + } + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.Element.findtext + + cls: defining_class + / + path: object + default: object = None + namespaces: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_findtext_impl(ElementObject *self, PyTypeObject *cls, + PyObject *path, PyObject *default_value, + PyObject *namespaces) +/*[clinic end generated code: output=6af7a2d96aac32cb input=32f252099f62a3d2]*/ +{ + Py_ssize_t i; + elementtreestate *st = get_elementtree_state_by_cls(cls); + + if (checkpath(path) || namespaces != Py_None) + return PyObject_CallMethodObjArgs( + st->elementpath_obj, st->str_findtext, + self, path, default_value, namespaces, NULL + ); + + if (!self->extra) { + return Py_NewRef(default_value); + } + + for (i = 0; i < self->extra->length; i++) { + PyObject *item = self->extra->children[i]; + int rc; + assert(Element_Check(st, item)); + Py_INCREF(item); + rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); + if (rc > 0) { + PyObject* text = element_get_text((ElementObject*)item); + if (text == Py_None) { + Py_DECREF(item); + return PyUnicode_New(0, 0); + } + Py_XINCREF(text); + Py_DECREF(item); + return text; + } + Py_DECREF(item); + if (rc < 0) + return NULL; + } + + return Py_NewRef(default_value); +} + +/*[clinic input] +_elementtree.Element.findall + + cls: defining_class + / + path: object + namespaces: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_findall_impl(ElementObject *self, PyTypeObject *cls, + PyObject *path, PyObject *namespaces) +/*[clinic end generated code: output=65e39a1208f3b59e input=7aa0db45673fc9a5]*/ +{ + Py_ssize_t i; + PyObject* out; + elementtreestate *st = get_elementtree_state_by_cls(cls); + + if (checkpath(path) || namespaces != Py_None) { + return PyObject_CallMethodObjArgs( + st->elementpath_obj, st->str_findall, self, path, namespaces, NULL + ); + } + + out = PyList_New(0); + if (!out) + return NULL; + + if (!self->extra) + return out; + + for (i = 0; i < self->extra->length; i++) { + PyObject* item = self->extra->children[i]; + int rc; + assert(Element_Check(st, item)); + Py_INCREF(item); + rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); + if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) { + Py_DECREF(item); + Py_DECREF(out); + return NULL; + } + Py_DECREF(item); + } + + return out; +} + +/*[clinic input] +_elementtree.Element.iterfind + + cls: defining_class + / + path: object + namespaces: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_iterfind_impl(ElementObject *self, PyTypeObject *cls, + PyObject *path, PyObject *namespaces) +/*[clinic end generated code: output=be5c3f697a14e676 input=88766875a5c9a88b]*/ +{ + PyObject* tag = path; + elementtreestate *st = get_elementtree_state_by_cls(cls); + + return PyObject_CallMethodObjArgs( + st->elementpath_obj, st->str_iterfind, self, tag, namespaces, NULL); +} + +/*[clinic input] +_elementtree.Element.get + + key: object + default: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_get_impl(ElementObject *self, PyObject *key, + PyObject *default_value) +/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/ +{ + if (self->extra && self->extra->attrib) { + PyObject *attrib = Py_NewRef(self->extra->attrib); + PyObject *value = Py_XNewRef(PyDict_GetItemWithError(attrib, key)); + Py_DECREF(attrib); + if (value != NULL || PyErr_Occurred()) { + return value; + } + } + + return Py_NewRef(default_value); +} + +static PyObject * +create_elementiter(elementtreestate *st, ElementObject *self, PyObject *tag, + int gettext); + + +/*[clinic input] +_elementtree.Element.iter + + cls: defining_class + / + tag: object = None + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_iter_impl(ElementObject *self, PyTypeObject *cls, + PyObject *tag) +/*[clinic end generated code: output=bff29dc5d4566c68 input=f6944c48d3f84c58]*/ +{ + if (PyUnicode_Check(tag)) { + if (PyUnicode_READY(tag) < 0) + return NULL; + if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*') + tag = Py_None; + } + else if (PyBytes_Check(tag)) { + if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*') + tag = Py_None; + } + + elementtreestate *st = get_elementtree_state_by_cls(cls); + return create_elementiter(st, self, tag, 0); +} + + +/*[clinic input] +_elementtree.Element.itertext + + cls: defining_class + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_itertext_impl(ElementObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=fdeb2a3bca0ae063 input=a1ef1f0fc872a586]*/ +{ + elementtreestate *st = get_elementtree_state_by_cls(cls); + return create_elementiter(st, self, Py_None, 1); +} + + +static PyObject* +element_getitem(PyObject* self_, Py_ssize_t index) +{ + ElementObject* self = (ElementObject*) self_; + + if (!self->extra || index < 0 || index >= self->extra->length) { + PyErr_SetString( + PyExc_IndexError, + "child index out of range" + ); + return NULL; + } + + return Py_NewRef(self->extra->children[index]); +} + +static int +element_bool(PyObject* self_) +{ + ElementObject* self = (ElementObject*) self_; + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Testing an element's truth value will raise an exception " + "in future versions. Use specific 'len(elem)' or " + "'elem is not None' test instead.", + 1) < 0) { + return -1; + }; + if (self->extra ? self->extra->length : 0) { + return 1; + } + return 0; +} + +/*[clinic input] +_elementtree.Element.insert + + index: Py_ssize_t + subelement: object(subclass_of='clinic_state()->Element_Type') + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, + PyObject *subelement) +/*[clinic end generated code: output=990adfef4d424c0b input=9530f4905aa401ca]*/ +{ + Py_ssize_t i; + + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return NULL; + } + + if (index < 0) { + index += self->extra->length; + if (index < 0) + index = 0; + } + if (index > self->extra->length) + index = self->extra->length; + + if (element_resize(self, 1) < 0) + return NULL; + + for (i = self->extra->length; i > index; i--) + self->extra->children[i] = self->extra->children[i-1]; + + self->extra->children[index] = Py_NewRef(subelement); + + self->extra->length++; + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.Element.items + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_items_impl(ElementObject *self) +/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/ +{ + if (!self->extra || !self->extra->attrib) + return PyList_New(0); + + return PyDict_Items(self->extra->attrib); +} + +/*[clinic input] +_elementtree.Element.keys + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_keys_impl(ElementObject *self) +/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/ +{ + if (!self->extra || !self->extra->attrib) + return PyList_New(0); + + return PyDict_Keys(self->extra->attrib); +} + +static Py_ssize_t +element_length(ElementObject* self) +{ + if (!self->extra) + return 0; + + return self->extra->length; +} + +/*[clinic input] +_elementtree.Element.makeelement + + cls: defining_class + tag: object + attrib: object(subclass_of='&PyDict_Type') + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_makeelement_impl(ElementObject *self, PyTypeObject *cls, + PyObject *tag, PyObject *attrib) +/*[clinic end generated code: output=d50bb17a47077d47 input=589829dab92f26e8]*/ +{ + PyObject* elem; + + attrib = PyDict_Copy(attrib); + if (!attrib) + return NULL; + + elementtreestate *st = get_elementtree_state_by_cls(cls); + elem = create_new_element(st, tag, attrib); + + Py_DECREF(attrib); + + return elem; +} + +/*[clinic input] +_elementtree.Element.remove + + subelement: object(subclass_of='clinic_state()->Element_Type') + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) +/*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ +{ + Py_ssize_t i; + int rc; + PyObject *found; + + if (!self->extra) { + /* element has no children, so raise exception */ + PyErr_SetString( + PyExc_ValueError, + "list.remove(x): x not in list" + ); + return NULL; + } + + for (i = 0; i < self->extra->length; i++) { + if (self->extra->children[i] == subelement) + break; + rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); + if (rc > 0) + break; + if (rc < 0) + return NULL; + } + + if (i >= self->extra->length) { + /* subelement is not in children, so raise exception */ + PyErr_SetString( + PyExc_ValueError, + "list.remove(x): x not in list" + ); + return NULL; + } + + found = self->extra->children[i]; + + self->extra->length--; + for (; i < self->extra->length; i++) + self->extra->children[i] = self->extra->children[i+1]; + + Py_DECREF(found); + Py_RETURN_NONE; +} + +static PyObject* +element_repr(ElementObject* self) +{ + int status; + + if (self->tag == NULL) + return PyUnicode_FromFormat("<Element at %p>", self); + + status = Py_ReprEnter((PyObject *)self); + if (status == 0) { + PyObject *res; + res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); + Py_ReprLeave((PyObject *)self); + return res; + } + if (status > 0) + PyErr_Format(PyExc_RuntimeError, + "reentrant call inside %s.__repr__", + Py_TYPE(self)->tp_name); + return NULL; +} + +/*[clinic input] +_elementtree.Element.set + + key: object + value: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_Element_set_impl(ElementObject *self, PyObject *key, + PyObject *value) +/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/ +{ + PyObject* attrib; + + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return NULL; + } + + attrib = element_get_attrib(self); + if (!attrib) + return NULL; + + if (PyDict_SetItem(attrib, key, value) < 0) + return NULL; + + Py_RETURN_NONE; +} + +static int +element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) +{ + ElementObject* self = (ElementObject*) self_; + Py_ssize_t i; + PyObject* old; + + if (!self->extra || index < 0 || index >= self->extra->length) { + PyErr_SetString( + PyExc_IndexError, + "child assignment index out of range"); + return -1; + } + + old = self->extra->children[index]; + + if (item) { + PyTypeObject *tp = Py_TYPE(self); + elementtreestate *st = get_elementtree_state_by_type(tp); + if (!Element_Check(st, item)) { + raise_type_error(item); + return -1; + } + self->extra->children[index] = Py_NewRef(item); + } else { + self->extra->length--; + for (i = index; i < self->extra->length; i++) + self->extra->children[i] = self->extra->children[i+1]; + } + + Py_DECREF(old); + + return 0; +} + +static PyObject* +element_subscr(PyObject* self_, PyObject* item) +{ + ElementObject* self = (ElementObject*) self_; + + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + + if (i == -1 && PyErr_Occurred()) { + return NULL; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_getitem(self_, i); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, i; + size_t cur; + PyObject* list; + + if (!self->extra) + return PyList_New(0); + + if (PySlice_Unpack(item, &start, &stop, &step) < 0) { + return NULL; + } + slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, + step); + + if (slicelen <= 0) + return PyList_New(0); + else { + list = PyList_New(slicelen); + if (!list) + return NULL; + + for (cur = start, i = 0; i < slicelen; + cur += step, i++) { + PyObject* item = Py_NewRef(self->extra->children[cur]); + PyList_SET_ITEM(list, i, item); + } + + return list; + } + } + else { + PyErr_SetString(PyExc_TypeError, + "element indices must be integers"); + return NULL; + } +} + +static int +element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) +{ + ElementObject* self = (ElementObject*) self_; + + if (PyIndex_Check(item)) { + Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); + + if (i == -1 && PyErr_Occurred()) { + return -1; + } + if (i < 0 && self->extra) + i += self->extra->length; + return element_setitem(self_, i, value); + } + else if (PySlice_Check(item)) { + Py_ssize_t start, stop, step, slicelen, newlen, i; + size_t cur; + + PyObject* recycle = NULL; + PyObject* seq; + + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return -1; + } + + if (PySlice_Unpack(item, &start, &stop, &step) < 0) { + return -1; + } + slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, + step); + + if (value == NULL) { + /* Delete slice */ + size_t cur; + Py_ssize_t i; + + if (slicelen <= 0) + return 0; + + /* Since we're deleting, the direction of the range doesn't matter, + * so for simplicity make it always ascending. + */ + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelen - 1) - 1; + step = -step; + } + + assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *)); + + /* recycle is a list that will contain all the children + * scheduled for removal. + */ + if (!(recycle = PyList_New(slicelen))) { + return -1; + } + + /* This loop walks over all the children that have to be deleted, + * with cur pointing at them. num_moved is the amount of children + * until the next deleted child that have to be "shifted down" to + * occupy the deleted's places. + * Note that in the ith iteration, shifting is done i+i places down + * because i children were already removed. + */ + for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { + /* Compute how many children have to be moved, clipping at the + * list end. + */ + Py_ssize_t num_moved = step - 1; + if (cur + step >= (size_t)self->extra->length) { + num_moved = self->extra->length - cur - 1; + } + + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + + memmove( + self->extra->children + cur - i, + self->extra->children + cur + 1, + num_moved * sizeof(PyObject *)); + } + + /* Leftover "tail" after the last removed child */ + cur = start + (size_t)slicelen * step; + if (cur < (size_t)self->extra->length) { + memmove( + self->extra->children + cur - slicelen, + self->extra->children + cur, + (self->extra->length - cur) * sizeof(PyObject *)); + } + + self->extra->length -= slicelen; + + /* Discard the recycle list with all the deleted sub-elements */ + Py_DECREF(recycle); + return 0; + } + + /* A new slice is actually being assigned */ + seq = PySequence_Fast(value, ""); + if (!seq) { + PyErr_Format( + PyExc_TypeError, + "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name + ); + return -1; + } + newlen = PySequence_Fast_GET_SIZE(seq); + + if (step != 1 && newlen != slicelen) + { + Py_DECREF(seq); + PyErr_Format(PyExc_ValueError, + "attempt to assign sequence of size %zd " + "to extended slice of size %zd", + newlen, slicelen + ); + return -1; + } + + /* Resize before creating the recycle bin, to prevent refleaks. */ + if (newlen > slicelen) { + if (element_resize(self, newlen - slicelen) < 0) { + Py_DECREF(seq); + return -1; + } + } + + PyTypeObject *tp = Py_TYPE(self); + elementtreestate *st = get_elementtree_state_by_type(tp); + for (i = 0; i < newlen; i++) { + PyObject *element = PySequence_Fast_GET_ITEM(seq, i); + if (!Element_Check(st, element)) { + raise_type_error(element); + Py_DECREF(seq); + return -1; + } + } + + if (slicelen > 0) { + /* to avoid recursive calls to this method (via decref), move + old items to the recycle bin here, and get rid of them when + we're done modifying the element */ + recycle = PyList_New(slicelen); + if (!recycle) { + Py_DECREF(seq); + return -1; + } + for (cur = start, i = 0; i < slicelen; + cur += step, i++) + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + } + + if (newlen < slicelen) { + /* delete slice */ + for (i = stop; i < self->extra->length; i++) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } else if (newlen > slicelen) { + /* insert slice */ + for (i = self->extra->length-1; i >= stop; i--) + self->extra->children[i + newlen - slicelen] = self->extra->children[i]; + } + + /* replace the slice */ + for (cur = start, i = 0; i < newlen; + cur += step, i++) { + PyObject* element = PySequence_Fast_GET_ITEM(seq, i); + self->extra->children[cur] = Py_NewRef(element); + } + + self->extra->length += newlen - slicelen; + + Py_DECREF(seq); + + /* discard the recycle bin, and everything in it */ + Py_XDECREF(recycle); + + return 0; + } + else { + PyErr_SetString(PyExc_TypeError, + "element indices must be integers"); + return -1; + } +} + +static PyObject* +element_tag_getter(ElementObject *self, void *closure) +{ + PyObject *res = self->tag; + return Py_NewRef(res); +} + +static PyObject* +element_text_getter(ElementObject *self, void *closure) +{ + PyObject *res = element_get_text(self); + return Py_XNewRef(res); +} + +static PyObject* +element_tail_getter(ElementObject *self, void *closure) +{ + PyObject *res = element_get_tail(self); + return Py_XNewRef(res); +} + +static PyObject* +element_attrib_getter(ElementObject *self, void *closure) +{ + PyObject *res; + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return NULL; + } + res = element_get_attrib(self); + return Py_XNewRef(res); +} + +/* macro for setter validation */ +#define _VALIDATE_ATTR_VALUE(V) \ + if ((V) == NULL) { \ + PyErr_SetString( \ + PyExc_AttributeError, \ + "can't delete element attribute"); \ + return -1; \ + } + +static int +element_tag_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + Py_SETREF(self->tag, Py_NewRef(value)); + return 0; +} + +static int +element_text_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + _set_joined_ptr(&self->text, Py_NewRef(value)); + return 0; +} + +static int +element_tail_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + _set_joined_ptr(&self->tail, Py_NewRef(value)); + return 0; +} + +static int +element_attrib_setter(ElementObject *self, PyObject *value, void *closure) +{ + _VALIDATE_ATTR_VALUE(value); + if (!PyDict_Check(value)) { + PyErr_Format(PyExc_TypeError, + "attrib must be dict, not %.200s", + Py_TYPE(value)->tp_name); + return -1; + } + if (!self->extra) { + if (create_extra(self, NULL) < 0) + return -1; + } + Py_XSETREF(self->extra->attrib, Py_NewRef(value)); + return 0; +} + +/******************************* Element iterator ****************************/ + +/* ElementIterObject represents the iteration state over an XML element in + * pre-order traversal. To keep track of which sub-element should be returned + * next, a stack of parents is maintained. This is a standard stack-based + * iterative pre-order traversal of a tree. + * The stack is managed using a continuous array. + * Each stack item contains the saved parent to which we should return after + * the current one is exhausted, and the next child to examine in that parent. + */ +typedef struct ParentLocator_t { + ElementObject *parent; + Py_ssize_t child_index; +} ParentLocator; + +typedef struct { + PyObject_HEAD + ParentLocator *parent_stack; + Py_ssize_t parent_stack_used; + Py_ssize_t parent_stack_size; + ElementObject *root_element; + PyObject *sought_tag; + int gettext; +} ElementIterObject; + + +static void +elementiter_dealloc(ElementIterObject *it) +{ + PyTypeObject *tp = Py_TYPE(it); + Py_ssize_t i = it->parent_stack_used; + it->parent_stack_used = 0; + /* bpo-31095: UnTrack is needed before calling any callbacks */ + PyObject_GC_UnTrack(it); + while (i--) + Py_XDECREF(it->parent_stack[i].parent); + PyMem_Free(it->parent_stack); + + Py_XDECREF(it->sought_tag); + Py_XDECREF(it->root_element); + + tp->tp_free(it); + Py_DECREF(tp); +} + +static int +elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg) +{ + Py_ssize_t i = it->parent_stack_used; + while (i--) + Py_VISIT(it->parent_stack[i].parent); + + Py_VISIT(it->root_element); + Py_VISIT(it->sought_tag); + Py_VISIT(Py_TYPE(it)); + return 0; +} + +/* Helper function for elementiter_next. Add a new parent to the parent stack. + */ +static int +parent_stack_push_new(ElementIterObject *it, ElementObject *parent) +{ + ParentLocator *item; + + if (it->parent_stack_used >= it->parent_stack_size) { + Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */ + ParentLocator *parent_stack = it->parent_stack; + PyMem_Resize(parent_stack, ParentLocator, new_size); + if (parent_stack == NULL) + return -1; + it->parent_stack = parent_stack; + it->parent_stack_size = new_size; + } + item = it->parent_stack + it->parent_stack_used++; + item->parent = (ElementObject*)Py_NewRef(parent); + item->child_index = 0; + return 0; +} + +static PyObject * +elementiter_next(ElementIterObject *it) +{ + /* Sub-element iterator. + * + * A short note on gettext: this function serves both the iter() and + * itertext() methods to avoid code duplication. However, there are a few + * small differences in the way these iterations work. Namely: + * - itertext() only yields text from nodes that have it, and continues + * iterating when a node doesn't have text (so it doesn't return any + * node like iter()) + * - itertext() also has to handle tail, after finishing with all the + * children of a node. + */ + int rc; + ElementObject *elem; + PyObject *text; + + while (1) { + /* Handle the case reached in the beginning and end of iteration, where + * the parent stack is empty. If root_element is NULL and we're here, the + * iterator is exhausted. + */ + if (!it->parent_stack_used) { + if (!it->root_element) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + elem = it->root_element; /* steals a reference */ + it->root_element = NULL; + } + else { + /* See if there are children left to traverse in the current parent. If + * yes, visit the next child. If not, pop the stack and try again. + */ + ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1]; + Py_ssize_t child_index = item->child_index; + ElementObjectExtra *extra; + elem = item->parent; + extra = elem->extra; + if (!extra || child_index >= extra->length) { + it->parent_stack_used--; + /* Note that extra condition on it->parent_stack_used here; + * this is because itertext() is supposed to only return *inner* + * text, not text following the element it began iteration with. + */ + if (it->gettext && it->parent_stack_used) { + text = element_get_tail(elem); + goto gettext; + } + Py_DECREF(elem); + continue; + } + +#ifndef NDEBUG + PyTypeObject *tp = Py_TYPE(it); + elementtreestate *st = get_elementtree_state_by_type(tp); + assert(Element_Check(st, extra->children[child_index])); +#endif + elem = (ElementObject *)Py_NewRef(extra->children[child_index]); + item->child_index++; + } + + if (parent_stack_push_new(it, elem) < 0) { + Py_DECREF(elem); + PyErr_NoMemory(); + return NULL; + } + if (it->gettext) { + text = element_get_text(elem); + goto gettext; + } + + if (it->sought_tag == Py_None) + return (PyObject *)elem; + + rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); + if (rc > 0) + return (PyObject *)elem; + + Py_DECREF(elem); + if (rc < 0) + return NULL; + continue; + +gettext: + if (!text) { + Py_DECREF(elem); + return NULL; + } + if (text == Py_None) { + Py_DECREF(elem); + } + else { + Py_INCREF(text); + Py_DECREF(elem); + rc = PyObject_IsTrue(text); + if (rc > 0) + return text; + Py_DECREF(text); + if (rc < 0) + return NULL; + } + } + + return NULL; +} + +static PyType_Slot elementiter_slots[] = { + {Py_tp_dealloc, elementiter_dealloc}, + {Py_tp_traverse, elementiter_traverse}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, elementiter_next}, + {0, NULL}, +}; + +static PyType_Spec elementiter_spec = { + /* Using the module's name since the pure-Python implementation does not + have such a type. */ + .name = "_elementtree._element_iterator", + .basicsize = sizeof(ElementIterObject), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION), + .slots = elementiter_slots, +}; + +#define INIT_PARENT_STACK_SIZE 8 + +static PyObject * +create_elementiter(elementtreestate *st, ElementObject *self, PyObject *tag, + int gettext) +{ + ElementIterObject *it; + + it = PyObject_GC_New(ElementIterObject, st->ElementIter_Type); + if (!it) + return NULL; + + it->sought_tag = Py_NewRef(tag); + it->gettext = gettext; + it->root_element = (ElementObject*)Py_NewRef(self); + + it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE); + if (it->parent_stack == NULL) { + Py_DECREF(it); + PyErr_NoMemory(); + return NULL; + } + it->parent_stack_used = 0; + it->parent_stack_size = INIT_PARENT_STACK_SIZE; + + PyObject_GC_Track(it); + + return (PyObject *)it; +} + + +/* ==================================================================== */ +/* the tree builder type */ + +typedef struct { + PyObject_HEAD + + PyObject *root; /* root node (first created node) */ + + PyObject *this; /* current node */ + PyObject *last; /* most recently created node */ + PyObject *last_for_tail; /* most recently created node that takes a tail */ + + PyObject *data; /* data collector (string or list), or NULL */ + + PyObject *stack; /* element stack */ + Py_ssize_t index; /* current stack size (0 means empty) */ + + PyObject *element_factory; + PyObject *comment_factory; + PyObject *pi_factory; + + /* element tracing */ + PyObject *events_append; /* the append method of the list of events, or NULL */ + PyObject *start_event_obj; /* event objects (NULL to ignore) */ + PyObject *end_event_obj; + PyObject *start_ns_event_obj; + PyObject *end_ns_event_obj; + PyObject *comment_event_obj; + PyObject *pi_event_obj; + + char insert_comments; + char insert_pis; + elementtreestate *state; +} TreeBuilderObject; + +#define TreeBuilder_CheckExact(st, op) Py_IS_TYPE((op), (st)->TreeBuilder_Type) + +/* -------------------------------------------------------------------- */ +/* constructor and destructor */ + +static PyObject * +treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0); + if (t != NULL) { + t->root = NULL; + t->this = Py_NewRef(Py_None); + t->last = Py_NewRef(Py_None); + t->data = NULL; + t->element_factory = NULL; + t->comment_factory = NULL; + t->pi_factory = NULL; + t->stack = PyList_New(20); + if (!t->stack) { + Py_DECREF(t->this); + Py_DECREF(t->last); + Py_DECREF((PyObject *) t); + return NULL; + } + t->index = 0; + + t->events_append = NULL; + t->start_event_obj = t->end_event_obj = NULL; + t->start_ns_event_obj = t->end_ns_event_obj = NULL; + t->comment_event_obj = t->pi_event_obj = NULL; + t->insert_comments = t->insert_pis = 0; + t->state = get_elementtree_state_by_type(type); + } + return (PyObject *)t; +} + +/*[clinic input] +_elementtree.TreeBuilder.__init__ + + element_factory: object = None + * + comment_factory: object = None + pi_factory: object = None + insert_comments: bool = False + insert_pis: bool = False + +[clinic start generated code]*/ + +static int +_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, + PyObject *element_factory, + PyObject *comment_factory, + PyObject *pi_factory, + int insert_comments, int insert_pis) +/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/ +{ + if (element_factory != Py_None) { + Py_XSETREF(self->element_factory, Py_NewRef(element_factory)); + } else { + Py_CLEAR(self->element_factory); + } + + if (comment_factory == Py_None) { + elementtreestate *st = self->state; + comment_factory = st->comment_factory; + } + if (comment_factory) { + Py_XSETREF(self->comment_factory, Py_NewRef(comment_factory)); + self->insert_comments = insert_comments; + } else { + Py_CLEAR(self->comment_factory); + self->insert_comments = 0; + } + + if (pi_factory == Py_None) { + elementtreestate *st = self->state; + pi_factory = st->pi_factory; + } + if (pi_factory) { + Py_XSETREF(self->pi_factory, Py_NewRef(pi_factory)); + self->insert_pis = insert_pis; + } else { + Py_CLEAR(self->pi_factory); + self->insert_pis = 0; + } + + return 0; +} + +static int +treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->pi_event_obj); + Py_VISIT(self->comment_event_obj); + Py_VISIT(self->end_ns_event_obj); + Py_VISIT(self->start_ns_event_obj); + Py_VISIT(self->end_event_obj); + Py_VISIT(self->start_event_obj); + Py_VISIT(self->events_append); + Py_VISIT(self->root); + Py_VISIT(self->this); + Py_VISIT(self->last); + Py_VISIT(self->last_for_tail); + Py_VISIT(self->data); + Py_VISIT(self->stack); + Py_VISIT(self->pi_factory); + Py_VISIT(self->comment_factory); + Py_VISIT(self->element_factory); + return 0; +} + +static int +treebuilder_gc_clear(TreeBuilderObject *self) +{ + Py_CLEAR(self->pi_event_obj); + Py_CLEAR(self->comment_event_obj); + Py_CLEAR(self->end_ns_event_obj); + Py_CLEAR(self->start_ns_event_obj); + Py_CLEAR(self->end_event_obj); + Py_CLEAR(self->start_event_obj); + Py_CLEAR(self->events_append); + Py_CLEAR(self->stack); + Py_CLEAR(self->data); + Py_CLEAR(self->last); + Py_CLEAR(self->last_for_tail); + Py_CLEAR(self->this); + Py_CLEAR(self->pi_factory); + Py_CLEAR(self->comment_factory); + Py_CLEAR(self->element_factory); + Py_CLEAR(self->root); + return 0; +} + +static void +treebuilder_dealloc(TreeBuilderObject *self) +{ + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_UnTrack(self); + treebuilder_gc_clear(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +/* -------------------------------------------------------------------- */ +/* helpers for handling of arbitrary element-like objects */ + +/*[clinic input] +_elementtree._set_factories + + comment_factory: object + pi_factory: object + / + +Change the factories used to create comments and processing instructions. + +For internal use only. +[clinic start generated code]*/ + +static PyObject * +_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, + PyObject *pi_factory) +/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/ +{ + elementtreestate *st = get_elementtree_state(module); + PyObject *old; + + if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s", + Py_TYPE(comment_factory)->tp_name); + return NULL; + } + if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s", + Py_TYPE(pi_factory)->tp_name); + return NULL; + } + + old = PyTuple_Pack(2, + st->comment_factory ? st->comment_factory : Py_None, + st->pi_factory ? st->pi_factory : Py_None); + + if (comment_factory == Py_None) { + Py_CLEAR(st->comment_factory); + } else { + Py_XSETREF(st->comment_factory, Py_NewRef(comment_factory)); + } + if (pi_factory == Py_None) { + Py_CLEAR(st->pi_factory); + } else { + Py_XSETREF(st->pi_factory, Py_NewRef(pi_factory)); + } + + return old; +} + +static int +treebuilder_extend_element_text_or_tail(elementtreestate *st, PyObject *element, + PyObject **data, PyObject **dest, + PyObject *name) +{ + /* Fast paths for the "almost always" cases. */ + if (Element_CheckExact(st, element)) { + PyObject *dest_obj = JOIN_OBJ(*dest); + if (dest_obj == Py_None) { + *dest = JOIN_SET(*data, PyList_CheckExact(*data)); + *data = NULL; + Py_DECREF(dest_obj); + return 0; + } + else if (JOIN_GET(*dest)) { + if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) { + return -1; + } + Py_CLEAR(*data); + return 0; + } + } + + /* Fallback for the non-Element / non-trivial cases. */ + { + int r; + PyObject* joined; + PyObject* previous = PyObject_GetAttr(element, name); + if (!previous) + return -1; + joined = list_join(*data); + if (!joined) { + Py_DECREF(previous); + return -1; + } + if (previous != Py_None) { + PyObject *tmp = PyNumber_Add(previous, joined); + Py_DECREF(joined); + Py_DECREF(previous); + if (!tmp) + return -1; + joined = tmp; + } else { + Py_DECREF(previous); + } + + r = PyObject_SetAttr(element, name, joined); + Py_DECREF(joined); + if (r < 0) + return -1; + Py_CLEAR(*data); + return 0; + } +} + +LOCAL(int) +treebuilder_flush_data(TreeBuilderObject* self) +{ + if (!self->data) { + return 0; + } + elementtreestate *st = self->state; + if (!self->last_for_tail) { + PyObject *element = self->last; + return treebuilder_extend_element_text_or_tail( + st, element, &self->data, + &((ElementObject *) element)->text, st->str_text); + } + else { + PyObject *element = self->last_for_tail; + return treebuilder_extend_element_text_or_tail( + st, element, &self->data, + &((ElementObject *) element)->tail, st->str_tail); + } +} + +static int +treebuilder_add_subelement(elementtreestate *st, PyObject *element, + PyObject *child) +{ + if (Element_CheckExact(st, element)) { + ElementObject *elem = (ElementObject *) element; + return element_add_subelement(st, elem, child); + } + else { + PyObject *res; + res = PyObject_CallMethodOneArg(element, st->str_append, child); + if (res == NULL) + return -1; + Py_DECREF(res); + return 0; + } +} + +LOCAL(int) +treebuilder_append_event(TreeBuilderObject *self, PyObject *action, + PyObject *node) +{ + if (action != NULL) { + PyObject *res; + PyObject *event = PyTuple_Pack(2, action, node); + if (event == NULL) + return -1; + res = PyObject_CallOneArg(self->events_append, event); + Py_DECREF(event); + if (res == NULL) + return -1; + Py_DECREF(res); + } + return 0; +} + +/* -------------------------------------------------------------------- */ +/* handlers */ + +LOCAL(PyObject*) +treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, + PyObject* attrib) +{ + PyObject* node; + PyObject* this; + elementtreestate *st = self->state; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (!self->element_factory) { + node = create_new_element(st, tag, attrib); + } + else if (attrib == NULL) { + attrib = PyDict_New(); + if (!attrib) + return NULL; + node = PyObject_CallFunctionObjArgs(self->element_factory, + tag, attrib, NULL); + Py_DECREF(attrib); + } + else { + node = PyObject_CallFunctionObjArgs(self->element_factory, + tag, attrib, NULL); + } + if (!node) { + return NULL; + } + + this = self->this; + Py_CLEAR(self->last_for_tail); + + if (this != Py_None) { + if (treebuilder_add_subelement(st, this, node) < 0) { + goto error; + } + } else { + if (self->root) { + PyErr_SetString( + st->parseerror_obj, + "multiple elements on top level" + ); + goto error; + } + self->root = Py_NewRef(node); + } + + if (self->index < PyList_GET_SIZE(self->stack)) { + if (PyList_SetItem(self->stack, self->index, this) < 0) + goto error; + Py_INCREF(this); + } else { + if (PyList_Append(self->stack, this) < 0) + goto error; + } + self->index++; + + Py_SETREF(self->this, Py_NewRef(node)); + Py_SETREF(self->last, Py_NewRef(node)); + + if (treebuilder_append_event(self, self->start_event_obj, node) < 0) + goto error; + + return node; + + error: + Py_DECREF(node); + return NULL; +} + +LOCAL(PyObject*) +treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) +{ + if (!self->data) { + if (self->last == Py_None) { + /* ignore calls to data before the first call to start */ + Py_RETURN_NONE; + } + /* store the first item as is */ + self->data = Py_NewRef(data); + } else { + /* more than one item; use a list to collect items */ + if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && + PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { + /* XXX this code path unused in Python 3? */ + /* expat often generates single character data sections; handle + the most common case by resizing the existing string... */ + Py_ssize_t size = PyBytes_GET_SIZE(self->data); + if (_PyBytes_Resize(&self->data, size + 1) < 0) + return NULL; + PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0]; + } else if (PyList_CheckExact(self->data)) { + if (PyList_Append(self->data, data) < 0) + return NULL; + } else { + PyObject* list = PyList_New(2); + if (!list) + return NULL; + PyList_SET_ITEM(list, 0, Py_NewRef(self->data)); + PyList_SET_ITEM(list, 1, Py_NewRef(data)); + Py_SETREF(self->data, list); + } + } + + Py_RETURN_NONE; +} + +LOCAL(PyObject*) +treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) +{ + PyObject* item; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->index == 0) { + PyErr_SetString( + PyExc_IndexError, + "pop from empty stack" + ); + return NULL; + } + + item = self->last; + self->last = Py_NewRef(self->this); + Py_XSETREF(self->last_for_tail, self->last); + self->index--; + self->this = Py_NewRef(PyList_GET_ITEM(self->stack, self->index)); + Py_DECREF(item); + + if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0) + return NULL; + + return Py_NewRef(self->last); +} + +LOCAL(PyObject*) +treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) +{ + PyObject* comment; + PyObject* this; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->comment_factory) { + comment = PyObject_CallOneArg(self->comment_factory, text); + if (!comment) + return NULL; + + this = self->this; + if (self->insert_comments && this != Py_None) { + if (treebuilder_add_subelement(self->state, this, comment) < 0) { + goto error; + } + Py_XSETREF(self->last_for_tail, Py_NewRef(comment)); + } + } else { + comment = Py_NewRef(text); + } + + if (self->events_append && self->comment_event_obj) { + if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0) + goto error; + } + + return comment; + + error: + Py_DECREF(comment); + return NULL; +} + +LOCAL(PyObject*) +treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) +{ + PyObject* pi; + PyObject* this; + PyObject* stack[2] = {target, text}; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->pi_factory) { + pi = _PyObject_FastCall(self->pi_factory, stack, 2); + if (!pi) { + return NULL; + } + + this = self->this; + if (self->insert_pis && this != Py_None) { + if (treebuilder_add_subelement(self->state, this, pi) < 0) { + goto error; + } + Py_XSETREF(self->last_for_tail, Py_NewRef(pi)); + } + } else { + pi = PyTuple_Pack(2, target, text); + if (!pi) { + return NULL; + } + } + + if (self->events_append && self->pi_event_obj) { + if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0) + goto error; + } + + return pi; + + error: + Py_DECREF(pi); + return NULL; +} + +LOCAL(PyObject*) +treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri) +{ + PyObject* parcel; + + if (self->events_append && self->start_ns_event_obj) { + parcel = PyTuple_Pack(2, prefix, uri); + if (!parcel) { + return NULL; + } + + if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) { + Py_DECREF(parcel); + return NULL; + } + Py_DECREF(parcel); + } + + Py_RETURN_NONE; +} + +LOCAL(PyObject*) +treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix) +{ + if (self->events_append && self->end_ns_event_obj) { + if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) { + return NULL; + } + } + + Py_RETURN_NONE; +} + +/* -------------------------------------------------------------------- */ +/* methods (in alphabetical order) */ + +/*[clinic input] +_elementtree.TreeBuilder.data + + data: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data) +/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/ +{ + return treebuilder_handle_data(self, data); +} + +/*[clinic input] +_elementtree.TreeBuilder.end + + tag: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag) +/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/ +{ + return treebuilder_handle_end(self, tag); +} + +/*[clinic input] +_elementtree.TreeBuilder.comment + + text: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text) +/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/ +{ + return treebuilder_handle_comment(self, text); +} + +/*[clinic input] +_elementtree.TreeBuilder.pi + + target: object + text: object = None + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target, + PyObject *text) +/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/ +{ + return treebuilder_handle_pi(self, target, text); +} + +LOCAL(PyObject*) +treebuilder_done(TreeBuilderObject* self) +{ + PyObject* res; + + /* FIXME: check stack size? */ + + if (self->root) + res = self->root; + else + res = Py_None; + + return Py_NewRef(res); +} + +/*[clinic input] +_elementtree.TreeBuilder.close + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self) +/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/ +{ + return treebuilder_done(self); +} + +/*[clinic input] +_elementtree.TreeBuilder.start + + tag: object + attrs: object(subclass_of='&PyDict_Type') + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag, + PyObject *attrs) +/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/ +{ + return treebuilder_handle_start(self, tag, attrs); +} + +/* ==================================================================== */ +/* the expat interface */ + +#define EXPAT(st, func) ((st)->expat_capi->func) + +static XML_Memory_Handling_Suite ExpatMemoryHandler = { + PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + +typedef struct { + PyObject_HEAD + + XML_Parser parser; + + PyObject *target; + PyObject *entity; + + PyObject *names; + + PyObject *handle_start_ns; + PyObject *handle_end_ns; + PyObject *handle_start; + PyObject *handle_data; + PyObject *handle_end; + + PyObject *handle_comment; + PyObject *handle_pi; + PyObject *handle_doctype; + + PyObject *handle_close; + + elementtreestate *state; + PyObject *elementtree_module; +} XMLParserObject; + +/* helpers */ + +LOCAL(PyObject*) +makeuniversal(XMLParserObject* self, const char* string) +{ + /* convert a UTF-8 tag/attribute name from the expat parser + to a universal name string */ + + Py_ssize_t size = (Py_ssize_t) strlen(string); + PyObject* key; + PyObject* value; + + /* look the 'raw' name up in the names dictionary */ + key = PyBytes_FromStringAndSize(string, size); + if (!key) + return NULL; + + value = Py_XNewRef(PyDict_GetItemWithError(self->names, key)); + + if (value == NULL && !PyErr_Occurred()) { + /* new name. convert to universal name, and decode as + necessary */ + + PyObject* tag; + char* p; + Py_ssize_t i; + + /* look for namespace separator */ + for (i = 0; i < size; i++) + if (string[i] == '}') + break; + if (i != size) { + /* convert to universal name */ + tag = PyBytes_FromStringAndSize(NULL, size+1); + if (tag == NULL) { + Py_DECREF(key); + return NULL; + } + p = PyBytes_AS_STRING(tag); + p[0] = '{'; + memcpy(p+1, string, size); + size++; + } else { + /* plain name; use key as tag */ + tag = Py_NewRef(key); + } + + /* decode universal name */ + p = PyBytes_AS_STRING(tag); + value = PyUnicode_DecodeUTF8(p, size, "strict"); + Py_DECREF(tag); + if (!value) { + Py_DECREF(key); + return NULL; + } + + /* add to names dictionary */ + if (PyDict_SetItem(self->names, key, value) < 0) { + Py_DECREF(key); + Py_DECREF(value); + return NULL; + } + } + + Py_DECREF(key); + return value; +} + +/* Set the ParseError exception with the given parameters. + * If message is not NULL, it's used as the error string. Otherwise, the + * message string is the default for the given error_code. +*/ +static void +expat_set_error(elementtreestate *st, enum XML_Error error_code, + Py_ssize_t line, Py_ssize_t column, const char *message) +{ + PyObject *errmsg, *error, *position, *code; + + errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd", + message ? message : EXPAT(st, ErrorString)(error_code), + line, column); + if (errmsg == NULL) + return; + + error = PyObject_CallOneArg(st->parseerror_obj, errmsg); + Py_DECREF(errmsg); + if (!error) + return; + + /* Add code and position attributes */ + code = PyLong_FromLong((long)error_code); + if (!code) { + Py_DECREF(error); + return; + } + if (PyObject_SetAttrString(error, "code", code) == -1) { + Py_DECREF(error); + Py_DECREF(code); + return; + } + Py_DECREF(code); + + position = Py_BuildValue("(nn)", line, column); + if (!position) { + Py_DECREF(error); + return; + } + if (PyObject_SetAttrString(error, "position", position) == -1) { + Py_DECREF(error); + Py_DECREF(position); + return; + } + Py_DECREF(position); + + PyErr_SetObject(st->parseerror_obj, error); + Py_DECREF(error); +} + +/* -------------------------------------------------------------------- */ +/* handlers */ + +static void +expat_default_handler(XMLParserObject* self, const XML_Char* data_in, + int data_len) +{ + PyObject* key; + PyObject* value; + PyObject* res; + + if (data_len < 2 || data_in[0] != '&') + return; + + if (PyErr_Occurred()) + return; + + key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict"); + if (!key) + return; + + value = PyDict_GetItemWithError(self->entity, key); + + elementtreestate *st = self->state; + if (value) { + if (TreeBuilder_CheckExact(st, self->target)) + res = treebuilder_handle_data( + (TreeBuilderObject*) self->target, value + ); + else if (self->handle_data) + res = PyObject_CallOneArg(self->handle_data, value); + else + res = NULL; + Py_XDECREF(res); + } else if (!PyErr_Occurred()) { + /* Report the first error, not the last */ + char message[128] = "undefined entity "; + strncat(message, data_in, data_len < 100?data_len:100); + expat_set_error( + st, + XML_ERROR_UNDEFINED_ENTITY, + EXPAT(st, GetErrorLineNumber)(self->parser), + EXPAT(st, GetErrorColumnNumber)(self->parser), + message + ); + } + + Py_DECREF(key); +} + +static void +expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, + const XML_Char **attrib_in) +{ + PyObject* res; + PyObject* tag; + PyObject* attrib; + int ok; + + if (PyErr_Occurred()) + return; + + /* tag name */ + tag = makeuniversal(self, tag_in); + if (!tag) + return; /* parser will look for errors */ + + /* attributes */ + if (attrib_in[0]) { + attrib = PyDict_New(); + if (!attrib) { + Py_DECREF(tag); + return; + } + while (attrib_in[0] && attrib_in[1]) { + PyObject* key = makeuniversal(self, attrib_in[0]); + if (key == NULL) { + Py_DECREF(attrib); + Py_DECREF(tag); + return; + } + PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict"); + if (value == NULL) { + Py_DECREF(key); + Py_DECREF(attrib); + Py_DECREF(tag); + return; + } + ok = PyDict_SetItem(attrib, key, value); + Py_DECREF(value); + Py_DECREF(key); + if (ok < 0) { + Py_DECREF(attrib); + Py_DECREF(tag); + return; + } + attrib_in += 2; + } + } else { + attrib = NULL; + } + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) { + /* shortcut */ + res = treebuilder_handle_start((TreeBuilderObject*) self->target, + tag, attrib); + } + else if (self->handle_start) { + if (attrib == NULL) { + attrib = PyDict_New(); + if (!attrib) { + Py_DECREF(tag); + return; + } + } + res = PyObject_CallFunctionObjArgs(self->handle_start, + tag, attrib, NULL); + } else + res = NULL; + + Py_DECREF(tag); + Py_XDECREF(attrib); + + Py_XDECREF(res); +} + +static void +expat_data_handler(XMLParserObject* self, const XML_Char* data_in, + int data_len) +{ + PyObject* data; + PyObject* res; + + if (PyErr_Occurred()) + return; + + data = PyUnicode_DecodeUTF8(data_in, data_len, "strict"); + if (!data) + return; /* parser will look for errors */ + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) + /* shortcut */ + res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); + else if (self->handle_data) + res = PyObject_CallOneArg(self->handle_data, data); + else + res = NULL; + + Py_DECREF(data); + + Py_XDECREF(res); +} + +static void +expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) +{ + PyObject* tag; + PyObject* res = NULL; + + if (PyErr_Occurred()) + return; + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) + /* shortcut */ + /* the standard tree builder doesn't look at the end tag */ + res = treebuilder_handle_end( + (TreeBuilderObject*) self->target, Py_None + ); + else if (self->handle_end) { + tag = makeuniversal(self, tag_in); + if (tag) { + res = PyObject_CallOneArg(self->handle_end, tag); + Py_DECREF(tag); + } + } + + Py_XDECREF(res); +} + +static void +expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in, + const XML_Char *uri_in) +{ + PyObject* res = NULL; + PyObject* uri; + PyObject* prefix; + PyObject* stack[2]; + + if (PyErr_Occurred()) + return; + + if (!uri_in) + uri_in = ""; + if (!prefix_in) + prefix_in = ""; + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) { + /* shortcut - TreeBuilder does not actually implement .start_ns() */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + if (target->events_append && target->start_ns_event_obj) { + prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); + if (!prefix) + return; + uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict"); + if (!uri) { + Py_DECREF(prefix); + return; + } + + res = treebuilder_handle_start_ns(target, prefix, uri); + Py_DECREF(uri); + Py_DECREF(prefix); + } + } else if (self->handle_start_ns) { + prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); + if (!prefix) + return; + uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict"); + if (!uri) { + Py_DECREF(prefix); + return; + } + + stack[0] = prefix; + stack[1] = uri; + res = _PyObject_FastCall(self->handle_start_ns, stack, 2); + Py_DECREF(uri); + Py_DECREF(prefix); + } + + Py_XDECREF(res); +} + +static void +expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) +{ + PyObject *res = NULL; + PyObject* prefix; + + if (PyErr_Occurred()) + return; + + if (!prefix_in) + prefix_in = ""; + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) { + /* shortcut - TreeBuilder does not actually implement .end_ns() */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + if (target->events_append && target->end_ns_event_obj) { + res = treebuilder_handle_end_ns(target, Py_None); + } + } else if (self->handle_end_ns) { + prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); + if (!prefix) + return; + + res = PyObject_CallOneArg(self->handle_end_ns, prefix); + Py_DECREF(prefix); + } + + Py_XDECREF(res); +} + +static void +expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) +{ + PyObject* comment; + PyObject* res; + + if (PyErr_Occurred()) + return; + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); + if (!comment) + return; /* parser will look for errors */ + + res = treebuilder_handle_comment(target, comment); + Py_XDECREF(res); + Py_DECREF(comment); + } else if (self->handle_comment) { + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); + if (!comment) + return; + + res = PyObject_CallOneArg(self->handle_comment, comment); + Py_XDECREF(res); + Py_DECREF(comment); + } +} + +static void +expat_start_doctype_handler(XMLParserObject *self, + const XML_Char *doctype_name, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) +{ + PyObject *doctype_name_obj, *sysid_obj, *pubid_obj; + PyObject *res; + + if (PyErr_Occurred()) + return; + + doctype_name_obj = makeuniversal(self, doctype_name); + if (!doctype_name_obj) + return; + + if (sysid) { + sysid_obj = makeuniversal(self, sysid); + if (!sysid_obj) { + Py_DECREF(doctype_name_obj); + return; + } + } else { + sysid_obj = Py_NewRef(Py_None); + } + + if (pubid) { + pubid_obj = makeuniversal(self, pubid); + if (!pubid_obj) { + Py_DECREF(doctype_name_obj); + Py_DECREF(sysid_obj); + return; + } + } else { + pubid_obj = Py_NewRef(Py_None); + } + + elementtreestate *st = self->state; + /* If the target has a handler for doctype, call it. */ + if (self->handle_doctype) { + res = PyObject_CallFunctionObjArgs(self->handle_doctype, + doctype_name_obj, pubid_obj, + sysid_obj, NULL); + Py_XDECREF(res); + } + else if (_PyObject_LookupAttr((PyObject *)self, st->str_doctype, &res) > 0) { + (void)PyErr_WarnEx(PyExc_RuntimeWarning, + "The doctype() method of XMLParser is ignored. " + "Define doctype() method on the TreeBuilder target.", + 1); + Py_DECREF(res); + } + + Py_DECREF(doctype_name_obj); + Py_DECREF(pubid_obj); + Py_DECREF(sysid_obj); +} + +static void +expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, + const XML_Char* data_in) +{ + PyObject* pi_target; + PyObject* data; + PyObject* res; + PyObject* stack[2]; + + if (PyErr_Occurred()) + return; + + elementtreestate *st = self->state; + if (TreeBuilder_CheckExact(st, self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + if ((target->events_append && target->pi_event_obj) || target->insert_pis) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + res = treebuilder_handle_pi(target, pi_target, data); + Py_XDECREF(res); + Py_DECREF(data); + Py_DECREF(pi_target); + } + } else if (self->handle_pi) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + + stack[0] = pi_target; + stack[1] = data; + res = _PyObject_FastCall(self->handle_pi, stack, 2); + Py_XDECREF(res); + Py_DECREF(data); + Py_DECREF(pi_target); + } + + return; + + error: + Py_XDECREF(pi_target); + return; +} + +/* -------------------------------------------------------------------- */ + +static PyObject * +xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); + if (self) { + self->parser = NULL; + self->target = self->entity = self->names = NULL; + self->handle_start_ns = self->handle_end_ns = NULL; + self->handle_start = self->handle_data = self->handle_end = NULL; + self->handle_comment = self->handle_pi = self->handle_close = NULL; + self->handle_doctype = NULL; + self->elementtree_module = PyType_GetModuleByDef(type, &elementtreemodule); + assert(self->elementtree_module != NULL); + Py_INCREF(self->elementtree_module); + // See gh-111784 for explanation why is reference to module needed here. + self->state = get_elementtree_state(self->elementtree_module); + } + return (PyObject *)self; +} + +static int +ignore_attribute_error(PyObject *value) +{ + if (value == NULL) { + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { + return -1; + } + PyErr_Clear(); + } + return 0; +} + +/*[clinic input] +_elementtree.XMLParser.__init__ + + * + target: object = None + encoding: str(accept={str, NoneType}) = None + +[clinic start generated code]*/ + +static int +_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target, + const char *encoding) +/*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/ +{ + self->entity = PyDict_New(); + if (!self->entity) + return -1; + + self->names = PyDict_New(); + if (!self->names) { + Py_CLEAR(self->entity); + return -1; + } + elementtreestate *st = self->state; + self->parser = EXPAT(st, ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); + if (!self->parser) { + Py_CLEAR(self->entity); + Py_CLEAR(self->names); + PyErr_NoMemory(); + return -1; + } + /* expat < 2.1.0 has no XML_SetHashSalt() */ + if (EXPAT(st, SetHashSalt) != NULL) { + EXPAT(st, SetHashSalt)(self->parser, + (unsigned long)_Py_HashSecret.expat.hashsalt); + } + + if (target != Py_None) { + Py_INCREF(target); + } else { + target = treebuilder_new(st->TreeBuilder_Type, NULL, NULL); + if (!target) { + Py_CLEAR(self->entity); + Py_CLEAR(self->names); + return -1; + } + } + self->target = target; + + self->handle_start_ns = PyObject_GetAttrString(target, "start_ns"); + if (ignore_attribute_error(self->handle_start_ns)) { + return -1; + } + self->handle_end_ns = PyObject_GetAttrString(target, "end_ns"); + if (ignore_attribute_error(self->handle_end_ns)) { + return -1; + } + self->handle_start = PyObject_GetAttrString(target, "start"); + if (ignore_attribute_error(self->handle_start)) { + return -1; + } + self->handle_data = PyObject_GetAttrString(target, "data"); + if (ignore_attribute_error(self->handle_data)) { + return -1; + } + self->handle_end = PyObject_GetAttrString(target, "end"); + if (ignore_attribute_error(self->handle_end)) { + return -1; + } + self->handle_comment = PyObject_GetAttrString(target, "comment"); + if (ignore_attribute_error(self->handle_comment)) { + return -1; + } + self->handle_pi = PyObject_GetAttrString(target, "pi"); + if (ignore_attribute_error(self->handle_pi)) { + return -1; + } + self->handle_close = PyObject_GetAttrString(target, "close"); + if (ignore_attribute_error(self->handle_close)) { + return -1; + } + self->handle_doctype = PyObject_GetAttrString(target, "doctype"); + if (ignore_attribute_error(self->handle_doctype)) { + return -1; + } + + /* configure parser */ + EXPAT(st, SetUserData)(self->parser, self); + if (self->handle_start_ns || self->handle_end_ns) + EXPAT(st, SetNamespaceDeclHandler)( + self->parser, + (XML_StartNamespaceDeclHandler) expat_start_ns_handler, + (XML_EndNamespaceDeclHandler) expat_end_ns_handler + ); + EXPAT(st, SetElementHandler)( + self->parser, + (XML_StartElementHandler) expat_start_handler, + (XML_EndElementHandler) expat_end_handler + ); + EXPAT(st, SetDefaultHandlerExpand)( + self->parser, + (XML_DefaultHandler) expat_default_handler + ); + EXPAT(st, SetCharacterDataHandler)( + self->parser, + (XML_CharacterDataHandler) expat_data_handler + ); + if (self->handle_comment) + EXPAT(st, SetCommentHandler)( + self->parser, + (XML_CommentHandler) expat_comment_handler + ); + if (self->handle_pi) + EXPAT(st, SetProcessingInstructionHandler)( + self->parser, + (XML_ProcessingInstructionHandler) expat_pi_handler + ); + EXPAT(st, SetStartDoctypeDeclHandler)( + self->parser, + (XML_StartDoctypeDeclHandler) expat_start_doctype_handler + ); + EXPAT(st, SetUnknownEncodingHandler)( + self->parser, + EXPAT(st, DefaultUnknownEncodingHandler), NULL + ); + + return 0; +} + +static int +xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + Py_VISIT(self->handle_close); + Py_VISIT(self->handle_pi); + Py_VISIT(self->handle_comment); + Py_VISIT(self->handle_end); + Py_VISIT(self->handle_data); + Py_VISIT(self->handle_start); + Py_VISIT(self->handle_start_ns); + Py_VISIT(self->handle_end_ns); + Py_VISIT(self->handle_doctype); + + Py_VISIT(self->target); + Py_VISIT(self->entity); + Py_VISIT(self->names); + + return 0; +} + +static int +xmlparser_gc_clear(XMLParserObject *self) +{ + elementtreestate *st = self->state; + if (self->parser != NULL) { + XML_Parser parser = self->parser; + self->parser = NULL; + EXPAT(st, ParserFree)(parser); + } + + Py_CLEAR(self->elementtree_module); + Py_CLEAR(self->handle_close); + Py_CLEAR(self->handle_pi); + Py_CLEAR(self->handle_comment); + Py_CLEAR(self->handle_end); + Py_CLEAR(self->handle_data); + Py_CLEAR(self->handle_start); + Py_CLEAR(self->handle_start_ns); + Py_CLEAR(self->handle_end_ns); + Py_CLEAR(self->handle_doctype); + + Py_CLEAR(self->target); + Py_CLEAR(self->entity); + Py_CLEAR(self->names); + + return 0; +} + +static void +xmlparser_dealloc(XMLParserObject* self) +{ + PyTypeObject *tp = Py_TYPE(self); + PyObject_GC_UnTrack(self); + xmlparser_gc_clear(self); + tp->tp_free(self); + Py_DECREF(tp); +} + +Py_LOCAL_INLINE(int) +_check_xmlparser(XMLParserObject* self) +{ + if (self->target == NULL) { + PyErr_SetString(PyExc_ValueError, + "XMLParser.__init__() wasn't called"); + return 0; + } + return 1; +} + +LOCAL(PyObject*) +expat_parse(elementtreestate *st, XMLParserObject *self, const char *data, + int data_len, int final) +{ + int ok; + + assert(!PyErr_Occurred()); + ok = EXPAT(st, Parse)(self->parser, data, data_len, final); + + if (PyErr_Occurred()) + return NULL; + + if (!ok) { + expat_set_error( + st, + EXPAT(st, GetErrorCode)(self->parser), + EXPAT(st, GetErrorLineNumber)(self->parser), + EXPAT(st, GetErrorColumnNumber)(self->parser), + NULL + ); + return NULL; + } + + Py_RETURN_NONE; +} + +/*[clinic input] +_elementtree.XMLParser.close + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_close_impl(XMLParserObject *self) +/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/ +{ + /* end feeding data to parser */ + + PyObject* res; + + if (!_check_xmlparser(self)) { + return NULL; + } + elementtreestate *st = self->state; + res = expat_parse(st, self, "", 0, 1); + if (!res) + return NULL; + + if (TreeBuilder_CheckExact(st, self->target)) { + Py_DECREF(res); + return treebuilder_done((TreeBuilderObject*) self->target); + } + else if (self->handle_close) { + Py_DECREF(res); + return PyObject_CallNoArgs(self->handle_close); + } + else { + return res; + } +} + +/*[clinic input] +_elementtree.XMLParser.feed + + data: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data) +/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/ +{ + /* feed data to parser */ + + if (!_check_xmlparser(self)) { + return NULL; + } + elementtreestate *st = self->state; + if (PyUnicode_Check(data)) { + Py_ssize_t data_len; + const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len); + if (data_ptr == NULL) + return NULL; + if (data_len > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + /* Explicitly set UTF-8 encoding. Return code ignored. */ + (void)EXPAT(st, SetEncoding)(self->parser, "utf-8"); + + return expat_parse(st, self, data_ptr, (int)data_len, 0); + } + else { + Py_buffer view; + PyObject *res; + if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) + return NULL; + if (view.len > INT_MAX) { + PyBuffer_Release(&view); + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + res = expat_parse(st, self, view.buf, (int)view.len, 0); + PyBuffer_Release(&view); + return res; + } +} + +/*[clinic input] +_elementtree.XMLParser._parse_whole + + file: object + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file) +/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/ +{ + /* (internal) parse the whole input, until end of stream */ + PyObject* reader; + PyObject* buffer; + PyObject* temp; + PyObject* res; + + if (!_check_xmlparser(self)) { + return NULL; + } + reader = PyObject_GetAttrString(file, "read"); + if (!reader) + return NULL; + + /* read from open file object */ + elementtreestate *st = self->state; + for (;;) { + + buffer = PyObject_CallFunction(reader, "i", 64*1024); + + if (!buffer) { + /* read failed (e.g. due to KeyboardInterrupt) */ + Py_DECREF(reader); + return NULL; + } + + if (PyUnicode_CheckExact(buffer)) { + /* A unicode object is encoded into bytes using UTF-8 */ + if (PyUnicode_GET_LENGTH(buffer) == 0) { + Py_DECREF(buffer); + break; + } + temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); + Py_DECREF(buffer); + if (!temp) { + /* Propagate exception from PyUnicode_AsEncodedString */ + Py_DECREF(reader); + return NULL; + } + buffer = temp; + } + else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { + Py_DECREF(buffer); + break; + } + + if (PyBytes_GET_SIZE(buffer) > INT_MAX) { + Py_DECREF(buffer); + Py_DECREF(reader); + PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); + return NULL; + } + res = expat_parse( + st, self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), + 0); + + Py_DECREF(buffer); + + if (!res) { + Py_DECREF(reader); + return NULL; + } + Py_DECREF(res); + + } + + Py_DECREF(reader); + + res = expat_parse(st, self, "", 0, 1); + + if (res && TreeBuilder_CheckExact(st, self->target)) { + Py_DECREF(res); + return treebuilder_done((TreeBuilderObject*) self->target); + } + + return res; +} + +/*[clinic input] +_elementtree.XMLParser._setevents + + events_queue: object + events_to_report: object = None + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_XMLParser__setevents_impl(XMLParserObject *self, + PyObject *events_queue, + PyObject *events_to_report) +/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/ +{ + /* activate element event reporting */ + Py_ssize_t i; + TreeBuilderObject *target; + PyObject *events_append, *events_seq; + + if (!_check_xmlparser(self)) { + return NULL; + } + elementtreestate *st = self->state; + if (!TreeBuilder_CheckExact(st, self->target)) { + PyErr_SetString( + PyExc_TypeError, + "event handling only supported for ElementTree.TreeBuilder " + "targets" + ); + return NULL; + } + + target = (TreeBuilderObject*) self->target; + + events_append = PyObject_GetAttrString(events_queue, "append"); + if (events_append == NULL) + return NULL; + Py_XSETREF(target->events_append, events_append); + + /* clear out existing events */ + Py_CLEAR(target->start_event_obj); + Py_CLEAR(target->end_event_obj); + Py_CLEAR(target->start_ns_event_obj); + Py_CLEAR(target->end_ns_event_obj); + Py_CLEAR(target->comment_event_obj); + Py_CLEAR(target->pi_event_obj); + + if (events_to_report == Py_None) { + /* default is "end" only */ + target->end_event_obj = PyUnicode_FromString("end"); + Py_RETURN_NONE; + } + + if (!(events_seq = PySequence_Fast(events_to_report, + "events must be a sequence"))) { + return NULL; + } + + for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) { + PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i); + const char *event_name = NULL; + if (PyUnicode_Check(event_name_obj)) { + event_name = PyUnicode_AsUTF8(event_name_obj); + } else if (PyBytes_Check(event_name_obj)) { + event_name = PyBytes_AS_STRING(event_name_obj); + } + if (event_name == NULL) { + Py_DECREF(events_seq); + PyErr_Format(PyExc_ValueError, "invalid events sequence"); + return NULL; + } + + if (strcmp(event_name, "start") == 0) { + Py_XSETREF(target->start_event_obj, Py_NewRef(event_name_obj)); + } else if (strcmp(event_name, "end") == 0) { + Py_XSETREF(target->end_event_obj, Py_NewRef(event_name_obj)); + } else if (strcmp(event_name, "start-ns") == 0) { + Py_XSETREF(target->start_ns_event_obj, Py_NewRef(event_name_obj)); + EXPAT(st, SetNamespaceDeclHandler)( + self->parser, + (XML_StartNamespaceDeclHandler) expat_start_ns_handler, + (XML_EndNamespaceDeclHandler) expat_end_ns_handler + ); + } else if (strcmp(event_name, "end-ns") == 0) { + Py_XSETREF(target->end_ns_event_obj, Py_NewRef(event_name_obj)); + EXPAT(st, SetNamespaceDeclHandler)( + self->parser, + (XML_StartNamespaceDeclHandler) expat_start_ns_handler, + (XML_EndNamespaceDeclHandler) expat_end_ns_handler + ); + } else if (strcmp(event_name, "comment") == 0) { + Py_XSETREF(target->comment_event_obj, Py_NewRef(event_name_obj)); + EXPAT(st, SetCommentHandler)( + self->parser, + (XML_CommentHandler) expat_comment_handler + ); + } else if (strcmp(event_name, "pi") == 0) { + Py_XSETREF(target->pi_event_obj, Py_NewRef(event_name_obj)); + EXPAT(st, SetProcessingInstructionHandler)( + self->parser, + (XML_ProcessingInstructionHandler) expat_pi_handler + ); + } else { + Py_DECREF(events_seq); + PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name); + return NULL; + } + } + + Py_DECREF(events_seq); + Py_RETURN_NONE; +} + +static PyMemberDef xmlparser_members[] = { + {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL}, + {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL}, + {NULL} +}; + +static PyObject* +xmlparser_version_getter(XMLParserObject *self, void *closure) +{ + return PyUnicode_FromFormat( + "Expat %d.%d.%d", XML_MAJOR_VERSION, + XML_MINOR_VERSION, XML_MICRO_VERSION); +} + +static PyGetSetDef xmlparser_getsetlist[] = { + {"version", (getter)xmlparser_version_getter, NULL, NULL}, + {NULL}, +}; + +#define clinic_state() (get_elementtree_state_by_type(Py_TYPE(self))) +#include "clinic/_elementtree.c.h" +#undef clinic_state + +static PyMethodDef element_methods[] = { + + _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF + + _ELEMENTTREE_ELEMENT_GET_METHODDEF + _ELEMENTTREE_ELEMENT_SET_METHODDEF + + _ELEMENTTREE_ELEMENT_FIND_METHODDEF + _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF + _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF + + _ELEMENTTREE_ELEMENT_APPEND_METHODDEF + _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF + _ELEMENTTREE_ELEMENT_INSERT_METHODDEF + _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF + + _ELEMENTTREE_ELEMENT_ITER_METHODDEF + _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF + _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF + + _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF + _ELEMENTTREE_ELEMENT_KEYS_METHODDEF + + _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF + + _ELEMENTTREE_ELEMENT___COPY___METHODDEF + _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF + _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF + _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF + _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF + + {NULL, NULL} +}; + +static struct PyMemberDef element_members[] = { + {"__weaklistoffset__", T_PYSSIZET, offsetof(ElementObject, weakreflist), READONLY}, + {NULL}, +}; + +static PyGetSetDef element_getsetlist[] = { + {"tag", + (getter)element_tag_getter, + (setter)element_tag_setter, + "A string identifying what kind of data this element represents"}, + {"text", + (getter)element_text_getter, + (setter)element_text_setter, + "A string of text directly after the start tag, or None"}, + {"tail", + (getter)element_tail_getter, + (setter)element_tail_setter, + "A string of text directly after the end tag, or None"}, + {"attrib", + (getter)element_attrib_getter, + (setter)element_attrib_setter, + "A dictionary containing the element's attributes"}, + {NULL}, +}; + +static PyType_Slot element_slots[] = { + {Py_tp_dealloc, element_dealloc}, + {Py_tp_repr, element_repr}, + {Py_tp_getattro, PyObject_GenericGetAttr}, + {Py_tp_traverse, element_gc_traverse}, + {Py_tp_clear, element_gc_clear}, + {Py_tp_methods, element_methods}, + {Py_tp_members, element_members}, + {Py_tp_getset, element_getsetlist}, + {Py_tp_init, element_init}, + {Py_tp_alloc, PyType_GenericAlloc}, + {Py_tp_new, element_new}, + {Py_sq_length, element_length}, + {Py_sq_item, element_getitem}, + {Py_sq_ass_item, element_setitem}, + {Py_nb_bool, element_bool}, + {Py_mp_length, element_length}, + {Py_mp_subscript, element_subscr}, + {Py_mp_ass_subscript, element_ass_subscr}, + {0, NULL}, +}; + +static PyType_Spec element_spec = { + .name = "xml.etree.ElementTree.Element", + .basicsize = sizeof(ElementObject), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = element_slots, +}; + +static PyMethodDef treebuilder_methods[] = { + _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF + _ELEMENTTREE_TREEBUILDER_START_METHODDEF + _ELEMENTTREE_TREEBUILDER_END_METHODDEF + _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF + _ELEMENTTREE_TREEBUILDER_PI_METHODDEF + _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF + {NULL, NULL} +}; + +static PyType_Slot treebuilder_slots[] = { + {Py_tp_dealloc, treebuilder_dealloc}, + {Py_tp_traverse, treebuilder_gc_traverse}, + {Py_tp_clear, treebuilder_gc_clear}, + {Py_tp_methods, treebuilder_methods}, + {Py_tp_init, _elementtree_TreeBuilder___init__}, + {Py_tp_alloc, PyType_GenericAlloc}, + {Py_tp_new, treebuilder_new}, + {0, NULL}, +}; + +static PyType_Spec treebuilder_spec = { + .name = "xml.etree.ElementTree.TreeBuilder", + .basicsize = sizeof(TreeBuilderObject), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE, + .slots = treebuilder_slots, +}; + +static PyMethodDef xmlparser_methods[] = { + _ELEMENTTREE_XMLPARSER_FEED_METHODDEF + _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF + _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF + _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF + {NULL, NULL} +}; + +static PyType_Slot xmlparser_slots[] = { + {Py_tp_dealloc, xmlparser_dealloc}, + {Py_tp_traverse, xmlparser_gc_traverse}, + {Py_tp_clear, xmlparser_gc_clear}, + {Py_tp_methods, xmlparser_methods}, + {Py_tp_members, xmlparser_members}, + {Py_tp_getset, xmlparser_getsetlist}, + {Py_tp_init, _elementtree_XMLParser___init__}, + {Py_tp_alloc, PyType_GenericAlloc}, + {Py_tp_new, xmlparser_new}, + {0, NULL}, +}; + +static PyType_Spec xmlparser_spec = { + .name = "xml.etree.ElementTree.XMLParser", + .basicsize = sizeof(XMLParserObject), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_IMMUTABLETYPE), + .slots = xmlparser_slots, +}; + +/* ==================================================================== */ +/* python module interface */ + +static PyMethodDef _functions[] = { + {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS}, + _ELEMENTTREE__SET_FACTORIES_METHODDEF + {NULL, NULL} +}; + +#define CREATE_TYPE(module, type, spec) \ +do { \ + if (type != NULL) { \ + break; \ + } \ + type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \ + if (type == NULL) { \ + goto error; \ + } \ +} while (0) + +static int +module_exec(PyObject *m) +{ + elementtreestate *st = get_elementtree_state(m); + + /* Initialize object types */ + CREATE_TYPE(m, st->ElementIter_Type, &elementiter_spec); + CREATE_TYPE(m, st->TreeBuilder_Type, &treebuilder_spec); + CREATE_TYPE(m, st->Element_Type, &element_spec); + CREATE_TYPE(m, st->XMLParser_Type, &xmlparser_spec); + + st->deepcopy_obj = _PyImport_GetModuleAttrString("copy", "deepcopy"); + if (st->deepcopy_obj == NULL) { + goto error; + } + + assert(!PyErr_Occurred()); + if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) + goto error; + + /* link against pyexpat */ + if (!(st->expat_capsule = _PyImport_GetModuleAttrString("pyexpat", "expat_CAPI"))) + goto error; + if (!(st->expat_capi = PyCapsule_GetPointer(st->expat_capsule, PyExpat_CAPSULE_NAME))) + goto error; + if (st->expat_capi) { + /* check that it's usable */ + if (strcmp(st->expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 || + (size_t)st->expat_capi->size < sizeof(struct PyExpat_CAPI) || + st->expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || + st->expat_capi->MINOR_VERSION != XML_MINOR_VERSION || + st->expat_capi->MICRO_VERSION != XML_MICRO_VERSION) { + PyErr_SetString(PyExc_ImportError, + "pyexpat version is incompatible"); + goto error; + } + } else { + goto error; + } + + st->str_append = PyUnicode_InternFromString("append"); + if (st->str_append == NULL) { + goto error; + } + st->str_find = PyUnicode_InternFromString("find"); + if (st->str_find == NULL) { + goto error; + } + st->str_findall = PyUnicode_InternFromString("findall"); + if (st->str_findall == NULL) { + goto error; + } + st->str_findtext = PyUnicode_InternFromString("findtext"); + if (st->str_findtext == NULL) { + goto error; + } + st->str_iterfind = PyUnicode_InternFromString("iterfind"); + if (st->str_iterfind == NULL) { + goto error; + } + st->str_tail = PyUnicode_InternFromString("tail"); + if (st->str_tail == NULL) { + goto error; + } + st->str_text = PyUnicode_InternFromString("text"); + if (st->str_text == NULL) { + goto error; + } + st->str_doctype = PyUnicode_InternFromString("doctype"); + if (st->str_doctype == NULL) { + goto error; + } + st->parseerror_obj = PyErr_NewException( + "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL + ); + if (PyModule_AddObjectRef(m, "ParseError", st->parseerror_obj) < 0) { + goto error; + } + + PyTypeObject *types[] = { + st->Element_Type, + st->TreeBuilder_Type, + st->XMLParser_Type + }; + + for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) { + if (PyModule_AddType(m, types[i]) < 0) { + goto error; + } + } + + return 0; + +error: + return -1; +} + +static struct PyModuleDef_Slot elementtree_slots[] = { + {Py_mod_exec, module_exec}, + // XXX gh-103092: fix isolation. + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + //{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {0, NULL}, +}; + +static struct PyModuleDef elementtreemodule = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_elementtree", + .m_size = sizeof(elementtreestate), + .m_methods = _functions, + .m_slots = elementtree_slots, + .m_traverse = elementtree_traverse, + .m_clear = elementtree_clear, + .m_free = elementtree_free, +}; + +PyMODINIT_FUNC +PyInit__elementtree(void) +{ + return PyModuleDef_Init(&elementtreemodule); +} |