diff options
author | robot-piglet <[email protected]> | 2025-07-13 23:05:26 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-07-13 23:15:55 +0300 |
commit | 8cb77383dfe491d72dd3820518a3c1b49c8487a8 (patch) | |
tree | a5f7ff010010986024367dd7755b55f723775ad0 /contrib/python | |
parent | e3196ab85e3299b286ebe0f8c2d37b6841f7e5e5 (diff) |
Intermediate changes
commit_hash:78e65545069362958586e25e4c5fd36d5ac3a4a2
Diffstat (limited to 'contrib/python')
-rw-r--r-- | contrib/python/multidict/.dist-info/METADATA | 5 | ||||
-rw-r--r-- | contrib/python/multidict/multidict/__init__.py | 2 | ||||
-rw-r--r-- | contrib/python/multidict/multidict/_abc.py | 4 | ||||
-rw-r--r-- | contrib/python/multidict/multidict/_multidict.c | 69 | ||||
-rw-r--r-- | contrib/python/multidict/multidict/_multidict_py.py | 94 | ||||
-rw-r--r-- | contrib/python/multidict/multidict/_multilib/hashtable.h | 230 | ||||
-rw-r--r-- | contrib/python/multidict/tests/test_multidict_benchmarks.py | 183 | ||||
-rw-r--r-- | contrib/python/multidict/tests/test_mutable_multidict.py | 104 | ||||
-rw-r--r-- | contrib/python/multidict/ya.make | 2 |
9 files changed, 531 insertions, 162 deletions
diff --git a/contrib/python/multidict/.dist-info/METADATA b/contrib/python/multidict/.dist-info/METADATA index 4c018b118d2..75e88620163 100644 --- a/contrib/python/multidict/.dist-info/METADATA +++ b/contrib/python/multidict/.dist-info/METADATA @@ -1,11 +1,11 @@ Metadata-Version: 2.4 Name: multidict -Version: 6.5.1 +Version: 6.6.2 Summary: multidict implementation Home-page: https://github.com/aio-libs/multidict Author: Andrew Svetlov Author-email: [email protected] -License: Apache 2 +License: Apache License 2.0 Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org Project-URL: CI: GitHub, https://github.com/aio-libs/multidict/actions @@ -17,7 +17,6 @@ Project-URL: GitHub: issues, https://github.com/aio-libs/multidict/issues Project-URL: GitHub: repo, https://github.com/aio-libs/multidict Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: Apache Software License Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.9 diff --git a/contrib/python/multidict/multidict/__init__.py b/contrib/python/multidict/multidict/__init__.py index 1d8a2ae54c5..84126465975 100644 --- a/contrib/python/multidict/multidict/__init__.py +++ b/contrib/python/multidict/multidict/__init__.py @@ -22,7 +22,7 @@ __all__ = ( "getversion", ) -__version__ = "6.5.1" +__version__ = "6.6.2" if TYPE_CHECKING or not USE_EXTENSIONS: diff --git a/contrib/python/multidict/multidict/_abc.py b/contrib/python/multidict/multidict/_abc.py index ff0e2a6976c..54253e9e779 100644 --- a/contrib/python/multidict/multidict/_abc.py +++ b/contrib/python/multidict/multidict/_abc.py @@ -52,6 +52,10 @@ class MutableMultiMapping(MultiMapping[_V], MutableMapping[str, _V]): def extend(self, arg: MDArg[_V] = None, /, **kwargs: _V) -> None: """Add everything from arg and kwargs to the mapping.""" + @abc.abstractmethod + def merge(self, arg: MDArg[_V] = None, /, **kwargs: _V) -> None: + """Merge into the mapping, adding non-existing keys.""" + @overload def popone(self, key: str) -> _V: ... @overload diff --git a/contrib/python/multidict/multidict/_multidict.c b/contrib/python/multidict/multidict/_multidict.c index cd958931153..73c00229687 100644 --- a/contrib/python/multidict/multidict/_multidict.c +++ b/contrib/python/multidict/multidict/_multidict.c @@ -66,7 +66,7 @@ _multidict_getone(MultiDictObject *self, PyObject *key, PyObject *_default) static inline int _multidict_extend(MultiDictObject *self, PyObject *arg, PyObject *kwds, - const char *name, bool update) + const char *name, UpdateOp op) { mod_state *state = self->state; PyObject *seq = NULL; @@ -78,24 +78,24 @@ _multidict_extend(MultiDictObject *self, PyObject *arg, PyObject *kwds, if (arg != NULL) { if (AnyMultiDict_Check(state, arg)) { MultiDictObject *other = (MultiDictObject *)arg; - if (md_update_from_ht(self, other, update) < 0) { + if (md_update_from_ht(self, other, op) < 0) { goto fail; } } else if (AnyMultiDictProxy_Check(state, arg)) { MultiDictObject *other = ((MultiDictProxyObject *)arg)->md; - if (md_update_from_ht(self, other, update) < 0) { + if (md_update_from_ht(self, other, op) < 0) { goto fail; } } else if (PyDict_CheckExact(arg)) { - if (md_update_from_dict(self, arg, update) < 0) { + if (md_update_from_dict(self, arg, op) < 0) { goto fail; } } else if (PyList_CheckExact(arg)) { - if (md_update_from_seq(self, arg, update) < 0) { + if (md_update_from_seq(self, arg, op) < 0) { goto fail; } } else if (PyTuple_CheckExact(arg)) { - if (md_update_from_seq(self, arg, update) < 0) { + if (md_update_from_seq(self, arg, op) < 0) { goto fail; } } else { @@ -105,28 +105,31 @@ _multidict_extend(MultiDictObject *self, PyObject *arg, PyObject *kwds, seq = Py_NewRef(arg); } - if (md_update_from_seq(self, seq, update) < 0) { + if (md_update_from_seq(self, seq, op) < 0) { goto fail; } } } if (kwds != NULL) { - if (md_update_from_dict(self, kwds, update) < 0) { + if (md_update_from_dict(self, kwds, op) < 0) { goto fail; } } - if (update) { - if (md_post_update(self) < 0) { - goto fail; - } + if (op != Extend) { // Update or Merge + md_post_update(self); } ASSERT_CONSISTENT(self, false); Py_CLEAR(seq); return 0; fail: + if (op != Extend) { // Update or Merge + // Cleanup soft-deleted items + md_post_update(self); + } + ASSERT_CONSISTENT(self, false); Py_CLEAR(seq); return -1; } @@ -136,7 +139,7 @@ _multidict_extend_parse_args(mod_state *state, PyObject *args, PyObject *kwds, const char *name, PyObject **parg) { Py_ssize_t size = 0; - Py_ssize_t s; + Py_ssize_t s = 0; if (args) { s = PyTuple_GET_SIZE(args); if (s > 1) { @@ -551,7 +554,7 @@ multidict_tp_init(MultiDictObject *self, PyObject *args, PyObject *kwds) if (md_init(self, state, false, size) < 0) { goto fail; } - if (_multidict_extend(self, arg, kwds, "MultiDict", false) < 0) { + if (_multidict_extend(self, arg, kwds, "MultiDict", Extend) < 0) { goto fail; } done: @@ -592,7 +595,7 @@ multidict_extend(MultiDictObject *self, PyObject *args, PyObject *kwds) if (md_reserve(self, size) < 0) { goto fail; } - if (_multidict_extend(self, arg, kwds, "extend", false) < 0) { + if (_multidict_extend(self, arg, kwds, "extend", Extend) < 0) { goto fail; } Py_CLEAR(arg); @@ -774,7 +777,30 @@ multidict_update(MultiDictObject *self, PyObject *args, PyObject *kwds) if (md_reserve(self, size) < 0) { goto fail; } - if (_multidict_extend(self, arg, kwds, "update", true) < 0) { + if (_multidict_extend(self, arg, kwds, "update", Update) < 0) { + goto fail; + } + Py_CLEAR(arg); + ASSERT_CONSISTENT(self, false); + Py_RETURN_NONE; +fail: + Py_CLEAR(arg); + return NULL; +} + +static PyObject * +multidict_merge(MultiDictObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *arg = NULL; + Py_ssize_t size = + _multidict_extend_parse_args(self->state, args, kwds, "merge", &arg); + if (size < 0) { + goto fail; + } + if (md_reserve(self, size) < 0) { + goto fail; + } + if (_multidict_extend(self, arg, kwds, "merge", Merge) < 0) { goto fail; } Py_CLEAR(arg); @@ -822,7 +848,10 @@ PyDoc_STRVAR(multidict_popitem_doc, "Remove and return an arbitrary (key, value) pair."); PyDoc_STRVAR(multidict_update_doc, - "Update the dictionary from *other*, overwriting existing keys."); + "Update the dictionary, overwriting existing keys."); + +PyDoc_STRVAR(multidict_merge_doc, + "Merge into the dictionary, adding non-existing keys."); PyDoc_STRVAR(sizeof__doc__, "D.__sizeof__() -> size of D in memory, in bytes"); @@ -887,6 +916,10 @@ static PyMethodDef multidict_methods[] = { (PyCFunction)multidict_update, METH_VARARGS | METH_KEYWORDS, multidict_update_doc}, + {"merge", + (PyCFunction)multidict_merge, + METH_VARARGS | METH_KEYWORDS, + multidict_merge_doc}, { "__reduce__", (PyCFunction)multidict_reduce, @@ -979,7 +1012,7 @@ cimultidict_tp_init(MultiDictObject *self, PyObject *args, PyObject *kwds) if (md_init(self, state, true, size) < 0) { goto fail; } - if (_multidict_extend(self, arg, kwds, "CIMultiDict", false) < 0) { + if (_multidict_extend(self, arg, kwds, "CIMultiDict", Extend) < 0) { goto fail; } done: diff --git a/contrib/python/multidict/multidict/_multidict_py.py b/contrib/python/multidict/multidict/_multidict_py.py index f9fa25671a0..6b68d52eeff 100644 --- a/contrib/python/multidict/multidict/_multidict_py.py +++ b/contrib/python/multidict/multidict/_multidict_py.py @@ -445,11 +445,11 @@ class _CIMixin: if isinstance(key, istr): ret = key.__istr_identity__ if ret is None: - ret = key.title() + ret = key.lower() key.__istr_identity__ = ret return ret if isinstance(key, str): - return key.title() + return key.lower() else: raise TypeError("MultiDict keys should be either str or subclasses of str") @@ -632,13 +632,13 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): self._from_md(md) return - items = self._parse_args(arg, kwargs) - log2_size = estimate_log2_keysize(len(items)) + it = self._parse_args(arg, kwargs) + log2_size = estimate_log2_keysize(cast(int, next(it))) if log2_size > 17: # pragma: no cover # Don't overallocate really huge keys space in init log2_size = 17 self._keys: _HtKeys[_V] = _HtKeys.new(log2_size, []) - self._extend_items(items) + self._extend_items(cast(Iterator[_Entry[_V]], it)) def _from_md(self, md: "MultiDict[_V]") -> None: # Copy everything as-is without compacting the new multidict, @@ -657,14 +657,17 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): identity = self._identity(key) hash_ = hash(identity) res = [] - + restore = [] for slot, idx, e in self._keys.iter_hash(hash_): if e.identity == identity: # pragma: no branch res.append(e.value) e.hash = -1 - self._keys.restore_hash(hash_) + restore.append(idx) if res: + entries = self._keys.entries + for idx in restore: + entries[idx].hash = hash_ # type: ignore[union-attr] return res if not res and default is not sentinel: return default @@ -787,35 +790,33 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): This method must be used instead of update. """ - items = self._parse_args(arg, kwargs) - newsize = self._used + len(items) + it = self._parse_args(arg, kwargs) + newsize = self._used + cast(int, next(it)) self._resize(estimate_log2_keysize(newsize), False) - self._extend_items(items) + self._extend_items(cast(Iterator[_Entry[_V]], it)) def _parse_args( self, arg: MDArg[_V], kwargs: Mapping[str, _V], - ) -> list[_Entry[_V]]: + ) -> Iterator[Union[int, _Entry[_V]]]: identity_func = self._identity if arg: if isinstance(arg, MultiDictProxy): arg = arg._md if isinstance(arg, MultiDict): + yield len(arg) + len(kwargs) if self._ci is not arg._ci: - items = [] for e in arg._keys.iter_entries(): identity = identity_func(e.key) - items.append(_Entry(hash(identity), identity, e.key, e.value)) + yield _Entry(hash(identity), identity, e.key, e.value) else: - items = [ - _Entry(e.hash, e.identity, e.key, e.value) - for e in arg._keys.iter_entries() - ] + for e in arg._keys.iter_entries(): + yield _Entry(e.hash, e.identity, e.key, e.value) if kwargs: for key, value in kwargs.items(): identity = identity_func(key) - items.append(_Entry(hash(identity), identity, key, value)) + yield _Entry(hash(identity), identity, key, value) else: if hasattr(arg, "keys"): arg = cast(SupportsKeys[_V], arg) @@ -823,7 +824,10 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): if kwargs: arg = list(arg) arg.extend(list(kwargs.items())) - items = [] + try: + yield len(arg) + len(kwargs) # type: ignore[arg-type] + except TypeError: + yield 0 for pos, item in enumerate(arg): if not len(item) == 2: raise ValueError( @@ -831,14 +835,12 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): f"has length {len(item)}; 2 is required" ) identity = identity_func(item[0]) - items.append(_Entry(hash(identity), identity, item[0], item[1])) + yield _Entry(hash(identity), identity, item[0], item[1]) else: - items = [] + yield len(kwargs) for key, value in kwargs.items(): identity = identity_func(key) - items.append(_Entry(hash(identity), identity, key, value)) - - return items + yield _Entry(hash(identity), identity, key, value) def _extend_items(self, items: Iterable[_Entry[_V]]) -> None: for e in items: @@ -985,9 +987,9 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): return ret def update(self, arg: MDArg[_V] = None, /, **kwargs: _V) -> None: - """Update the dictionary from *other*, overwriting existing keys.""" - items = self._parse_args(arg, kwargs) - newsize = self._used + len(items) + """Update the dictionary, overwriting existing keys.""" + it = self._parse_args(arg, kwargs) + newsize = self._used + cast(int, next(it)) log2_size = estimate_log2_keysize(newsize) if log2_size > 17: # pragma: no cover # Don't overallocate really huge keys space in update, @@ -995,11 +997,12 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): log2_size = 17 if log2_size > self._keys.log2_size: self._resize(log2_size, False) - self._update_items(items) + try: + self._update_items(cast(Iterator[_Entry[_V]], it)) + finally: + self._post_update() - def _update_items(self, items: list[_Entry[_V]]) -> None: - if not items: - return + def _update_items(self, items: Iterator[_Entry[_V]]) -> None: for entry in items: found = False hash_ = entry.hash @@ -1016,6 +1019,7 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): if not found: self._add_with_hash_for_upd(entry) + def _post_update(self) -> None: keys = self._keys indices = keys.indices entries = keys.entries @@ -1025,7 +1029,7 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): e2 = entries[idx] assert e2 is not None if e2.key is None: - entries[idx] = None # type: ignore[unreachable] + entries[idx] = None indices[slot] = -2 self._used -= 1 if e2.hash == -1: @@ -1033,6 +1037,32 @@ class MultiDict(_CSMixin, MutableMultiMapping[_V]): self._incr_version() + def merge(self, arg: MDArg[_V] = None, /, **kwargs: _V) -> None: + """Merge into the dictionary, adding non-existing keys.""" + it = self._parse_args(arg, kwargs) + newsize = self._used + cast(int, next(it)) + log2_size = estimate_log2_keysize(newsize) + if log2_size > 17: # pragma: no cover + # Don't overallocate really huge keys space in update, + # duplicate keys could reduce the resulting anount of entries + log2_size = 17 + if log2_size > self._keys.log2_size: + self._resize(log2_size, False) + try: + self._merge_items(cast(Iterator[_Entry[_V]], it)) + finally: + self._post_update() + + def _merge_items(self, items: Iterator[_Entry[_V]]) -> None: + for entry in items: + hash_ = entry.hash + identity = entry.identity + for slot, idx, e in self._keys.iter_hash(hash_): + if e.identity == identity: # pragma: no branch + break + else: + self._add_with_hash_for_upd(entry) + def _incr_version(self) -> None: v = _version v[0] += 1 diff --git a/contrib/python/multidict/multidict/_multilib/hashtable.h b/contrib/python/multidict/multidict/_multilib/hashtable.h index 53d39a58cda..0e0a28e5005 100644 --- a/contrib/python/multidict/multidict/_multilib/hashtable.h +++ b/contrib/python/multidict/multidict/_multilib/hashtable.h @@ -30,6 +30,12 @@ typedef struct _md_finder { PyObject *identity; // borrowed ref } md_finder_t; +typedef enum _UpdateOp { + Extend, + Update, + Merge, +} UpdateOp; + /* The multidict's implementation is close to Python's dict except for multiple keys. @@ -80,12 +86,12 @@ GROWTH_RATE(MultiDictObject *md) return md->used * 3; } +#ifndef NDEBUG static inline int _md_check_consistency(MultiDictObject *md, bool update); static inline int _md_dump(MultiDictObject *md); -#ifndef NDEBUG #define ASSERT_CONSISTENT(md, update) assert(_md_check_consistency(md, update)) #else #define ASSERT_CONSISTENT(md, update) assert(1) @@ -145,10 +151,10 @@ _ci_key_to_identity(mod_state *state, PyObject *key) } return ret; } -fail: PyErr_SetString(PyExc_TypeError, "CIMultiDict keys should be either str " "or subclasses of str"); +fail: return NULL; } @@ -209,7 +215,8 @@ _md_resize(MultiDictObject *md, uint8_t log2_newsize, bool update) } else { entry_t *new_ep = newentries; entry_t *old_ep = oldentries; - for (Py_ssize_t i = 0; i < oldkeys->nentries; ++i, ++old_ep) { + Py_ssize_t oldnumentries = oldkeys->nentries; + for (Py_ssize_t i = 0; i < oldnumentries; ++i, ++old_ep) { if (old_ep->identity != NULL) { *new_ep++ = *old_ep; } @@ -233,15 +240,52 @@ _md_resize(MultiDictObject *md, uint8_t log2_newsize, bool update) } static inline int +_md_shrink(MultiDictObject *md, bool update) +{ + htkeys_t *keys = md->keys; + Py_ssize_t nentries = keys->nentries; + entry_t *entries = htkeys_entries(keys); + entry_t *new_ep = entries; + entry_t *old_ep = entries; + Py_ssize_t newnentries = nentries; + for (Py_ssize_t i = 0; i < nentries; ++i, ++old_ep) { + if (old_ep->identity != NULL) { + if (new_ep != old_ep) { + *new_ep = *old_ep; + } + new_ep++; + } else { + newnentries -= 1; + } + } + keys->nentries = newnentries; + keys->usable += nentries - newnentries; + memset(&keys->indices[0], 0xff, ((size_t)1 << keys->log2_index_bytes)); + if (htkeys_build_indices(keys, entries, newnentries, update) < 0) { + return -1; + } + ASSERT_CONSISTENT(md, update); + return 0; +} + +static inline int _md_resize_for_insert(MultiDictObject *md) { - return _md_resize(md, calculate_log2_keysize(GROWTH_RATE(md)), false); + if (md->used < md->keys->nentries) { + return _md_shrink(md, false); + } else { + return _md_resize(md, calculate_log2_keysize(GROWTH_RATE(md)), false); + } } static inline int _md_resize_for_update(MultiDictObject *md) { - return _md_resize(md, calculate_log2_keysize(GROWTH_RATE(md)), true); + if (md->used < md->keys->nentries) { + return _md_shrink(md, true); + } else { + return _md_resize(md, calculate_log2_keysize(GROWTH_RATE(md)), true); + } } static inline int @@ -830,6 +874,8 @@ fail: static inline int md_get_all(MultiDictObject *md, PyObject *key, PyObject **ret) { + int tmp; + PyObject *value = NULL; *ret = NULL; md_finder_t finder = {0}; @@ -844,9 +890,6 @@ md_get_all(MultiDictObject *md, PyObject *key, PyObject **ret) goto fail; } - int tmp; - PyObject *value = NULL; - while ((tmp = md_find_next(&finder, NULL, &value)) > 0) { if (*ret == NULL) { *ret = PyList_New(1); @@ -866,7 +909,10 @@ md_get_all(MultiDictObject *md, PyObject *key, PyObject **ret) goto fail; } - md_finder_cleanup(&finder); + if (*ret != NULL) { + // there is no need to restore hashes if none was marked + md_finder_cleanup(&finder); + } Py_DECREF(identity); return *ret != NULL; fail: @@ -1166,7 +1212,7 @@ _md_update(MultiDictObject *md, Py_hash_t hash, PyObject *identity, bool found = false; for (; iter.index != DKIX_EMPTY; htkeysiter_next(&iter)) { - if (iter.index == DKIX_DUMMY) { + if (iter.index < 0) { continue; } entry_t *entry = entries + iter.index; @@ -1210,6 +1256,38 @@ fail: } static inline int +_md_merge(MultiDictObject *md, Py_hash_t hash, PyObject *identity, + PyObject *key, PyObject *value) +{ + htkeysiter_t iter; + htkeysiter_init(&iter, md->keys, hash); + entry_t *entries = htkeys_entries(md->keys); + + for (; iter.index != DKIX_EMPTY; htkeysiter_next(&iter)) { + if (iter.index < 0) { + continue; + } + entry_t *entry = entries + iter.index; + if (hash != entry->hash) { + continue; + } + int tmp = _str_cmp(identity, entry->identity); + if (tmp > 0) { + return 0; + } else if (tmp < 0) { + goto fail; + } + } + + if (_md_add_for_upd(md, hash, identity, key, value) < 0) { + goto fail; + } + return 0; +fail: + return -1; +} + +static inline void md_post_update(MultiDictObject *md) { htkeys_t *keys = md->keys; @@ -1228,18 +1306,15 @@ md_post_update(MultiDictObject *md) } if (entry->hash == -1) { entry->hash = _unicode_hash(entry->identity); - if (entry->hash == -1) { - // hash of string always exists but still - return -1; - } } + assert(entry->hash != -1); } } - return 0; + ASSERT_CONSISTENT(md, false); } static inline int -md_update_from_ht(MultiDictObject *md, MultiDictObject *other, bool update) +md_update_from_ht(MultiDictObject *md, MultiDictObject *other, UpdateOp op) { Py_ssize_t pos; Py_hash_t hash; @@ -1277,14 +1352,23 @@ md_update_from_ht(MultiDictObject *md, MultiDictObject *other, bool update) hash = entry->hash; key = entry->key; } - if (update) { - if (_md_update(md, hash, identity, key, entry->value) < 0) { - goto fail; - } - } else { - if (_md_add_with_hash(md, hash, identity, key, entry->value) < 0) { - goto fail; - } + switch (op) { + case Update: + if (_md_update(md, hash, identity, key, entry->value) < 0) { + goto fail; + } + break; + case Extend: + if (_md_add_with_hash(md, hash, identity, key, entry->value) < + 0) { + goto fail; + } + break; + case Merge: + if (_md_merge(md, hash, identity, key, entry->value) < 0) { + goto fail; + } + break; } if (recalc_identity) { Py_CLEAR(identity); @@ -1301,7 +1385,7 @@ fail: } static inline int -md_update_from_dict(MultiDictObject *md, PyObject *kwds, bool update) +md_update_from_dict(MultiDictObject *md, PyObject *kwds, UpdateOp op) { Py_ssize_t pos = 0; PyObject *identity = NULL; @@ -1321,22 +1405,36 @@ md_update_from_dict(MultiDictObject *md, PyObject *kwds, bool update) if (hash == -1) { goto fail; } - if (update) { - if (_md_update(md, hash, identity, key, value) < 0) { - goto fail; + switch (op) { + case Update: { + if (_md_update(md, hash, identity, key, value) < 0) { + goto fail; + } + Py_CLEAR(identity); + Py_CLEAR(key); + break; } - Py_CLEAR(identity); - Py_CLEAR(key); - } else { - int tmp = _md_add_with_hash_steal_refs( - md, hash, identity, key, Py_NewRef(value)); - if (tmp < 0) { - Py_DECREF(value); - goto fail; + case Extend: { + int tmp = _md_add_with_hash_steal_refs( + md, hash, identity, key, Py_NewRef(value)); + if (tmp < 0) { + Py_DECREF(value); + goto fail; + } + + identity = NULL; + key = NULL; + value = NULL; + break; + } + case Merge: { + if (_md_merge(md, hash, identity, key, value) < 0) { + goto fail; + } + Py_CLEAR(identity); + Py_CLEAR(key); + break; } - identity = NULL; - key = NULL; - value = NULL; } } return 0; @@ -1426,7 +1524,7 @@ fail: } static inline int -md_update_from_seq(MultiDictObject *md, PyObject *seq, bool update) +md_update_from_seq(MultiDictObject *md, PyObject *seq, UpdateOp op) { PyObject *it = NULL; PyObject *item = NULL; // seq[i] @@ -1506,21 +1604,32 @@ md_update_from_seq(MultiDictObject *md, PyObject *seq, bool update) goto fail; } - if (update) { - if (_md_update(md, hash, identity, key, value) < 0) { - goto fail; - } - Py_CLEAR(identity); - Py_CLEAR(key); - Py_CLEAR(value); - } else { - if (_md_add_with_hash_steal_refs(md, hash, identity, key, value) < - 0) { - goto fail; - } - identity = NULL; - key = NULL; - value = NULL; + switch (op) { + case Update: + if (_md_update(md, hash, identity, key, value) < 0) { + goto fail; + } + Py_CLEAR(identity); + Py_CLEAR(key); + Py_CLEAR(value); + break; + case Extend: + if (_md_add_with_hash_steal_refs( + md, hash, identity, key, value) < 0) { + goto fail; + } + identity = NULL; + key = NULL; + value = NULL; + break; + case Merge: + if (_md_merge(md, hash, identity, key, value) < 0) { + goto fail; + } + Py_CLEAR(identity); + Py_CLEAR(key); + Py_CLEAR(value); + break; } Py_CLEAR(item); } @@ -1791,6 +1900,8 @@ md_clear(MultiDictObject *md) return 0; } +#ifndef NDEBUG + static inline int _md_check_consistency(MultiDictObject *md, bool update) { @@ -1852,7 +1963,7 @@ static inline int _md_dump(MultiDictObject *md) { htkeys_t *keys = md->keys; - printf("Dump %p [%ld from %ld usable %ld nentries %ld]\n", + printf("Dump %p [%zd from %zd usable %zd nentries %zd]\n", (void *)md, md->used, htkeys_nslots(keys), @@ -1860,7 +1971,7 @@ _md_dump(MultiDictObject *md) keys->nentries); for (Py_ssize_t i = 0; i < htkeys_nslots(keys); i++) { Py_ssize_t ix = htkeys_get_index(keys, i); - printf(" %ld -> %ld\n", i, ix); + printf(" %zd -> %zd\n", i, ix); } printf(" --------\n"); entry_t *entries = htkeys_entries(keys); @@ -1869,9 +1980,9 @@ _md_dump(MultiDictObject *md) PyObject *identity = entry->identity; if (identity == NULL) { - printf(" %ld [deleted]\n", i); + printf(" %zd [deleted]\n", i); } else { - printf(" %ld h=%20ld, i=\'", i, entry->hash); + printf(" %zd h=%20zd, i=\'", i, entry->hash); PyObject_Print(entry->identity, stdout, Py_PRINT_RAW); printf("\', k=\'"); PyObject_Print(entry->key, stdout, Py_PRINT_RAW); @@ -1883,6 +1994,7 @@ _md_dump(MultiDictObject *md) printf("\n"); return 1; } +#endif // NDEBUG #ifdef __cplusplus } diff --git a/contrib/python/multidict/tests/test_multidict_benchmarks.py b/contrib/python/multidict/tests/test_multidict_benchmarks.py index b27041493ba..7fe62a40b73 100644 --- a/contrib/python/multidict/tests/test_multidict_benchmarks.py +++ b/contrib/python/multidict/tests/test_multidict_benchmarks.py @@ -33,9 +33,10 @@ def test_multidict_insert_str( def test_cimultidict_insert_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: md = case_insensitive_multidict_class() - items = [istr(i) for i in range(100)] + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -60,9 +61,10 @@ def test_multidict_add_str( def test_cimultidict_add_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: base_md = case_insensitive_multidict_class() - items = [istr(i) for i in range(100)] + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -88,9 +90,13 @@ def test_multidict_pop_str( def test_cimultidict_pop_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md_base = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(200)) - items = [istr(i) for i in range(50, 150)] + md_base = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(200) + ) + items = [case_insensitive_str_class(i) for i in range(50, 150)] @benchmark def _run() -> None: @@ -135,9 +141,16 @@ def test_multidict_update_str( def test_cimultidict_update_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(150)) - items: Dict[Union[str, istr], istr] = {istr(i): istr(i) for i in range(100, 200)} + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(150) + ) + items: Dict[Union[str, istr], istr] = { + case_insensitive_str_class(i): case_insensitive_str_class(i) + for i in range(100, 200) + } @benchmark def _run() -> None: @@ -159,10 +172,17 @@ def test_multidict_update_str_with_kwargs( def test_cimultidict_update_istr_with_kwargs( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(150)) - items: Dict[Union[str, istr], istr] = {istr(i): istr(i) for i in range(100, 200)} - kwargs = {str(i): istr(i) for i in range(200, 300)} + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(150) + ) + items: Dict[Union[str, istr], istr] = { + case_insensitive_str_class(i): case_insensitive_str_class(i) + for i in range(100, 200) + } + kwargs = {str(i): case_insensitive_str_class(i) for i in range(200, 300)} @benchmark def _run() -> None: @@ -185,9 +205,15 @@ def test_multidict_extend_str( def test_cimultidict_extend_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - base_md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = {istr(i): istr(i) for i in range(200)} + base_md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = { + case_insensitive_str_class(i): case_insensitive_str_class(i) for i in range(200) + } @benchmark def _run() -> None: @@ -213,10 +239,16 @@ def test_multidict_extend_str_with_kwargs( def test_cimultidict_extend_istr_with_kwargs( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - base_md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = {istr(i): istr(i) for i in range(200)} - kwargs = {str(i): istr(i) for i in range(200, 300)} + base_md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = { + case_insensitive_str_class(i): case_insensitive_str_class(i) for i in range(200) + } + kwargs = {str(i): case_insensitive_str_class(i) for i in range(200, 300)} @benchmark def _run() -> None: @@ -241,9 +273,13 @@ def test_multidict_delitem_str( def test_cimultidict_delitem_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md_base = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100)] + md_base = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -256,56 +292,68 @@ def test_multidict_getall_str_hit( benchmark: BenchmarkFixture, any_multidict_class: Type[MultiDict[str]] ) -> None: md = any_multidict_class( - (f"key{j}", str(f"{i}-{j}")) for i in range(20) for j in range(5) + (f"key{j}", str(f"{i}-{j}")) for i in range(100) for j in range(10) ) + key = "key5" + @benchmark def _run() -> None: - for i in range(30): - md.getall("key3") + for i in range(1000): + md.getall(key) def test_multidict_getall_str_miss( benchmark: BenchmarkFixture, any_multidict_class: Type[MultiDict[str]] ) -> None: md = any_multidict_class( - (f"key{j}", str(f"{i}-{j}")) for i in range(20) for j in range(5) + (f"key{j}", str(f"{i}-{j}")) for i in range(100) for j in range(10) ) + key = "key-miss" + @benchmark def _run() -> None: - for i in range(30): - md.getall("miss", ()) + for i in range(1000): + md.getall(key, ()) def test_cimultidict_getall_istr_hit( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - all_istr = istr("key3") md = case_insensitive_multidict_class( - (f"key{j}", istr(f"{i}-{j}")) for i in range(20) for j in range(5) + (f"key{j}", case_insensitive_str_class(f"{i}-{j}")) + for i in range(100) + for j in range(10) ) + key = case_insensitive_str_class("key5") + @benchmark def _run() -> None: - for i in range(30): - md.getall(all_istr) + for i in range(1000): + md.getall(key) def test_cimultidict_getall_istr_miss( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - miss_istr = istr("miss") md = case_insensitive_multidict_class( - (istr(f"key{j}"), istr(f"{i}-{j}")) for i in range(20) for j in range(5) + (case_insensitive_str_class(f"key{j}"), case_insensitive_str_class(f"{i}-{j}")) + for i in range(100) + for j in range(10) ) + key = case_insensitive_str_class("key-miss") + @benchmark def _run() -> None: - for i in range(30): - md.getall(miss_istr, ()) + for i in range(1000): + md.getall(key, ()) def test_multidict_fetch( @@ -323,9 +371,13 @@ def test_multidict_fetch( def test_cimultidict_fetch_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100)] + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -360,9 +412,13 @@ def test_multidict_get_miss( def test_cimultidict_get_istr_hit( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100)] + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -373,9 +429,13 @@ def test_cimultidict_get_istr_hit( def test_cimultidict_get_istr_miss( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100, 200)] + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100, 200)] @benchmark def _run() -> None: @@ -398,9 +458,13 @@ def test_multidict_get_hit_with_default( def test_cimultidict_get_istr_hit_with_default( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100)] + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100)] @benchmark def _run() -> None: @@ -411,9 +475,13 @@ def test_cimultidict_get_istr_hit_with_default( def test_cimultidict_get_istr_with_default_miss( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - md = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(100)) - items = [istr(i) for i in range(100, 200)] + md = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ) + items = [case_insensitive_str_class(i) for i in range(100, 200)] @benchmark def _run() -> None: @@ -453,8 +521,12 @@ def test_create_multidict_with_items( def test_create_cimultidict_with_items_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - items = [(istr(i), istr(i)) for i in range(100)] + items = [ + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ] @benchmark def _run() -> None: @@ -474,8 +546,11 @@ def test_create_multidict_with_dict( def test_create_cimultidict_with_dict_istr( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - dct = {istr(i): istr(i) for i in range(100)} + dct = { + case_insensitive_str_class(i): case_insensitive_str_class(i) for i in range(100) + } @benchmark def _run() -> None: @@ -496,9 +571,13 @@ def test_create_multidict_with_items_with_kwargs( def test_create_cimultidict_with_items_istr_with_kwargs( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - items = [(istr(i), istr(i)) for i in range(100)] - kwargs = {str(i): istr(i) for i in range(100)} + items = [ + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ] + kwargs = {str(i): case_insensitive_str_class(i) for i in range(100)} @benchmark def _run() -> None: @@ -534,8 +613,12 @@ def test_create_empty_cimultidictproxy( def test_create_cimultidictproxy( benchmark: BenchmarkFixture, + case_insensitive_str_class: type[istr], ) -> None: - items = [(istr(i), istr(i)) for i in range(100)] + items = [ + (case_insensitive_str_class(i), case_insensitive_str_class(i)) + for i in range(100) + ] md = CIMultiDict(items) @benchmark @@ -546,8 +629,11 @@ def test_create_cimultidictproxy( def test_create_from_existing_cimultidict( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - existing = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(5)) + existing = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) for i in range(5) + ) @benchmark def _run() -> None: @@ -557,8 +643,11 @@ def test_create_from_existing_cimultidict( def test_copy_from_existing_cimultidict( benchmark: BenchmarkFixture, case_insensitive_multidict_class: Type[CIMultiDict[istr]], + case_insensitive_str_class: type[istr], ) -> None: - existing = case_insensitive_multidict_class((istr(i), istr(i)) for i in range(5)) + existing = case_insensitive_multidict_class( + (case_insensitive_str_class(i), case_insensitive_str_class(i)) for i in range(5) + ) @benchmark def _run() -> None: diff --git a/contrib/python/multidict/tests/test_mutable_multidict.py b/contrib/python/multidict/tests/test_mutable_multidict.py index d6fe42fc255..f97646a12bd 100644 --- a/contrib/python/multidict/tests/test_mutable_multidict.py +++ b/contrib/python/multidict/tests/test_mutable_multidict.py @@ -3,7 +3,6 @@ import sys from typing import Union import pytest - from multidict import ( CIMultiDict, CIMultiDictProxy, @@ -429,6 +428,51 @@ class TestMutableMultiDict: d2 = case_sensitive_multidict_class(p) assert d2 == d + def test_merge( + self, + case_sensitive_multidict_class: type[MultiDict[Union[str, int]]], + ) -> None: + d = case_sensitive_multidict_class({"key": "one"}) + assert d == {"key": "one"} + + d.merge([("key", "other"), ("key2", "two")], key2=3, foo="bar") + assert 4 == len(d) + itms = d.items() + # we can't guarantee order of kwargs + assert ("key", "one") in itms + assert ("key2", "two") in itms + assert ("key2", 3) in itms + assert ("foo", "bar") in itms + + other = case_sensitive_multidict_class({"key": "other"}, bar="baz") + + d.merge(other) + assert ("bar", "baz") in d.items() + + d.merge({"key": "other", "boo": "moo"}) + assert ("boo", "moo") in d.items() + + d.merge() + assert 6 == len(d) + + assert ("key", "other") not in d.items() + + with pytest.raises(TypeError): + d.merge("foo", "bar") # type: ignore[arg-type, call-arg] + + def test_merge_from_proxy( + self, + case_sensitive_multidict_class: type[MultiDict[str]], + case_sensitive_multidict_proxy_class: type[MultiDictProxy[str]], + ) -> None: + d = case_sensitive_multidict_class([("a", "a"), ("b", "b")]) + proxy = case_sensitive_multidict_proxy_class(d) + + d2 = case_sensitive_multidict_class() + d2.merge(proxy) + + assert [("a", "a"), ("b", "b")] == list(d2.items()) + class TestCIMutableMultiDict: def test_getall( @@ -813,3 +857,61 @@ class TestCIMutableMultiDict: assert md.keys() == md2.keys() - {"User-Agent"} md.update([("User-Agent", b"Bacon/1.0")]) assert md.keys() == md2.keys() + + def test_update_with_crash_in_the_middle( + self, case_insensitive_multidict_class: type[CIMultiDict[str]] + ) -> None: + class Hack(str): + def lower(self) -> str: + raise RuntimeError + + d = case_insensitive_multidict_class([("a", "a"), ("b", "b")]) + with pytest.raises(RuntimeError): + lst = [("c", "c"), ("a", "a2"), (Hack("b"), "b2")] + d.update(lst) + + assert [("a", "a2"), ("b", "b"), ("c", "c")] == list(d.items()) + + +def test_multidict_shrink_regression() -> None: + """ + Regression test for _md_shrink pointer increment bug in 6.6.0. + + The bug was introduced in PR #1200 which added _md_shrink to optimize + memory usage. The bug occurs when new_ep == old_ep (first non-deleted + entry), causing new_ep to not be incremented. This results in the first + entry being overwritten and memory corruption. + + See: https://github.com/aio-libs/multidict/issues/1221 + """ + # Test case that reproduces the corruption + md: MultiDict[str] = MultiDict() + + # Create pattern: [kept, deleted, kept, kept, ...] + # This triggers new_ep == old_ep on first iteration of _md_shrink + for i in range(10): + md[f"k{i}"] = f"v{i}" + + # Delete some entries but keep the first one + # This creates the exact condition for the bug + for i in range(1, 10, 2): + del md[f"k{i}"] + + # Trigger shrink by adding many entries + # When the internal array needs to resize, it will call _md_shrink + # because md->used < md->keys->nentries + for i in range(50): + md[f"new{i}"] = f"val{i}" + + # The bug would cause k0 to be lost due to memory corruption! + assert "k0" in md, "First entry k0 was lost due to memory corruption!" + assert md["k0"] == "v0", "First entry value was corrupted!" + + # Verify all other kept entries survived + for i in range(0, 10, 2): + assert f"k{i}" in md, f"Entry k{i} missing!" + assert md[f"k{i}"] == f"v{i}", f"Entry k{i} has wrong value!" + + # Verify new entries + for i in range(50): + assert md[f"new{i}"] == f"val{i}" diff --git a/contrib/python/multidict/ya.make b/contrib/python/multidict/ya.make index c24361e5d33..379decb789d 100644 --- a/contrib/python/multidict/ya.make +++ b/contrib/python/multidict/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.5.1) +VERSION(6.6.2) LICENSE(Apache-2.0) |