feat contrib: aiogram 3

Relates: https://st.yandex-team.ru/, https://st.yandex-team.ru/
author: armenqa <armenqa@yandex-team.com> 2024-01-19 12:23:50 +0300
committer: armenqa <armenqa@yandex-team.com> 2024-01-19 13:10:03 +0300
commit: 2de0149d0151c514b22bca0760b95b26c9b0b578 (patch)
tree: 2bfed9f3bce7e643ddf048bb61ce3dc0a714bcc2 /library/python/strings
parent: a8c06d218f12b2406fbce24d194885c5d7b68503 (diff)
download: ydb-2de0149d0151c514b22bca0760b95b26c9b0b578.tar.gz
3 files changed, 270 insertions, 0 deletions
diff --git a/library/python/strings/__init__.py b/library/python/strings/__init__.py
index ae27ddbdae..cd1084b0f0 100644
--- a/library/python/strings/__init__.py
+++ b/library/python/strings/__init__.py
@@ -12,10 +12,13 @@ from .strings import (
     guess_default_encoding,
     left_strip,
     locale_encoding,
+    parse_qs_binary,
+    parse_qsl_binary,
     stringize_deep,
     to_basestring,
     to_str,
     to_unicode,
     truncate,
     unicodize_deep,
+    unquote_binary,
 )
diff --git a/library/python/strings/strings.py b/library/python/strings/strings.py
index f5fa2d32c6..d068f30b76 100644
--- a/library/python/strings/strings.py
+++ b/library/python/strings/strings.py
@@ -176,3 +176,120 @@ def fix_utf8(data):
     # remove destroyed symbol code
     udata = six.ensure_text(data, 'utf-8', 'ignore')
     return six.ensure_str(udata, 'utf-8', errors='ignore')
+
+
+_hexdig = "0123456789ABCDEFabcdef"  # every hex digit, in both cases
+_hextobyte = {  # two hex digits as a bytes key (any case mix) -> the one-byte string they encode
+    (a + b).encode(): bytes.fromhex(a + b) if six.PY3 else (a + b).decode("hex") for a in _hexdig for b in _hexdig
+}
+
+
+def parse_qs_binary(qs, keep_blank_values=False, strict_parsing=False, max_num_fields=None, separator=b'&'):
+    """Bytes counterpart of `parse_qs` from `urlparse` / `urllib.parse`.
+
+    Collects the (name, value) pairs produced by parse_qsl_binary() into a
+    dictionary, so a name that occurs several times keeps all of its values.
+
+    Arguments:
+
+    qs: percent-encoded query, given as bytes.
+
+    keep_blank_values: if true, fields with an empty value are kept and
+        reported as empty byte strings; if false (the default) they are
+        dropped as if they had not been sent.
+
+    strict_parsing: if true, a malformed field raises ValueError; if
+        false (the default) it is silently skipped.
+
+    max_num_fields: int. If set, ValueError is raised when the query
+        holds more fields than this limit.
+
+    separator: bytes. Delimiter between query arguments.
+        Defaults to &.
+
+    Returns a dictionary that maps each name to the list of its values,
+    in the order they appear in the query.
+    """
+    grouped = {}
+    pair_list = parse_qsl_binary(
+        qs, keep_blank_values, strict_parsing, max_num_fields=max_num_fields, separator=separator
+    )
+    for key, item in pair_list:
+        # setdefault creates the list the first time a name is seen
+        grouped.setdefault(key, []).append(item)
+    return grouped
+
+
+def parse_qsl_binary(qs, keep_blank_values=False, strict_parsing=False, max_num_fields=None, separator=b'&'):
+    """Bytes counterpart of `parse_qsl` from `urlparse` / `urllib.parse`.
+
+    Splits the query into fields, splits every field on the first "=",
+    turns "+" into a space and decodes %xx escapes with unquote_binary().
+
+    Arguments:
+
+    qs: percent-encoded query, given as bytes.
+
+    keep_blank_values: if true, fields with an empty value (or without
+        "=" at all) are kept and reported with an empty byte string as
+        the value; if false (the default) they are dropped as if they
+        had not been sent.
+
+    strict_parsing: if true, a field without "=" (an empty field
+        included) raises ValueError; if false (the default) such
+        fields are handled according to keep_blank_values, and empty
+        fields are skipped.
+
+    max_num_fields: int. If set, ValueError is raised when the query
+        holds more fields than this limit; the check runs before any
+        field is parsed.
+
+    separator: bytes. Delimiter between query arguments.
+        Defaults to &.
+
+    Returns a list of (name, value) tuples of byte strings, in query order.
+
+    parse_qsl_binary(b'a=1&b=x+y') -> [(b'a', b'1'), (b'b', b'x y')]
+    """
+
+    if max_num_fields is not None:
+        # Count separators before splitting so an oversized query is
+        # rejected without first building the list of fields.
+        field_count = qs.count(separator) + 1 if qs else 0
+        if field_count > max_num_fields:
+            raise ValueError('Max number of fields exceeded')
+
+    pairs = []
+    for field in (qs.split(separator) if qs else []):
+        if not (field or strict_parsing):
+            continue  # empty field, e.g. from "a=1&&b=2"
+        raw_name, equals, raw_value = field.partition(b'=')
+        if not equals and strict_parsing:
+            raise ValueError("bad query field: %r" % (field,))
+        # Without "=" raw_value is empty, so such a field only survives
+        # the check below when blank values are kept.
+        if raw_value or keep_blank_values:
+            name = unquote_binary(raw_name.replace(b'+', b' '))
+            value = unquote_binary(raw_value.replace(b'+', b' '))
+            pairs.append((name, value))
+    return pairs
+
+
+def unquote_binary(string):
+    """Replace %xx escapes in a byte string by the single byte they encode.
+
+    A "%" not followed by two hex digits is kept as is; returns bytes.
+
+    unquote_binary(b'abc%20def') -> b'abc def'
+    unquote_binary(b'abc%FFdef') -> b'abc\xffdef'
+    unquote_binary(b'%no') -> b'%no'
+    """
+    bits = string.split(b"%")
+    if len(bits) == 1:
+        return bits[0]
+    res = [bits[0]]
+    for item in bits[1:]:
+        byte = _hextobyte.get(item[:2])
+        # Test for None, not b"%": the escape "%25" itself decodes to b"%".
+        res.extend((b"%", item) if byte is None else (byte, item[2:]))
+    return b"".join(res)
diff --git a/library/python/strings/ut/test_strings.py b/library/python/strings/ut/test_strings.py
index 6177c10b25..d2bfe6ed8b 100644
--- a/library/python/strings/ut/test_strings.py
+++ b/library/python/strings/ut/test_strings.py
@@ -5,6 +5,11 @@ import six
 
 from library.python import strings
 
+if six.PY3:
+    from urllib.parse import parse_qs, parse_qsl, unquote
+else:
+    from urlparse import parse_qs, parse_qsl, unquote
+
 
 class Convertible(object):
     text = u'текст'
@@ -272,3 +277,148 @@ def test_truncate_utf_8_text_wrong_limit():
 
     with pytest.raises(AssertionError):
         strings.truncate("hello", 4, msg="long msg")
+
+
+@pytest.mark.parametrize(
+    "given,expected",
+    [
+        (  # single pair
+            b"a=a",
+            [(b"a", b"a")],
+        ),
+        (  # repeated name: both pairs are kept, in order
+            b"a=a&a=b",
+            [(b"a", b"a"), (b"a", b"b")],
+        ),
+        (  # every "+" becomes a space
+            b"a=a+&b=b++",
+            [(b"a", b"a "), (b"b", b"b  ")],
+        ),
+        (  # the empty field between "&&" is skipped
+            b"a=a&&b=b",
+            [(b"a", b"a"), (b"b", b"b")],
+        ),
+        (  # "%" not followed by two hex digits stays literal
+            b"a=a&b=%%3C%2Fscript%3E",
+            [(b"a", b"a"), (b"b", b"%</script>")],
+        ),
+        (  # escapes decode to raw bytes (here a UTF-8 BOM), not to text
+            b"clid=%EF%BB%BF123",
+            [(b"clid", b"\xef\xbb\xbf123")],
+        ),
+    ],
+)
+def test_parse_qsl(given, expected):  # parse_qsl_binary with default options
+    assert strings.parse_qsl_binary(given) == expected
+
+
+@pytest.mark.parametrize(
+    "given,expected,keep_blank_values",
+    [
+        (b"a=", {}, False),  # empty value dropped
+        (b"a=", {b"a": [b""]}, True),  # empty value kept as b""
+        (b"a", {}, False),  # name without "=" dropped
+        (b"a", {b"a": [b""]}, True),  # name without "=" kept with b"" as value
+        (b"a=a&a=b", {b"a": [b"a", b"b"]}, False),  # values of a repeated name share one list
+    ],
+)
+def test_parse_qs_with_keep_blank_values(given, expected, keep_blank_values):
+    assert strings.parse_qs_binary(given, keep_blank_values=keep_blank_values) == expected
+
+
+@pytest.mark.parametrize(
+    "given,strict_parsing",
+    [(b"a", True)],  # a field without "=" is an error in strict mode
+)
+def test_parse_qs_with_strict_parsing(given, strict_parsing):
+    with pytest.raises(ValueError, match="bad query field.*"):
+        strings.parse_qs_binary(given, strict_parsing=strict_parsing)
+
+    with pytest.raises(ValueError, match="bad query field.*"):  # same input, standard-library parser
+        parse_qs(given, strict_parsing=strict_parsing)
+
+
+@pytest.mark.parametrize(
+    "given,max_num_fields",
+    [(b"a=a&b=bb&c=c", 2)],  # three fields against a limit of two
+)
+def test_parse_qs_with_max_num_fields(given, max_num_fields):
+    with pytest.raises(ValueError, match="Max number of fields exceeded"):
+        strings.parse_qs_binary(given, max_num_fields=max_num_fields)
+
+    with pytest.raises(ValueError, match="Max number of fields exceeded"):  # same input, standard-library parser
+        parse_qs(given, max_num_fields=max_num_fields)
+
+
+@pytest.mark.parametrize(
+    "given,expected",
+    [
+        (  # empty input
+            b"",
+            b"",
+        ),
+        (  # nothing to decode
+            b"without percent",
+            b"without percent",
+        ),
+        (  # valid escapes
+            b"%61 and %62",
+            b"a and b",
+        ),
+        (  # valid escape next to "%" sequences that are not hex escapes
+            b"%FF can't %unparse char%",
+            b"\xff can't %unparse char%",
+        ),
+    ],
+)
+def test_unquote(given, expected):  # NOTE(review): no case here decodes to b"%" itself (e.g. b"%25")
+    assert strings.unquote_binary(given) == expected
+
+
+URL_PARAMS = [  # rows of (query, keep_blank_values, strict_parsing, max_num_fields)
+    (b"a=", False, False, None),
+    (b"a=a&a=b", False, False, None),
+    (b"a=a&a=b&b=b", False, False, None),
+    (b"a=a&&b=b", False, False, None),
+    (b"a=a&b=%%3C%2Fscript%3E", False, False, None),
+    (b"a=", True, False, None),
+    (b"a", False, False, None),
+    (b"a", True, False, None),
+]
+
+
+@pytest.mark.parametrize(
+    "string,keep_blank_values,strict_parsing,max_num_fields",
+    URL_PARAMS if six.PY3 else URL_PARAMS + [(b"clid=%EF%BB%BF123", False, False, None)],  # extra case: Python 2 only
+)
+def test_parse_qs_compatibility(string, keep_blank_values, strict_parsing, max_num_fields):
+    for string_method, urlparse_method in (strings.parse_qsl_binary, parse_qsl), (strings.parse_qs_binary, parse_qs):
+        string_res = string_method(  # implementation under test
+            string,
+            keep_blank_values=keep_blank_values,
+            strict_parsing=strict_parsing,
+            max_num_fields=max_num_fields,
+        )
+        urlparse_res = urlparse_method(  # standard-library counterpart, same arguments
+            string,
+            keep_blank_values=keep_blank_values,
+            strict_parsing=strict_parsing,
+            max_num_fields=max_num_fields,
+        )
+        assert string_res == urlparse_res
+
+
+@pytest.mark.parametrize(
+    "string",
+    [
+        (b""),
+        (b"without percent"),
+        (b"a and b"),
+        ((b"%FF " if six.PY2 else b"") + b"can't %unparse char%"),  # the "%FF " prefix is added on Python 2 only
+    ],
+)
+def test_unquote_compatibility(string):
+    unquote_res = unquote(string)  # standard-library result used as the reference
+    if six.PY3:
+        unquote_res = six.ensure_binary(unquote_res)  # make the reference bytes before comparing
+    assert strings.unquote_binary(string) == unquote_res
author	armenqa <armenqa@yandex-team.com>	2024-01-19 12:23:50 +0300
committer	armenqa <armenqa@yandex-team.com>	2024-01-19 13:10:03 +0300
commit	2de0149d0151c514b22bca0760b95b26c9b0b578 (patch)
tree	2bfed9f3bce7e643ddf048bb61ce3dc0a714bcc2 /library/python/strings
parent	a8c06d218f12b2406fbce24d194885c5d7b68503 (diff)
download	ydb-2de0149d0151c514b22bca0760b95b26c9b0b578.tar.gz