aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/yarl/tests/test_quoting.py
diff options
context:
space:
mode:
authorrekby <rekby@ydb.tech>2023-12-14 16:56:50 +0300
committerrekby <rekby@ydb.tech>2023-12-14 18:09:44 +0300
commitb2b2bb5997507072ca64548efe64447dd6395426 (patch)
treebbfbf77d11f1972c93ae4101fe561fd440d6ad6a /contrib/python/yarl/tests/test_quoting.py
parent8b8678a6a4f57c62e348cdad8afd3849011a5f11 (diff)
downloadydb-b2b2bb5997507072ca64548efe64447dd6395426.tar.gz
KIKIMR-19900 switch arcadia to python ydb sdk from contrib
Этот PR создан скриптом - для переключения зависимостей на python ydb sdk с версии внутри ydb на код, приезжающий через контриб. Код в обеих версиях одинаковый, так что поломок/изменения функционала на ожидается. На всякий случай посмотрите свои проекты и если будут возражения пишите сюда в issues или в тикет KIKIMR-19900. Если всё ок - шипните, для определённости. При отсутствии блокеров PR будет перегенерирован и влит с force-мёрджем в четверг, 14 декабря.
Diffstat (limited to 'contrib/python/yarl/tests/test_quoting.py')
-rw-r--r--contrib/python/yarl/tests/test_quoting.py450
1 files changed, 450 insertions, 0 deletions
diff --git a/contrib/python/yarl/tests/test_quoting.py b/contrib/python/yarl/tests/test_quoting.py
new file mode 100644
index 0000000000..7ebc0f9b04
--- /dev/null
+++ b/contrib/python/yarl/tests/test_quoting.py
@@ -0,0 +1,450 @@
+import pytest
+
+from yarl._quoting import NO_EXTENSIONS
+from yarl._quoting_py import _Quoter as _PyQuoter
+from yarl._quoting_py import _Unquoter as _PyUnquoter
+
+if not NO_EXTENSIONS:
+ from yarl._quoting_c import _Quoter as _CQuoter
+ from yarl._quoting_c import _Unquoter as _CUnquoter
+
+ @pytest.fixture(params=[_PyQuoter, _CQuoter], ids=["py_quoter", "c_quoter"])
+ def quoter(request):
+ return request.param
+
+ @pytest.fixture(params=[_PyUnquoter, _CUnquoter], ids=["py_unquoter", "c_unquoter"])
+ def unquoter(request):
+ return request.param
+
+else:
+
+ @pytest.fixture(params=[_PyQuoter], ids=["py_quoter"])
+ def quoter(request):
+ return request.param
+
+ @pytest.fixture(params=[_PyUnquoter], ids=["py_unquoter"])
+ def unquoter(request):
+ return request.param
+
+
+def hexescape(char):
+ """Escape char as RFC 2396 specifies"""
+ hex_repr = hex(ord(char))[2:].upper()
+ if len(hex_repr) == 1:
+ hex_repr = "0%s" % hex_repr
+ return "%" + hex_repr
+
+
+def test_quote_not_allowed_non_strict(quoter):
+ assert quoter()("%HH") == "%25HH"
+
+
+def test_quote_unfinished_tail_percent_non_strict(quoter):
+ assert quoter()("%") == "%25"
+
+
+def test_quote_unfinished_tail_digit_non_strict(quoter):
+ assert quoter()("%2") == "%252"
+
+
+def test_quote_unfinished_tail_safe_non_strict(quoter):
+ assert quoter()("%x") == "%25x"
+
+
+def test_quote_unfinished_tail_unsafe_non_strict(quoter):
+ assert quoter()("%#") == "%25%23"
+
+
+def test_quote_unfinished_tail_non_ascii_non_strict(quoter):
+ assert quoter()("%ß") == "%25%C3%9F"
+
+
+def test_quote_unfinished_tail_non_ascii2_non_strict(quoter):
+ assert quoter()("%€") == "%25%E2%82%AC"
+
+
+def test_quote_unfinished_tail_non_ascii3_non_strict(quoter):
+ assert quoter()("%🐍") == "%25%F0%9F%90%8D"
+
+
+def test_quote_from_bytes(quoter):
+ assert quoter()("archaeological arcana") == "archaeological%20arcana"
+ assert quoter()("") == ""
+
+
+def test_quote_ignore_broken_unicode(quoter):
+ s = quoter()(
+ "j\u001a\udcf4q\udcda/\udc97g\udcee\udccb\u000ch\udccb"
+ "\u0018\udce4v\u001b\udce2\udcce\udccecom/y\udccepj\u0016"
+ )
+
+ assert s == "j%1Aq%2Fg%0Ch%18v%1Bcom%2Fypj%16"
+ assert quoter()(s) == s
+
+
+def test_unquote_to_bytes(unquoter):
+ assert unquoter()("abc%20def") == "abc def"
+ assert unquoter()("") == ""
+
+
+def test_never_quote(quoter):
+ # Make sure quote() does not quote letters, digits, and "_,.-~"
+ do_not_quote = (
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "_.-~"
+ )
+ assert quoter()(do_not_quote) == do_not_quote
+ assert quoter(qs=True)(do_not_quote) == do_not_quote
+
+
+def test_safe(quoter):
+ # Test setting 'safe' parameter does what it should do
+ quote_by_default = "<>"
+ assert quoter(safe=quote_by_default)(quote_by_default) == quote_by_default
+
+ ret = quoter(safe=quote_by_default, qs=True)(quote_by_default)
+ assert ret == quote_by_default
+
+
+_SHOULD_QUOTE = [chr(num) for num in range(32)]
+_SHOULD_QUOTE.append(r'<>#"{}|\^[]`')
+_SHOULD_QUOTE.append(chr(127)) # For 0x7F
+SHOULD_QUOTE = "".join(_SHOULD_QUOTE)
+
+
+@pytest.mark.parametrize("char", SHOULD_QUOTE)
+def test_default_quoting(char, quoter):
+ # Make sure all characters that should be quoted are by default sans
+ # space (separate test for that).
+ result = quoter()(char)
+ assert hexescape(char) == result
+ result = quoter(qs=True)(char)
+ assert hexescape(char) == result
+
+
+# TODO: should it encode percent?
+def test_default_quoting_percent(quoter):
+ result = quoter()("%25")
+ assert "%25" == result
+ result = quoter(qs=True)("%25")
+ assert "%25" == result
+ result = quoter(requote=False)("%25")
+ assert "%2525" == result
+
+
+def test_default_quoting_partial(quoter):
+ partial_quote = "ab[]cd"
+ expected = "ab%5B%5Dcd"
+ result = quoter()(partial_quote)
+ assert expected == result
+ result = quoter(qs=True)(partial_quote)
+ assert expected == result
+
+
+def test_quoting_space(quoter):
+ # Make sure quote() and quote_plus() handle spaces as specified in
+ # their unique way
+ result = quoter()(" ")
+ assert result == hexescape(" ")
+ result = quoter(qs=True)(" ")
+ assert result == "+"
+
+ given = "a b cd e f"
+ expect = given.replace(" ", hexescape(" "))
+ result = quoter()(given)
+ assert expect == result
+ expect = given.replace(" ", "+")
+ result = quoter(qs=True)(given)
+ assert expect == result
+
+
+def test_quoting_plus(quoter):
+ assert quoter(qs=False)("alpha+beta gamma") == "alpha+beta%20gamma"
+ assert quoter(qs=True)("alpha+beta gamma") == "alpha%2Bbeta+gamma"
+ assert quoter(safe="+", qs=True)("alpha+beta gamma") == "alpha+beta+gamma"
+
+
+def test_quote_with_unicode(quoter):
+ # Characters in Latin-1 range, encoded by default in UTF-8
+ given = "\u00a2\u00d8ab\u00ff"
+ expect = "%C2%A2%C3%98ab%C3%BF"
+ result = quoter()(given)
+ assert expect == result
+ # Characters in BMP, encoded by default in UTF-8
+ given = "\u6f22\u5b57" # "Kanji"
+ expect = "%E6%BC%A2%E5%AD%97"
+ result = quoter()(given)
+ assert expect == result
+
+
+def test_quote_plus_with_unicode(quoter):
+ # Characters in Latin-1 range, encoded by default in UTF-8
+ given = "\u00a2\u00d8ab\u00ff"
+ expect = "%C2%A2%C3%98ab%C3%BF"
+ result = quoter(qs=True)(given)
+ assert expect == result
+ # Characters in BMP, encoded by default in UTF-8
+ given = "\u6f22\u5b57" # "Kanji"
+ expect = "%E6%BC%A2%E5%AD%97"
+ result = quoter(qs=True)(given)
+ assert expect == result
+
+
+@pytest.mark.parametrize("num", list(range(128)))
+def test_unquoting(num, unquoter):
+ # Make sure unquoting of all ASCII values works
+ given = hexescape(chr(num))
+ expect = chr(num)
+ result = unquoter()(given)
+ assert expect == result
+ if expect not in "+=&;":
+ result = unquoter(qs=True)(given)
+ assert expect == result
+
+
+# Expected value should be the same as given.
+# See https://url.spec.whatwg.org/#percent-encoded-bytes
+@pytest.mark.parametrize(
+ ("input", "expected"),
+ [
+ ("%", "%"),
+ ("%2", "%2"),
+ ("%x", "%x"),
+ ("%€", "%€"),
+ ("%2x", "%2x"),
+ ("%2 ", "%2 "),
+ ("% 2", "% 2"),
+ ("%xa", "%xa"),
+ ("%%", "%%"),
+ ("%%3f", "%?"),
+ ("%2%", "%2%"),
+ ("%2%3f", "%2?"),
+ ("%x%3f", "%x?"),
+ ("%€%3f", "%€?"),
+ ],
+)
+def test_unquoting_bad_percent_escapes(unquoter, input, expected):
+ assert unquoter()(input) == expected
+
+
+@pytest.mark.xfail
+# FIXME: After conversion to bytes, should not cause UTF-8 decode fail.
+# See https://url.spec.whatwg.org/#percent-encoded-bytes
+def test_unquoting_invalid_utf8_sequence(unquoter):
+ with pytest.raises(ValueError):
+ unquoter()("%AB")
+ with pytest.raises(ValueError):
+ unquoter()("%AB%AB")
+
+
+def test_unquoting_mixed_case_percent_escapes(unquoter):
+ expected = "𝕦"
+ assert expected == unquoter()("%F0%9D%95%A6")
+ assert expected == unquoter()("%F0%9d%95%a6")
+ assert expected == unquoter()("%f0%9D%95%a6")
+ assert expected == unquoter()("%f0%9d%95%a6")
+
+
+def test_unquoting_parts(unquoter):
+ # Make sure unquoting works when have non-quoted characters
+ # interspersed
+ given = "ab" + hexescape("c") + "d"
+ expect = "abcd"
+ result = unquoter()(given)
+ assert expect == result
+ result = unquoter(qs=True)(given)
+ assert expect == result
+
+
+def test_quote_None(quoter):
+ assert quoter()(None) is None
+
+
+def test_unquote_None(unquoter):
+ assert unquoter()(None) is None
+
+
+def test_quote_empty_string(quoter):
+ assert quoter()("") == ""
+
+
+def test_unquote_empty_string(unquoter):
+ assert unquoter()("") == ""
+
+
+def test_quote_bad_types(quoter):
+ with pytest.raises(TypeError):
+ quoter()(123)
+
+
+def test_unquote_bad_types(unquoter):
+ with pytest.raises(TypeError):
+ unquoter()(123)
+
+
+def test_quote_lowercase(quoter):
+ assert quoter()("%d1%84") == "%D1%84"
+
+
+def test_quote_unquoted(quoter):
+ assert quoter()("%41") == "A"
+
+
+def test_quote_space(quoter):
+ assert quoter()(" ") == "%20" # NULL
+
+
+# test to see if this would work to fix
+# coverage on this file.
+def test_quote_percent_last_character(quoter):
+ # % is last character in this case.
+ assert quoter()("%") == "%25"
+
+
+def test_unquote_unsafe(unquoter):
+ assert unquoter(unsafe="@")("%40") == "%40"
+
+
+def test_unquote_unsafe2(unquoter):
+ assert unquoter(unsafe="@")("%40abc") == "%40abc"
+
+
+def test_unquote_unsafe3(unquoter):
+ assert unquoter(qs=True)("a%2Bb=?%3D%2B%26") == "a%2Bb=?%3D%2B%26"
+
+
+def test_unquote_unsafe4(unquoter):
+ assert unquoter(unsafe="@")("a@b") == "a%40b"
+
+
+@pytest.mark.parametrize(
+ ("input", "expected"),
+ [
+ ("%e2%82", "%e2%82"),
+ ("%e2%82ac", "%e2%82ac"),
+ ("%e2%82%f8", "%e2%82%f8"),
+ ("%e2%82%2b", "%e2%82+"),
+ ("%e2%82%e2%82%ac", "%e2%82€"),
+ ("%e2%82%e2%82", "%e2%82%e2%82"),
+ ],
+)
+def test_unquote_non_utf8(unquoter, input, expected):
+ assert unquoter()(input) == expected
+
+
+def test_unquote_unsafe_non_utf8(unquoter):
+ assert unquoter(unsafe="\n")("%e2%82%0a") == "%e2%82%0A"
+
+
+def test_unquote_plus_non_utf8(unquoter):
+ assert unquoter(qs=True)("%e2%82%2b") == "%e2%82%2B"
+
+
+def test_quote_non_ascii(quoter):
+ assert quoter()("%F8") == "%F8"
+
+
+def test_quote_non_ascii2(quoter):
+ assert quoter()("a%F8b") == "a%F8b"
+
+
+def test_quote_percent_percent_encoded(quoter):
+ assert quoter()("%%3f") == "%25%3F"
+
+
+def test_quote_percent_digit_percent_encoded(quoter):
+ assert quoter()("%2%3f") == "%252%3F"
+
+
+def test_quote_percent_safe_percent_encoded(quoter):
+ assert quoter()("%x%3f") == "%25x%3F"
+
+
+def test_quote_percent_unsafe_percent_encoded(quoter):
+ assert quoter()("%#%3f") == "%25%23%3F"
+
+
+def test_quote_percent_non_ascii_percent_encoded(quoter):
+ assert quoter()("%ß%3f") == "%25%C3%9F%3F"
+
+
+def test_quote_percent_non_ascii2_percent_encoded(quoter):
+ assert quoter()("%€%3f") == "%25%E2%82%AC%3F"
+
+
+def test_quote_percent_non_ascii3_percent_encoded(quoter):
+ assert quoter()("%🐍%3f") == "%25%F0%9F%90%8D%3F"
+
+
+class StrLike(str):
+ pass
+
+
+def test_quote_str_like(quoter):
+ assert quoter()(StrLike("abc")) == "abc"
+
+
+def test_unquote_str_like(unquoter):
+ assert unquoter()(StrLike("abc")) == "abc"
+
+
+def test_quote_sub_delims(quoter):
+ assert quoter()("!$&'()*+,;=") == "!$&'()*+,;="
+
+
+def test_requote_sub_delims(quoter):
+ assert quoter()("%21%24%26%27%28%29%2A%2B%2C%3B%3D") == "!$&'()*+,;="
+
+
+def test_unquoting_plus(unquoter):
+ assert unquoter(qs=False)("a+b") == "a+b"
+
+
+def test_unquote_plus_to_space(unquoter):
+ assert unquoter(qs=True)("a+b") == "a b"
+
+
+def test_unquote_plus_to_space_unsafe(unquoter):
+ assert unquoter(unsafe="+", qs=True)("a+b") == "a+b"
+
+
+def test_quote_qs_with_colon(quoter):
+ s = quoter(safe="=+&?/:@", qs=True)("next=http%3A//example.com/")
+ assert s == "next=http://example.com/"
+
+
+def test_quote_protected(quoter):
+ s = quoter(protected="/")("/path%2fto/three")
+ assert s == "/path%2Fto/three"
+
+
+def test_quote_fastpath_safe(quoter):
+ s1 = "/path/to"
+ s2 = quoter(safe="/")(s1)
+ assert s1 is s2
+
+
+def test_quote_fastpath_pct(quoter):
+ s1 = "abc%A0"
+ s2 = quoter()(s1)
+ assert s1 is s2
+
+
+def test_quote_very_large_string(quoter):
+ # more than 8 KiB
+ s = "abcфух%30%0a" * 1024
+ assert quoter()(s) == "abc%D1%84%D1%83%D1%850%0A" * 1024
+
+
+def test_space(quoter):
+ s = "% A"
+ assert quoter()(s) == "%25%20A"
+
+
+def test_quoter_path_with_plus(quoter):
+ s = "/test/x+y%2Bz/:+%2B/"
+ assert "/test/x+y%2Bz/:+%2B/" == quoter(safe="@:", protected="/+")(s)
+
+
+def test_unquoter_path_with_plus(unquoter):
+ s = "/test/x+y%2Bz/:+%2B/"
+ assert "/test/x+y+z/:++/" == unquoter(unsafe="+")(s)