aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/yarl/tests/test_url_query.py
blob: bcd2433cbcc062a5b1de43531a16ed757a0ee03c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from typing import List, Tuple
from urllib.parse import parse_qs, urlencode

import pytest
from multidict import MultiDict, MultiDictProxy

from yarl import URL

# ========================================
# Basic chars in query values
# ========================================

# Each case pairs a URL with the query mapping that parsing it is expected to yield.
URLS_WITH_BASIC_QUERY_VALUES: List[Tuple[URL, MultiDict]] = [
    # No query string at all, then a key whose value is the empty string
    (URL("http://example.com"), MultiDict()),
    (URL("http://example.com?a="), MultiDict([("a", "")])),
    # ASCII chars; "+" in the query string is expected to come back as a space
    (URL("http://example.com?a+b=c+d"), MultiDict([("a b", "c d")])),
    (URL("http://example.com?a=1&b=2"), MultiDict([("a", "1"), ("b", "2")])),
    # A repeated key is expected to keep every one of its values
    (
        URL("http://example.com?a=1&b=2&a=3"),
        MultiDict([("a", "1"), ("b", "2"), ("a", "3")]),
    ),
    # Non-ASCII BMP chars
    (URL("http://example.com?ключ=знач"), MultiDict([("ключ", "знач")])),
    (URL("http://example.com?foo=ᴜɴɪᴄᴏᴅᴇ"), MultiDict([("foo", "ᴜɴɪᴄᴏᴅᴇ")])),
    # Non-BMP chars
    (URL("http://example.com?bar=𝕦𝕟𝕚𝕔𝕠𝕕𝕖"), MultiDict([("bar", "𝕦𝕟𝕚𝕔𝕠𝕕𝕖")])),
]


@pytest.mark.parametrize("original_url, expected_query", URLS_WITH_BASIC_QUERY_VALUES)
def test_query_basic_parsing(original_url, expected_query):
    """Check that ``URL.query`` is a ``MultiDictProxy`` equal to the expected mapping."""
    parsed = original_url.query
    assert isinstance(parsed, MultiDictProxy)
    assert parsed == expected_query


@pytest.mark.parametrize(
    "original_url, expected_query",
    URLS_WITH_BASIC_QUERY_VALUES,
)
def test_query_basic_update_query(original_url, expected_query):
    """Check that ``update_query({})`` leaves both the URL and its query intact.

    ``original_url`` is the URL under test and ``expected_query`` is the
    mapping its query string is expected to parse to.
    """
    new_url = original_url.update_query({})
    assert new_url == original_url
    # Also pin the parsed query itself, so that the parametrized expectation
    # is actually exercised rather than silently ignored.
    assert new_url.query == expected_query


def test_query_dont_unqoute_twice():
    """Check that a URL carried inside a query value is unquoted only once."""
    # The inner URL has an encoded query of its own; encoding it again as the
    # "url" value of the outer URL quotes that part a second time.
    inner_url = f"http://base.place?{urlencode({'a': '/////'})}"
    outer_url = URL(f"http://test_url.aha?{urlencode({'url': inner_url})}")

    # Reading the value back must yield the inner URL exactly as it was built.
    assert outer_url.query["url"] == inner_url


# ========================================
# Reserved chars in query values
# ========================================

# python/cpython#87133 (https://github.com/python/cpython/issues/87133)
# introduced a new `separator` keyword argument to `urllib.parse.parse_qs`
# (among others). If that name is missing from the variable names recorded in
# the function's code object, the marked test is expected to fail.
_PARSE_QS_LACKS_SEPARATOR = "separator" not in parse_qs.__code__.co_varnames

_SEMICOLON_XFAIL = pytest.mark.xfail(
    condition=_PARSE_QS_LACKS_SEPARATOR,
    strict=True,  # an unexpected pass counts as a failure
    reason=(
        "Python versions < 3.7.10, < 3.8.8 and < 3.9.2 lack a fix for "
        'CVE-2021-23336 dropping ";" as a valid query parameter separator, '
        "making this test fail."
    ),
)


# Each case is ``(url, expected number of query pairs, expected value of "a")``.
# The two percent-encoded cases appear under both headings.
URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES = [
    # Ampersand
    (URL("http://127.0.0.1/?a=10&b=20"), 2, "10"),
    (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
    (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
    # Semicolon, which is *not* a query parameter separator as of RFC3986
    (URL("http://127.0.0.1/?a=10;b=20"), 1, "10;b=20"),
    (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
    (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
]
# Same cases in the same order, except that the raw-semicolon case (index 3,
# the first one under the "Semicolon" heading) is wrapped in ``pytest.param``
# so that it is expected to fail on old Python patch releases.
URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL = [
    pytest.param(*case, marks=_SEMICOLON_XFAIL) if position == 3 else case
    for position, case in enumerate(URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES)
]


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
)
def test_query_separators_from_parsing(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """Check the number of parsed query pairs and the value stored under ``a``."""
    parsed = original_url.query
    assert len(parsed) == expected_query_len
    assert parsed["a"] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
)
def test_query_separators_from_update_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """Check that ``update_query()`` keeps ``a`` and stores the same value as ``c``.

    ``expected_query_len`` is part of the shared parametrization and is not
    used by this test.
    """
    updated_query = original_url.update_query({"c": expected_value_a}).query
    # Both the pre-existing key and the newly added one must read back the
    # same, un-split value.
    for key in ("a", "c"):
        assert updated_query[key] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
)
def test_query_separators_from_with_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """Check that a value passed to ``with_query()`` reads back unchanged as ``c``.

    ``expected_query_len`` is part of the shared parametrization and is not
    used by this test.
    """
    replacement = {"c": expected_value_a}
    rebuilt_url = original_url.with_query(replacement)
    assert rebuilt_url.query["c"] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
)
def test_query_from_empty_update_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """Check that ``update_query({})`` preserves the values of ``a`` and ``b``.

    The comparison is made against the original URL's own parsed query, so
    ``expected_query_len`` and ``expected_value_a`` are not used here.
    """
    before = original_url.query
    after = original_url.update_query({}).query

    assert after["a"] == before["a"]

    # Only some of the cases carry a separate "b" pair.
    if "b" not in before:
        return
    assert after["b"] == before["b"]