aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/yarl/tests/test_quoting.py
blob: d9b6ae8e4bf471cfb52bf7f8f0dd7469a9340d92 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
import pytest

from yarl._quoting import NO_EXTENSIONS
from yarl._quoting_py import _Quoter as _PyQuoter
from yarl._quoting_py import _Unquoter as _PyUnquoter

# (id, class) pairs for every implementation under test.  The pure-Python
# classes are always present; the ``yarl._quoting_c`` classes are added only
# when NO_EXTENSIONS is false.
_QUOTER_IMPLS = [("py_quoter", _PyQuoter)]
_UNQUOTER_IMPLS = [("py_unquoter", _PyUnquoter)]

if not NO_EXTENSIONS:
    from yarl._quoting_c import _Quoter as _CQuoter
    from yarl._quoting_c import _Unquoter as _CUnquoter

    _QUOTER_IMPLS.append(("c_quoter", _CQuoter))
    _UNQUOTER_IMPLS.append(("c_unquoter", _CUnquoter))


@pytest.fixture(
    params=[impl for _, impl in _QUOTER_IMPLS],
    ids=[name for name, _ in _QUOTER_IMPLS],
)
def quoter(request):
    """Return the quoter class selected for the current parametrized run."""
    return request.param


@pytest.fixture(
    params=[impl for _, impl in _UNQUOTER_IMPLS],
    ids=[name for name, _ in _UNQUOTER_IMPLS],
)
def unquoter(request):
    """Return the unquoter class selected for the current parametrized run."""
    return request.param


def hexescape(char):
    """Return the percent-escape of a single character.

    The code point of ``char`` is rendered as upper-case hexadecimal,
    zero-padded to at least two digits, and prefixed with ``%``
    (the escape form described by RFC 2396).
    """
    code_point = ord(char)
    return "%%%02X" % code_point


def test_quote_not_allowed_non_strict(quoter):
    """Expect a ``%`` followed by non-hex characters to be escaped as ``%25``."""
    quote = quoter()
    assert quote("%HH") == "%25HH"


def test_quote_unfinished_tail_percent_non_strict(quoter):
    """Expect a lone ``%`` to be escaped as ``%25``."""
    quote = quoter()
    assert quote("%") == "%25"


def test_quote_unfinished_tail_digit_non_strict(quoter):
    """Expect ``%`` followed by a single hex digit to have its ``%`` escaped."""
    quote = quoter()
    assert quote("%2") == "%252"


def test_quote_unfinished_tail_safe_non_strict(quoter):
    """Expect ``%x`` to become ``%25x``: the ``%`` is escaped, ``x`` is kept."""
    quote = quoter()
    assert quote("%x") == "%25x"


def test_quote_unfinished_tail_unsafe_non_strict(quoter):
    """Expect both characters of ``%#`` to be percent-escaped."""
    quote = quoter()
    assert quote("%#") == "%25%23"


def test_quote_unfinished_tail_non_ascii_non_strict(quoter):
    """Expect ``%`` followed by a two-byte UTF-8 character to be fully escaped."""
    quote = quoter()
    assert quote("%ß") == "%25%C3%9F"


def test_quote_unfinished_tail_non_ascii2_non_strict(quoter):
    """Expect ``%`` followed by a three-byte UTF-8 character to be fully escaped."""
    quote = quoter()
    assert quote("%€") == "%25%E2%82%AC"


def test_quote_unfinished_tail_non_ascii3_non_strict(quoter):
    """Expect ``%`` followed by a four-byte UTF-8 character to be fully escaped."""
    quote = quoter()
    assert quote("%🐍") == "%25%F0%9F%90%8D"


def test_quote_from_bytes(quoter):
    """Expect a space to be quoted as ``%20`` and ``""`` to stay empty.

    Despite the test's name, the inputs used here are ``str`` values.
    """
    cases = (
        ("archaeological arcana", "archaeological%20arcana"),
        ("", ""),
    )
    for raw, quoted in cases:
        assert quoter()(raw) == quoted


def test_quote_ignore_broken_unicode(quoter):
    """Expect lone surrogate code points to be dropped from the quoted output.

    The expected result also has the control characters and ``/`` escaped,
    and quoting that result a second time must leave it unchanged.
    """
    broken = (
        "j\u001a\udcf4q\udcda/\udc97g\udcee\udccb\u000ch\udccb"
        "\u0018\udce4v\u001b\udce2\udcce\udccecom/y\udccepj\u0016"
    )
    first_pass = quoter()(broken)

    assert first_pass == "j%1Aq%2Fg%0Ch%18v%1Bcom%2Fypj%16"
    # Re-quoting already quoted text is expected to be a no-op.
    second_pass = quoter()(first_pass)
    assert second_pass == first_pass


def test_unquote_to_bytes(unquoter):
    """Expect ``%20`` to be decoded to a space and ``""`` to stay empty.

    Despite the test's name, both inputs and outputs here are ``str`` values.
    """
    cases = (
        ("abc%20def", "abc def"),
        ("", ""),
    )
    for quoted, plain in cases:
        assert unquoter()(quoted) == plain


def test_never_quote(quoter):
    """Expect ASCII letters, digits and ``_.-~`` to pass through unquoted.

    Checked both with the default settings and with ``qs=True``.
    """
    untouched = "".join(
        (
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
            "abcdefghijklmnopqrstuvwxyz",
            "0123456789",
            "_.-~",
        )
    )
    for options in ({}, {"qs": True}):
        assert quoter(**options)(untouched) == untouched


def test_safe(quoter):
    """Expect characters listed in ``safe`` to be left unquoted.

    ``<>`` is used as the sample; it is checked with and without ``qs=True``.
    """
    chars = "<>"
    for extra in ({}, {"qs": True}):
        quoted = quoter(safe=chars, **extra)(chars)
        assert quoted == chars


# Characters expected to be quoted by default: code points 0x00-0x1F,
# a run of ASCII punctuation, and 0x7F.
_SHOULD_QUOTE = list(map(chr, range(32))) + [
    r'<>#"{}|\^[]`',
    chr(127),  # 0x7F
]
SHOULD_QUOTE = "".join(_SHOULD_QUOTE)


@pytest.mark.parametrize("char", SHOULD_QUOTE)
def test_default_quoting(char, quoter):
    """Expect every character of SHOULD_QUOTE to be percent-escaped by default.

    The space character is not part of SHOULD_QUOTE; it has its own test.
    """
    escaped = hexescape(char)
    assert quoter()(char) == escaped
    assert quoter(qs=True)(char) == escaped


# TODO: should it encode percent?
def test_default_quoting_percent(quoter):
    """Expect ``%25`` to be kept as-is unless ``requote=False`` is passed.

    With ``requote=False`` the leading ``%`` is itself escaped, giving
    ``%2525``.
    """
    already_quoted = "%25"
    cases = (
        ({}, "%25"),
        ({"qs": True}, "%25"),
        ({"requote": False}, "%2525"),
    )
    for options, expected in cases:
        assert quoter(**options)(already_quoted) == expected


def test_default_quoting_partial(quoter):
    """Expect only ``[`` and ``]`` to be escaped inside ``ab[]cd``.

    Checked both with the default settings and with ``qs=True``.
    """
    for options in ({}, {"qs": True}):
        assert quoter(**options)("ab[]cd") == "ab%5B%5Dcd"


def test_quoting_space(quoter):
    """Expect a space to become its hex escape by default and ``+`` with ``qs=True``.

    Verified for a lone space and for a phrase containing several spaces.
    """
    space_escape = hexescape(" ")

    # A single space on its own.
    assert quoter()(" ") == space_escape
    assert quoter(qs=True)(" ") == "+"

    # Spaces interleaved with ordinary characters.
    phrase = "a b cd e f"
    assert quoter()(phrase) == phrase.replace(" ", space_escape)
    assert quoter(qs=True)(phrase) == phrase.replace(" ", "+")


def test_quoting_plus(quoter):
    """Expect ``+`` handling to depend on ``qs`` and ``safe``.

    With ``qs=False`` the plus is kept; with ``qs=True`` it is escaped as
    ``%2B`` unless ``+`` is listed in ``safe``.
    """
    text = "alpha+beta gamma"
    cases = (
        ({"qs": False}, "alpha+beta%20gamma"),
        ({"qs": True}, "alpha%2Bbeta+gamma"),
        ({"safe": "+", "qs": True}, "alpha+beta+gamma"),
    )
    for options, expected in cases:
        assert quoter(**options)(text) == expected


def test_quote_with_unicode(quoter):
    """Expect non-ASCII characters to be escaped as their UTF-8 bytes."""
    cases = (
        # Characters in the Latin-1 range.
        ("\u00a2\u00d8ab\u00ff", "%C2%A2%C3%98ab%C3%BF"),
        # Characters in the BMP ("Kanji").
        ("\u6f22\u5b57", "%E6%BC%A2%E5%AD%97"),
    )
    for text, expected in cases:
        assert quoter()(text) == expected


def test_quote_plus_with_unicode(quoter):
    """Expect ``qs=True`` to escape non-ASCII characters as their UTF-8 bytes."""
    cases = (
        # Characters in the Latin-1 range.
        ("\u00a2\u00d8ab\u00ff", "%C2%A2%C3%98ab%C3%BF"),
        # Characters in the BMP ("Kanji").
        ("\u6f22\u5b57", "%E6%BC%A2%E5%AD%97"),
    )
    for text, expected in cases:
        assert quoter(qs=True)(text) == expected


@pytest.mark.parametrize("num", range(128))
def test_unquoting(num, unquoter):
    """Expect the hex escape of every ASCII code point to unquote to itself.

    The ``qs=True`` check is skipped for ``+``, ``=``, ``&`` and ``;``.
    """
    char = chr(num)
    escaped = hexescape(char)
    assert unquoter()(escaped) == char
    if char in "+=&;":
        return
    assert unquoter(qs=True)(escaped) == char


# Expected value should be the same as given.
# See https://url.spec.whatwg.org/#percent-encoded-bytes
@pytest.mark.parametrize(
    "input,expected",
    (
        ("%", "%"),
        ("%2", "%2"),
        ("%x", "%x"),
        ("%€", "%€"),
        ("%2x", "%2x"),
        ("%2 ", "%2 "),
        ("% 2", "% 2"),
        ("%xa", "%xa"),
        ("%%", "%%"),
        ("%%3f", "%?"),
        ("%2%", "%2%"),
        ("%2%3f", "%2?"),
        ("%x%3f", "%x?"),
        ("%€%3f", "%€?"),
    ),
)
def test_unquoting_bad_percent_escapes(unquoter, input, expected):
    """Expect malformed ``%`` sequences to be left as-is.

    Well-formed escapes that follow a malformed one are still decoded.
    """
    unquote = unquoter()
    assert unquote(input) == expected


@pytest.mark.xfail(
    reason="""
    FIXME: After conversion to bytes, should not cause UTF-8 decode fail.
    See https://url.spec.whatwg.org/#percent-encoded-bytes

    Refs:
    * https://github.com/aio-libs/yarl/pull/216
    * https://github.com/aio-libs/yarl/pull/214
    * https://github.com/aio-libs/yarl/pull/7
    """,
)
@pytest.mark.parametrize("urlencoded_string", ("%AB", "%AB%AB"))
def test_unquoting_invalid_utf8_sequence(unquoter, urlencoded_string):
    """Assert that unquoting the given escapes raises ``ValueError``.

    The test is marked ``xfail``, so it is expected to fail at present;
    see the marker's reason for the background.
    """
    with pytest.raises(ValueError):
        unquoter()(urlencoded_string)


def test_unquoting_mixed_case_percent_escapes(unquoter):
    """Expect hex digits in escapes to be decoded regardless of letter case."""
    encodings = (
        "%F0%9D%95%A6",
        "%F0%9d%95%a6",
        "%f0%9D%95%a6",
        "%f0%9d%95%a6",
    )
    for encoded in encodings:
        assert unquoter()(encoded) == "𝕦"


def test_unquoting_parts(unquoter):
    """Expect an escape embedded between plain characters to be decoded.

    Checked both with the default settings and with ``qs=True``.
    """
    mixed = "ab{}d".format(hexescape("c"))
    for options in ({}, {"qs": True}):
        assert unquoter(**options)(mixed) == "abcd"


def test_quote_None(quoter):
    """Expect quoting ``None`` to return ``None``."""
    result = quoter()(None)
    assert result is None


def test_unquote_None(unquoter):
    """Expect unquoting ``None`` to return ``None``."""
    result = unquoter()(None)
    assert result is None


def test_quote_empty_string(quoter):
    """Expect quoting the empty string to return the empty string."""
    quote = quoter()
    assert quote("") == ""


def test_unquote_empty_string(unquoter):
    """Expect unquoting the empty string to return the empty string."""
    unquote = unquoter()
    assert unquote("") == ""


def test_quote_bad_types(quoter):
    """Expect quoting a non-string value (an ``int``) to raise ``TypeError``."""
    not_a_string = 123
    with pytest.raises(TypeError):
        quoter()(not_a_string)


def test_unquote_bad_types(unquoter):
    """Expect unquoting a non-string value (an ``int``) to raise ``TypeError``."""
    not_a_string = 123
    with pytest.raises(TypeError):
        unquoter()(not_a_string)


def test_quote_lowercase(quoter):
    """Expect lower-case hex digits in existing escapes to be upper-cased."""
    quote = quoter()
    assert quote("%d1%84") == "%D1%84"


def test_quote_unquoted(quoter):
    """Expect the escape ``%41`` to be emitted as the plain letter ``A``."""
    quote = quoter()
    assert quote("%41") == "A"


def test_quote_space(quoter):
    """Expect a single space to be quoted as ``%20``."""
    quote = quoter()
    assert quote(" ") == "%20"


# Added to improve test coverage: exercises an input whose
# final character is "%".
def test_quote_percent_last_character(quoter):
    """Expect a ``%`` that ends the input to be escaped as ``%25``."""
    trailing_percent = "%"
    assert quoter()(trailing_percent) == "%25"


def test_unquote_unsafe(unquoter):
    """Expect the escape of a character listed in ``unsafe`` to be kept."""
    unquote = unquoter(unsafe="@")
    assert unquote("%40") == "%40"


def test_unquote_unsafe2(unquoter):
    """Expect an ``unsafe`` escape to be kept when plain text follows it."""
    unquote = unquoter(unsafe="@")
    assert unquote("%40abc") == "%40abc"


def test_unquote_unsafe3(unquoter):
    """Expect ``qs=True`` to keep the escapes ``%2B``, ``%3D`` and ``%26``."""
    query = "a%2Bb=?%3D%2B%26"
    unquote = unquoter(qs=True)
    assert unquote(query) == "a%2Bb=?%3D%2B%26"


def test_unquote_unsafe4(unquoter):
    """Expect a literal character listed in ``unsafe`` to come out escaped."""
    unquote = unquoter(unsafe="@")
    assert unquote("a@b") == "a%40b"


@pytest.mark.parametrize(
    "input,expected",
    (
        ("%e2%82", "%e2%82"),
        ("%e2%82ac", "%e2%82ac"),
        ("%e2%82%f8", "%e2%82%f8"),
        ("%e2%82%2b", "%e2%82+"),
        ("%e2%82%e2%82%ac", "%e2%82€"),
        ("%e2%82%e2%82", "%e2%82%e2%82"),
    ),
)
def test_unquote_non_utf8(unquoter, input, expected):
    """Expect escapes that do not form complete UTF-8 to be left untouched.

    Escapes after them that do decode (``%2b``, ``%e2%82%ac``) are still
    converted.
    """
    unquote = unquoter()
    assert unquote(input) == expected


def test_unquote_unsafe_non_utf8(unquoter):
    """Expect an ``unsafe`` escape after undecodable escapes to stay escaped.

    The retained escape is expected with upper-case hex digits (``%0A``).
    """
    unquote = unquoter(unsafe="\n")
    assert unquote("%e2%82%0a") == "%e2%82%0A"


def test_unquote_plus_non_utf8(unquoter):
    """Expect ``qs=True`` to keep ``%2b`` escaped after undecodable escapes.

    The retained escape is expected with upper-case hex digits (``%2B``).
    """
    unquote = unquoter(qs=True)
    assert unquote("%e2%82%2b") == "%e2%82%2B"


def test_quote_non_ascii(quoter):
    """Expect the existing escape ``%F8`` to be returned unchanged."""
    quote = quoter()
    assert quote("%F8") == "%F8"


def test_quote_non_ascii2(quoter):
    """Expect ``%F8`` surrounded by plain letters to be returned unchanged."""
    quote = quoter()
    assert quote("a%F8b") == "a%F8b"


def test_quote_percent_percent_encoded(quoter):
    """Expect a stray ``%`` before a valid escape to become ``%25``.

    The valid escape that follows is kept, with upper-case hex digits.
    """
    quote = quoter()
    assert quote("%%3f") == "%25%3F"


def test_quote_percent_digit_percent_encoded(quoter):
    """Expect ``%2`` before a valid escape to have only its ``%`` escaped."""
    quote = quoter()
    assert quote("%2%3f") == "%252%3F"


def test_quote_percent_safe_percent_encoded(quoter):
    """Expect ``%x`` before a valid escape to become ``%25x``."""
    quote = quoter()
    assert quote("%x%3f") == "%25x%3F"


def test_quote_percent_unsafe_percent_encoded(quoter):
    """Expect ``%#`` before a valid escape to have both characters escaped."""
    quote = quoter()
    assert quote("%#%3f") == "%25%23%3F"


def test_quote_percent_non_ascii_percent_encoded(quoter):
    """Expect ``%`` plus a two-byte character before a valid escape to be escaped."""
    quote = quoter()
    assert quote("%ß%3f") == "%25%C3%9F%3F"


def test_quote_percent_non_ascii2_percent_encoded(quoter):
    """Expect ``%`` plus a three-byte character before a valid escape to be escaped."""
    quote = quoter()
    assert quote("%€%3f") == "%25%E2%82%AC%3F"


def test_quote_percent_non_ascii3_percent_encoded(quoter):
    """Expect ``%`` plus a four-byte character before a valid escape to be escaped."""
    quote = quoter()
    assert quote("%🐍%3f") == "%25%F0%9F%90%8D%3F"


class StrLike(str):
    """Trivial ``str`` subclass used to check that subclasses are accepted."""


def test_quote_str_like(quoter):
    """Expect an instance of a ``str`` subclass to be accepted by the quoter."""
    subclass_value = StrLike("abc")
    assert quoter()(subclass_value) == "abc"


def test_unquote_str_like(unquoter):
    """Expect an instance of a ``str`` subclass to be accepted by the unquoter."""
    subclass_value = StrLike("abc")
    assert unquoter()(subclass_value) == "abc"


def test_quote_sub_delims(quoter):
    """Expect the characters ``!$&'()*+,;=`` to be left unquoted."""
    quote = quoter()
    assert quote("!$&'()*+,;=") == "!$&'()*+,;="


def test_requote_sub_delims(quoter):
    """Expect escaped forms of ``!$&'()*+,;=`` to be turned back into literals."""
    quote = quoter()
    assert quote("%21%24%26%27%28%29%2A%2B%2C%3B%3D") == "!$&'()*+,;="


def test_unquoting_plus(unquoter):
    """Expect a literal ``+`` to be kept when ``qs=False``."""
    unquote = unquoter(qs=False)
    assert unquote("a+b") == "a+b"


def test_unquote_plus_to_space(unquoter):
    """Expect a literal ``+`` to be decoded to a space when ``qs=True``."""
    unquote = unquoter(qs=True)
    assert unquote("a+b") == "a b"


def test_unquote_plus_to_space_unsafe(unquoter):
    """Expect ``+`` to stay a plus under ``qs=True`` when listed in ``unsafe``."""
    unquote = unquoter(unsafe="+", qs=True)
    assert unquote("a+b") == "a+b"


def test_quote_qs_with_colon(quoter):
    """Expect ``%3A`` to be emitted as ``:`` when ``:`` is listed in ``safe``."""
    quote = quoter(safe="=+&?/:@", qs=True)
    assert quote("next=http%3A//example.com/") == "next=http://example.com/"


def test_quote_protected(quoter):
    """Expect an escaped ``protected`` character to stay escaped.

    The escape ``%2f`` is kept (upper-cased to ``%2F``) while the literal
    slashes are left as they are.
    """
    quote = quoter(protected="/")
    assert quote("/path%2fto/three") == "/path%2Fto/three"


def test_quote_fastpath_safe(quoter):
    """Expect the very same string object back when nothing needs quoting."""
    path = "/path/to"
    quoted = quoter(safe="/")(path)
    # Identity, not just equality: no new string should be built.
    assert path is quoted


def test_quote_fastpath_pct(quoter):
    """Expect the very same string object back for an already quoted input."""
    already_quoted = "abc%A0"
    quoted = quoter()(already_quoted)
    # Identity, not just equality: no new string should be built.
    assert already_quoted is quoted


def test_quote_very_large_string(quoter):
    """Expect correct quoting of an input longer than 8 KiB."""
    repeats = 1024
    large_input = "abcфух%30%0a" * repeats
    expected = "abc%D1%84%D1%83%D1%850%0A" * repeats
    assert quoter()(large_input) == expected


def test_space(quoter):
    """Expect ``%`` followed by a space to have both characters escaped."""
    quote = quoter()
    assert quote("% A") == "%25%20A"


def test_quoter_path_with_plus(quoter):
    """Expect literal ``+`` and escaped ``%2B`` to both survive quoting.

    ``+`` is listed in ``protected`` here, alongside ``/``.
    """
    quote = quoter(safe="@:", protected="/+")
    assert quote("/test/x+y%2Bz/:+%2B/") == "/test/x+y%2Bz/:+%2B/"


def test_unquoter_path_with_plus(unquoter):
    """Check how ``+`` and ``%2B`` are unquoted when ``+`` is in ``unsafe``.

    The asserted result has every ``%2B`` decoded to a literal ``+``.
    """
    unquote = unquoter(unsafe="+")
    assert unquote("/test/x+y%2Bz/:+%2B/") == "/test/x+y+z/:++/"