aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils/url/url_ut.cpp
blob: 1588013893995063a47e8c1dccd850e17d5595c0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
#include "url.h"

#include <util/string/cast.h>

#include <library/cpp/testing/unittest/registar.h>

Y_UNIT_TEST_SUITE(TUtilUrlTest) {
    Y_UNIT_TEST(TestGetHostAndGetHostAndPort) {
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe:8080"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe:8080"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("https://ya.ru:8080/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("www.ya.ru:8080/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("https://www.ya.ru:8080/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080/bebe"));
        // irl RFC3986 sometimes gets ignored
        UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHost("pravda-kmv.ru?page=news&id=6973"));
        UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHostAndPort("pravda-kmv.ru?page=news&id=6973"));
        // check simple string
        UNIT_ASSERT_VALUES_EQUAL("some_blender_url", GetHost("some_blender_url"));
        UNIT_ASSERT_VALUES_EQUAL("", GetHost(""));
    }

    Y_UNIT_TEST(TestGetPathAndQuery) {
        UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org"));
        UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/"));
        UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org:8080"));
        UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("ru.wikipedia.org/index.php?123/"));
        UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("http://ru.wikipedia.org:8080"));
        UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("https://ru.wikipedia.org/index.php?123/"));
        UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/#comment"));
        UNIT_ASSERT_VALUES_EQUAL("/?1", GetPathAndQuery("ru.wikipedia.org/?1#comment"));
        UNIT_ASSERT_VALUES_EQUAL("/?1#comment", GetPathAndQuery("ru.wikipedia.org/?1#comment", false));
    }

    Y_UNIT_TEST(TestGetDomain) {
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("www.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("a.b.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya", GetDomain("ya"));
        UNIT_ASSERT_VALUES_EQUAL("", GetDomain(""));
    }

    Y_UNIT_TEST(TestGetParentDomain) {
        UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("www.ya.ru", 0));
        UNIT_ASSERT_VALUES_EQUAL("ru", GetParentDomain("www.ya.ru", 1));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetParentDomain("www.ya.ru", 2));
        UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 3));
        UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 4));
        UNIT_ASSERT_VALUES_EQUAL("com", GetParentDomain("ya.com", 1));
        UNIT_ASSERT_VALUES_EQUAL("ya.com", GetParentDomain("ya.com", 2));
        UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 1));
        UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 2));
        UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 0));
        UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 1));
    }

    Y_UNIT_TEST(TestGetZone) {
        UNIT_ASSERT_VALUES_EQUAL("ru", GetZone("www.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("com", GetZone("ya.com"));
        UNIT_ASSERT_VALUES_EQUAL("RU", GetZone("RU"));
        UNIT_ASSERT_VALUES_EQUAL("FHFBN", GetZone("ya.FHFBN"));
        UNIT_ASSERT_VALUES_EQUAL("", GetZone(""));
    }

    Y_UNIT_TEST(TestAddSchemePrefix) {
        UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("yandex.ru"));
        UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("http://yandex.ru"));
        UNIT_ASSERT_VALUES_EQUAL("https://yandex.ru", AddSchemePrefix("https://yandex.ru"));
        UNIT_ASSERT_VALUES_EQUAL("file://yandex.ru", AddSchemePrefix("file://yandex.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", AddSchemePrefix("ya.ru", "ftp"));
    }

    Y_UNIT_TEST(TestSchemeGet) {
        UNIT_ASSERT_VALUES_EQUAL("http://", GetSchemePrefix("http://ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("yaru"));
        UNIT_ASSERT_VALUES_EQUAL("yaru://", GetSchemePrefix("yaru://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ftp://", GetSchemePrefix("ftp://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("https://", GetSchemePrefix("https://")); // is that right?
    }

    Y_UNIT_TEST(TestSchemeCut) {
        UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutSchemePrefix("http://ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("yaru", CutSchemePrefix("yaru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("yaru://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ftp://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("", CutSchemePrefix("https://")); // is that right?

        UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", CutHttpPrefix("ftp://ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz", true));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz"));
        UNIT_ASSERT_VALUES_EQUAL("https://ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz", true));
        UNIT_ASSERT_VALUES_EQUAL("", CutHttpPrefix("https://"));               // is that right?
        UNIT_ASSERT_VALUES_EQUAL("https://", CutHttpPrefix("https://", true)); // is that right?
    }

    Y_UNIT_TEST(TestMisc) {
        UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("www."));
        UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("WwW."));
        UNIT_ASSERT_VALUES_EQUAL("www", CutWWWPrefix("www"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWPrefix("www.ya.ru"));

        UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www."));
        UNIT_ASSERT_VALUES_EQUAL("www", CutWWWNumberedPrefix("www"));
        UNIT_ASSERT_VALUES_EQUAL("www27", CutWWWNumberedPrefix("www27"));
        UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www27."));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www2.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www12.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("ww2.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("w1w2w3.ya.ru", CutWWWNumberedPrefix("w1w2w3.ya.ru"));
        UNIT_ASSERT_VALUES_EQUAL("123.ya.ru", CutWWWNumberedPrefix("123.ya.ru"));

        UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("m."));
        UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("M."));
        UNIT_ASSERT_VALUES_EQUAL("m", CutMPrefix("m"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutMPrefix("m.ya.ru"));
    }

    Y_UNIT_TEST(TestSplitUrlToHostAndPath) {
        TStringBuf host, path;

        SplitUrlToHostAndPath("https://yandex.ru/yandsearch", host, path);
        UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru");
        UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch");

        SplitUrlToHostAndPath("yandex.ru/yandsearch", host, path);
        UNIT_ASSERT_STRINGS_EQUAL(host, "yandex.ru");
        UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch");

        SplitUrlToHostAndPath("https://yandex.ru", host, path);
        UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru");
        UNIT_ASSERT_STRINGS_EQUAL(path, "");

        SplitUrlToHostAndPath("invalid url /", host, path);
        UNIT_ASSERT_STRINGS_EQUAL(host, "invalid url ");
        UNIT_ASSERT_STRINGS_EQUAL(path, "/");

        SplitUrlToHostAndPath("some_blender_url", host, path);
        UNIT_ASSERT_STRINGS_EQUAL(host, "some_blender_url");
        UNIT_ASSERT_STRINGS_EQUAL(path, "");
    }

    Y_UNIT_TEST(TestSeparateUrlFromQueryAndFragment) {
        TStringBuf sanitizedUrl, query, fragment;

        SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch", sanitizedUrl, query, fragment);
        UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
        UNIT_ASSERT_STRINGS_EQUAL(query, "");
        UNIT_ASSERT_STRINGS_EQUAL(fragment, "");

        SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1&param2=val2", sanitizedUrl, query, fragment);
        UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
        UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1&param2=val2");
        UNIT_ASSERT_STRINGS_EQUAL(fragment, "");

        SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch#fragment", sanitizedUrl, query, fragment);
        UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
        UNIT_ASSERT_STRINGS_EQUAL(query, "");
        UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment");

        SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1&param2=val2#fragment", sanitizedUrl, query, fragment);
        UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
        UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1&param2=val2");
        UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment");
    }

    Y_UNIT_TEST(TestGetSchemeHostAndPort) {
        { // all components are present
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("https://ya.ru:8080/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
            UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
            UNIT_ASSERT_VALUES_EQUAL(port, 8080);
        }
        { // scheme is abset
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("ya.ru:8080/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
            UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
            UNIT_ASSERT_VALUES_EQUAL(port, 8080);
        }
        { // scheme and port are absent
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("ya.ru/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
            UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
            UNIT_ASSERT_VALUES_EQUAL(port, 0);
        }
        { // port is absent, but returned its default value for HTTP
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("http://ya.ru/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "http://");
            UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
            UNIT_ASSERT_VALUES_EQUAL(port, 80);
        }
        { // port is absent, but returned its default value for HTTPS
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("https://ya.ru/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
            UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
            UNIT_ASSERT_VALUES_EQUAL(port, 443);
        }
        { // ipv6
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("https://[1080:0:0:0:8:800:200C:417A]:443/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
            UNIT_ASSERT_VALUES_EQUAL(host, "[1080:0:0:0:8:800:200C:417A]");
            UNIT_ASSERT_VALUES_EQUAL(port, 443);
        }
        { // ipv6
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("[::1]/bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
            UNIT_ASSERT_VALUES_EQUAL(host, "[::1]");
            UNIT_ASSERT_VALUES_EQUAL(port, 0);
        }
        { // ipv6
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("unknown:///bebe", scheme, host, port);
            UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown://");
            UNIT_ASSERT_VALUES_EQUAL(host, "");
            UNIT_ASSERT_VALUES_EQUAL(port, 0);
        }
        // port overflow
        auto testCase = []() {
            TStringBuf scheme("unknown"), host("unknown");
            ui16 port = 0;
            GetSchemeHostAndPort("https://ya.ru:65536/bebe", scheme, host, port);
        };
        UNIT_ASSERT_EXCEPTION(testCase(), yexception);
    }

    Y_UNIT_TEST(TestCutUrlPrefixes) {
        UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("http://ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("yaru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("https://"));

        UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("https://www.ya.ru/bebe"));
        UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("www.yaru"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://www.ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("www.ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://www.ya.ru://zzz"));
        UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("http://www."));
    }

    Y_UNIT_TEST(TestUrlPathStartWithToken) {
        UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/zzz", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?zzz", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("https://ya.ru/bebe", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebezzz", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebe.zzz", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://bebe", "bebe"));
        UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("https://bebe/", "bebe"));
    }
}