diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/url/url_ut.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/string_utils/url/url_ut.cpp')
-rw-r--r-- | library/cpp/string_utils/url/url_ut.cpp | 281 |
1 files changed, 281 insertions, 0 deletions
diff --git a/library/cpp/string_utils/url/url_ut.cpp b/library/cpp/string_utils/url/url_ut.cpp new file mode 100644 index 0000000000..1588013893 --- /dev/null +++ b/library/cpp/string_utils/url/url_ut.cpp @@ -0,0 +1,281 @@ +#include "url.h" + +#include <util/string/cast.h> + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TUtilUrlTest) { + Y_UNIT_TEST(TestGetHostAndGetHostAndPort) { + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe:8080")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("https://ya.ru:8080/bebe")); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("www.ya.ru:8080/bebe")); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("https://www.ya.ru:8080/bebe")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080/bebe")); + // irl RFC3986 sometimes gets ignored + UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHost("pravda-kmv.ru?page=news&id=6973")); + UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHostAndPort("pravda-kmv.ru?page=news&id=6973")); + // check simple string + UNIT_ASSERT_VALUES_EQUAL("some_blender_url", GetHost("some_blender_url")); + UNIT_ASSERT_VALUES_EQUAL("", GetHost("")); + } + + Y_UNIT_TEST(TestGetPathAndQuery) { + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org:8080")); + UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("ru.wikipedia.org/index.php?123/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("http://ru.wikipedia.org:8080")); + UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("https://ru.wikipedia.org/index.php?123/")); + UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/#comment")); + UNIT_ASSERT_VALUES_EQUAL("/?1", GetPathAndQuery("ru.wikipedia.org/?1#comment")); + UNIT_ASSERT_VALUES_EQUAL("/?1#comment", GetPathAndQuery("ru.wikipedia.org/?1#comment", false)); + } + + Y_UNIT_TEST(TestGetDomain) { + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("www.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("a.b.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya", GetDomain("ya")); + UNIT_ASSERT_VALUES_EQUAL("", GetDomain("")); + } + + Y_UNIT_TEST(TestGetParentDomain) { + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("www.ya.ru", 0)); + UNIT_ASSERT_VALUES_EQUAL("ru", GetParentDomain("www.ya.ru", 1)); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetParentDomain("www.ya.ru", 2)); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 3)); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 4)); + UNIT_ASSERT_VALUES_EQUAL("com", GetParentDomain("ya.com", 1)); + UNIT_ASSERT_VALUES_EQUAL("ya.com", GetParentDomain("ya.com", 2)); + UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 1)); + UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 2)); + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 0)); + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 1)); + } + + Y_UNIT_TEST(TestGetZone) { + UNIT_ASSERT_VALUES_EQUAL("ru", GetZone("www.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("com", GetZone("ya.com")); + UNIT_ASSERT_VALUES_EQUAL("RU", GetZone("RU")); + UNIT_ASSERT_VALUES_EQUAL("FHFBN", GetZone("ya.FHFBN")); + UNIT_ASSERT_VALUES_EQUAL("", GetZone("")); + } + + Y_UNIT_TEST(TestAddSchemePrefix) { + UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("yandex.ru")); + UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("http://yandex.ru")); + UNIT_ASSERT_VALUES_EQUAL("https://yandex.ru", AddSchemePrefix("https://yandex.ru")); + UNIT_ASSERT_VALUES_EQUAL("file://yandex.ru", AddSchemePrefix("file://yandex.ru")); + UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", AddSchemePrefix("ya.ru", "ftp")); + } + + Y_UNIT_TEST(TestSchemeGet) { + UNIT_ASSERT_VALUES_EQUAL("http://", GetSchemePrefix("http://ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("yaru")); + UNIT_ASSERT_VALUES_EQUAL("yaru://", GetSchemePrefix("yaru://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ftp://", GetSchemePrefix("ftp://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("https://", GetSchemePrefix("https://")); // is that right? + } + + Y_UNIT_TEST(TestSchemeCut) { + UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutSchemePrefix("http://ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("yaru", CutSchemePrefix("yaru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("yaru://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ftp://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("", CutSchemePrefix("https://")); // is that right? + + UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", CutHttpPrefix("ftp://ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz", true)); + UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz")); + UNIT_ASSERT_VALUES_EQUAL("https://ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz", true)); + UNIT_ASSERT_VALUES_EQUAL("", CutHttpPrefix("https://")); // is that right? + UNIT_ASSERT_VALUES_EQUAL("https://", CutHttpPrefix("https://", true)); // is that right? + } + + Y_UNIT_TEST(TestMisc) { + UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("www.")); + UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("WwW.")); + UNIT_ASSERT_VALUES_EQUAL("www", CutWWWPrefix("www")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWPrefix("www.ya.ru")); + + UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www.")); + UNIT_ASSERT_VALUES_EQUAL("www", CutWWWNumberedPrefix("www")); + UNIT_ASSERT_VALUES_EQUAL("www27", CutWWWNumberedPrefix("www27")); + UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www27.")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www2.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www12.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("ww2.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("w1w2w3.ya.ru", CutWWWNumberedPrefix("w1w2w3.ya.ru")); + UNIT_ASSERT_VALUES_EQUAL("123.ya.ru", CutWWWNumberedPrefix("123.ya.ru")); + + UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("m.")); + UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("M.")); + UNIT_ASSERT_VALUES_EQUAL("m", CutMPrefix("m")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutMPrefix("m.ya.ru")); + } + + Y_UNIT_TEST(TestSplitUrlToHostAndPath) { + TStringBuf host, path; + + SplitUrlToHostAndPath("https://yandex.ru/yandsearch", host, path); + UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru"); + UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch"); + + SplitUrlToHostAndPath("yandex.ru/yandsearch", host, path); + UNIT_ASSERT_STRINGS_EQUAL(host, "yandex.ru"); + UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch"); + + SplitUrlToHostAndPath("https://yandex.ru", host, path); + UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru"); + UNIT_ASSERT_STRINGS_EQUAL(path, ""); + + SplitUrlToHostAndPath("invalid url /", host, path); + UNIT_ASSERT_STRINGS_EQUAL(host, "invalid url "); + UNIT_ASSERT_STRINGS_EQUAL(path, "/"); + + SplitUrlToHostAndPath("some_blender_url", host, path); + UNIT_ASSERT_STRINGS_EQUAL(host, "some_blender_url"); + UNIT_ASSERT_STRINGS_EQUAL(path, ""); + } + + Y_UNIT_TEST(TestSeparateUrlFromQueryAndFragment) { + TStringBuf sanitizedUrl, query, fragment; + + SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch", sanitizedUrl, query, fragment); + UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch"); + UNIT_ASSERT_STRINGS_EQUAL(query, ""); + UNIT_ASSERT_STRINGS_EQUAL(fragment, ""); + + SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1¶m2=val2", sanitizedUrl, query, fragment); + UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch"); + UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1¶m2=val2"); + UNIT_ASSERT_STRINGS_EQUAL(fragment, ""); + + SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch#fragment", sanitizedUrl, query, fragment); + UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch"); + UNIT_ASSERT_STRINGS_EQUAL(query, ""); + UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment"); + + SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1¶m2=val2#fragment", sanitizedUrl, query, fragment); + UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch"); + UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1¶m2=val2"); + UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment"); + } + + Y_UNIT_TEST(TestGetSchemeHostAndPort) { + { // all components are present + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("https://ya.ru:8080/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "https://"); + UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru"); + UNIT_ASSERT_VALUES_EQUAL(port, 8080); + } + { // scheme is abset + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("ya.ru:8080/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown"); + UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru"); + UNIT_ASSERT_VALUES_EQUAL(port, 8080); + } + { // scheme and port are absent + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("ya.ru/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown"); + UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru"); + UNIT_ASSERT_VALUES_EQUAL(port, 0); + } + { // port is absent, but returned its default value for HTTP + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("http://ya.ru/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "http://"); + UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru"); + UNIT_ASSERT_VALUES_EQUAL(port, 80); + } + { // port is absent, but returned its default value for HTTPS + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("https://ya.ru/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "https://"); + UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru"); + UNIT_ASSERT_VALUES_EQUAL(port, 443); + } + { // ipv6 + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("https://[1080:0:0:0:8:800:200C:417A]:443/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "https://"); + UNIT_ASSERT_VALUES_EQUAL(host, "[1080:0:0:0:8:800:200C:417A]"); + UNIT_ASSERT_VALUES_EQUAL(port, 443); + } + { // ipv6 + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("[::1]/bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown"); + UNIT_ASSERT_VALUES_EQUAL(host, "[::1]"); + UNIT_ASSERT_VALUES_EQUAL(port, 0); + } + { // ipv6 + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("unknown:///bebe", scheme, host, port); + UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown://"); + UNIT_ASSERT_VALUES_EQUAL(host, ""); + UNIT_ASSERT_VALUES_EQUAL(port, 0); + } + // port overflow + auto testCase = []() { + TStringBuf scheme("unknown"), host("unknown"); + ui16 port = 0; + GetSchemeHostAndPort("https://ya.ru:65536/bebe", scheme, host, port); + }; + UNIT_ASSERT_EXCEPTION(testCase(), yexception); + } + + Y_UNIT_TEST(TestCutUrlPrefixes) { + UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("http://ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("yaru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("https://")); + + UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("https://www.ya.ru/bebe")); + UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("www.yaru")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://www.ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("www.ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://www.ya.ru://zzz")); + UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("http://www.")); + } + + Y_UNIT_TEST(TestUrlPathStartWithToken) { + UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/zzz", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?zzz", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("https://ya.ru/bebe", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebezzz", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebe.zzz", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://bebe", "bebe")); + UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("https://bebe/", "bebe")); + } +} |