diff options
author | mowgli <mowgli@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
commit | 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch) | |
tree | c4772201af6215d48734691b8796e4cfc77c2ac8 /library/cpp/unicode | |
parent | 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff) | |
download | ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz |
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/unicode')
-rw-r--r-- | library/cpp/unicode/punycode/punycode.cpp | 242 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/punycode.h | 68 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/punycode_ut.cpp | 174 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/ut/ya.make | 18 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/ya.make | 28 |
5 files changed, 265 insertions, 265 deletions
diff --git a/library/cpp/unicode/punycode/punycode.cpp b/library/cpp/unicode/punycode/punycode.cpp index 800d1f19fe..fc13a55436 100644 --- a/library/cpp/unicode/punycode/punycode.cpp +++ b/library/cpp/unicode/punycode/punycode.cpp @@ -1,143 +1,143 @@ -#include "punycode.h" -#include <contrib/libs/libidn/idna.h> -#include <contrib/libs/libidn/punycode.h> -#include <util/charset/wide.h> -#include <util/generic/ptr.h> -#include <util/generic/vector.h> - -#include <cstdlib> - -static inline void CheckPunycodeResult(int rc) { - if (rc != PUNYCODE_SUCCESS) - ythrow TPunycodeError() << punycode_strerror(static_cast<Punycode_status>(rc)); -} - -static inline void CheckIdnaResult(int rc) { - if (rc != IDNA_SUCCESS) - ythrow TPunycodeError() << idna_strerror(static_cast<Idna_rc>(rc)); -} - -// UTF-32 helpers - +#include "punycode.h" +#include <contrib/libs/libidn/idna.h> +#include <contrib/libs/libidn/punycode.h> +#include <util/charset/wide.h> +#include <util/generic/ptr.h> +#include <util/generic/vector.h> + +#include <cstdlib> + +static inline void CheckPunycodeResult(int rc) { + if (rc != PUNYCODE_SUCCESS) + ythrow TPunycodeError() << punycode_strerror(static_cast<Punycode_status>(rc)); +} + +static inline void CheckIdnaResult(int rc) { + if (rc != IDNA_SUCCESS) + ythrow TPunycodeError() << idna_strerror(static_cast<Idna_rc>(rc)); +} + +// UTF-32 helpers + static inline void AppendWideToUtf32(const TWtringBuf& in, TVector<ui32>& out) { - out.reserve(out.size() + in.size() + 1); - - const wchar16* b = in.begin(); - const wchar16* e = in.end(); - while (b < e) { - out.push_back(ReadSymbolAndAdvance(b, e)); - } -} - + out.reserve(out.size() + in.size() + 1); + + const wchar16* b = in.begin(); + const wchar16* e = in.end(); + while (b < e) { + out.push_back(ReadSymbolAndAdvance(b, e)); + } +} + static inline void AppendUtf32ToWide(const ui32* in, size_t len, TUtf16String& out) { - out.reserve(out.size() + len); - + out.reserve(out.size() + len); + const ui32* b = in; const ui32* e = in + len; - for (; b != e; ++b) { + for (; b != e; ++b) { WriteSymbol(wchar32(*b), out); - } -} - + } +} + TStringBuf WideToPunycode(const TWtringBuf& in16, TString& out) { TVector<ui32> in32; - AppendWideToUtf32(in16, in32); - size_t outlen = in32.size(); - - int rc; - do { - outlen *= 2; - out.ReserveAndResize(outlen); + AppendWideToUtf32(in16, in32); + size_t outlen = in32.size(); + + int rc; + do { + outlen *= 2; + out.ReserveAndResize(outlen); rc = punycode_encode(in32.size(), in32.data(), nullptr, &outlen, out.begin()); - } while (rc == PUNYCODE_BIG_OUTPUT); - - CheckPunycodeResult(rc); - - out.resize(outlen); - return out; -} - + } while (rc == PUNYCODE_BIG_OUTPUT); + + CheckPunycodeResult(rc); + + out.resize(outlen); + return out; +} + TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out16) { - size_t outlen = in.size(); + size_t outlen = in.size(); TVector<ui32> out32(outlen); - + int rc = punycode_decode(in.size(), in.data(), &outlen, out32.begin(), nullptr); - CheckPunycodeResult(rc); - - AppendUtf32ToWide(out32.begin(), outlen, out16); - return out16; -} - -namespace { - template <typename TChar> - struct TIdnaResult { - TChar* Data = nullptr; - - ~TIdnaResult() { - free(Data); - } - }; -} - + CheckPunycodeResult(rc); + + AppendUtf32ToWide(out32.begin(), outlen, out16); + return out16; +} + +namespace { + template <typename TChar> + struct TIdnaResult { + TChar* Data = nullptr; + + ~TIdnaResult() { + free(Data); + } + }; +} + TString HostNameToPunycode(const TWtringBuf& unicodeHost) { TVector<ui32> in32; - AppendWideToUtf32(unicodeHost, in32); - in32.push_back(0); - - TIdnaResult<char> out; - int rc = idna_to_ascii_4z(in32.begin(), &out.Data, 0); - CheckIdnaResult(rc); - - return out.Data; -} - + AppendWideToUtf32(unicodeHost, in32); + in32.push_back(0); + + TIdnaResult<char> out; + int rc = idna_to_ascii_4z(in32.begin(), &out.Data, 0); + CheckIdnaResult(rc); + + return out.Data; +} + TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost) { - if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end())) - ythrow TPunycodeError() << "Non-ASCII punycode input"; - - size_t len = punycodeHost.size(); + if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end())) + ythrow TPunycodeError() << "Non-ASCII punycode input"; + + size_t len = punycodeHost.size(); TVector<ui32> in32(len + 1, 0); - for (size_t i = 0; i < len; ++i) - in32[i] = static_cast<ui8>(punycodeHost[i]); - in32[len] = 0; - + for (size_t i = 0; i < len; ++i) + in32[i] = static_cast<ui8>(punycodeHost[i]); + in32[len] = 0; + TIdnaResult<ui32> out; - int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0); - CheckIdnaResult(rc); - + int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0); + CheckIdnaResult(rc); + TUtf16String decoded; AppendUtf32ToWide(out.Data, std::char_traits<ui32>::length(out.Data), decoded); - return decoded; -} - + return decoded; +} + TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost) { - try { - return HostNameToPunycode(unicodeHost); - } catch (const TPunycodeError&) { - return WideToUTF8(unicodeHost); - } -} - + try { + return HostNameToPunycode(unicodeHost); + } catch (const TPunycodeError&) { + return WideToUTF8(unicodeHost); + } +} + TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost) { - try { - return PunycodeToHostName(punycodeHost); - } catch (const TPunycodeError&) { - return UTF8ToWide(punycodeHost); - } -} - -bool CanBePunycodeHostName(const TStringBuf& host) { - if (!IsStringASCII(host.begin(), host.end())) - return false; - + try { + return PunycodeToHostName(punycodeHost); + } catch (const TPunycodeError&) { + return UTF8ToWide(punycodeHost); + } +} + +bool CanBePunycodeHostName(const TStringBuf& host) { + if (!IsStringASCII(host.begin(), host.end())) + return false; + static constexpr TStringBuf ACE = "xn--"; - - TStringBuf tail(host); - while (tail) { - const TStringBuf label = tail.NextTok('.'); + + TStringBuf tail(host); + while (tail) { + const TStringBuf label = tail.NextTok('.'); if (label.StartsWith(ACE)) - return true; - } - - return false; -} + return true; + } + + return false; +} diff --git a/library/cpp/unicode/punycode/punycode.h b/library/cpp/unicode/punycode/punycode.h index af4acc25c1..9d4517fede 100644 --- a/library/cpp/unicode/punycode/punycode.h +++ b/library/cpp/unicode/punycode/punycode.h @@ -1,46 +1,46 @@ -#pragma once +#pragma once #include <util/generic/string.h> -#include <util/generic/strbuf.h> -#include <util/generic/yexception.h> - -// Simplified arcadia wrappers for contrib/libs/libidn/ - -// Raw strings encoder/decoder: does not prepend with ACE prefix ("xn--"), -// does not limit input length. Throws TPunycodeError on any internal error. -// Returned strbuf points to @out data. +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> + +// Simplified arcadia wrappers for contrib/libs/libidn/ + +// Raw strings encoder/decoder: does not prepend with ACE prefix ("xn--"), +// does not limit input length. Throws TPunycodeError on any internal error. +// Returned strbuf points to @out data. TStringBuf WideToPunycode(const TWtringBuf& in, TString& out); TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out); - + inline TString WideToPunycode(const TWtringBuf& in) { TString out; - WideToPunycode(in, out); - return out; -} - + WideToPunycode(in, out); + return out; +} + inline TUtf16String PunycodeToWide(const TStringBuf& in) { TUtf16String out; - PunycodeToWide(in, out); - return out; -} - -// Encode a sequence of point-separated domain labels -// into a sequence of corresponding punycode labels. -// Labels containing non-ASCII characters are prefixed with ACE prefix ("xn--"). -// Limits maximal encoded domain label length to IDNA_LABEL_MAX_LENGTH (255 by default). -// Throws TPunycodeError on failure. + PunycodeToWide(in, out); + return out; +} + +// Encode a sequence of point-separated domain labels +// into a sequence of corresponding punycode labels. +// Labels containing non-ASCII characters are prefixed with ACE prefix ("xn--"). +// Limits maximal encoded domain label length to IDNA_LABEL_MAX_LENGTH (255 by default). +// Throws TPunycodeError on failure. TString HostNameToPunycode(const TWtringBuf& unicodeHost); TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost); - -// Robust versions: on failure return original input, converted to/from UTF8 + +// Robust versions: on failure return original input, converted to/from UTF8 TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost); TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost); - -// True if @host looks like punycode domain label sequence, -// containing at least one ACE-prefixed label. -// Note that this function does not check all requied IDNA constraints -// (max label length, empty non-root domains, etc.) -bool CanBePunycodeHostName(const TStringBuf& host); - -class TPunycodeError: public yexception { -}; + +// True if @host looks like punycode domain label sequence, +// containing at least one ACE-prefixed label. +// Note that this function does not check all requied IDNA constraints +// (max label length, empty non-root domains, etc.) +bool CanBePunycodeHostName(const TStringBuf& host); + +class TPunycodeError: public yexception { +}; diff --git a/library/cpp/unicode/punycode/punycode_ut.cpp b/library/cpp/unicode/punycode/punycode_ut.cpp index 97271cf0d8..d8a2848d47 100644 --- a/library/cpp/unicode/punycode/punycode_ut.cpp +++ b/library/cpp/unicode/punycode/punycode_ut.cpp @@ -1,8 +1,8 @@ -#include "punycode.h" - +#include "punycode.h" + #include <library/cpp/testing/unittest/registar.h> -#include <util/charset/wide.h> - +#include <util/charset/wide.h> + namespace { template<typename T1, typename T2> inline bool HasSameBuffer(const T1& s1, const T2& s2) { @@ -16,111 +16,111 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { TString buf1; TUtf16String buf2; return HasSameBuffer(WideToPunycode(unicode, buf1), buf1) && buf1 == punycode && HasSameBuffer(PunycodeToWide(punycode, buf2), buf2) && buf2 == unicode && WideToPunycode(unicode) == punycode && PunycodeToWide(punycode) == unicode; - } - + } + Y_UNIT_TEST(RawEncodeDecode) { - UNIT_ASSERT(TestRaw("", "")); - UNIT_ASSERT(TestRaw(" ", " -")); - UNIT_ASSERT(TestRaw("-", "--")); - UNIT_ASSERT(TestRaw("!@#$%", "!@#$%-")); - UNIT_ASSERT(TestRaw("xn-", "xn--")); - UNIT_ASSERT(TestRaw("xn--", "xn---")); - UNIT_ASSERT(TestRaw("abc", "abc-")); - UNIT_ASSERT(TestRaw("Latin123", "Latin123-")); - - UNIT_ASSERT(TestRaw("München", "Mnchen-3ya")); - UNIT_ASSERT(TestRaw("bücher", "bcher-kva")); - UNIT_ASSERT(TestRaw("BüüchEr", "BchEr-kvaa")); - - UNIT_ASSERT(TestRaw("президент", "d1abbgf6aiiy")); - UNIT_ASSERT(TestRaw("Президент", "r0a6bcbig1bsy")); - UNIT_ASSERT(TestRaw("ПРЕЗИДЕНТ", "g0abbgf6aiiy")); - UNIT_ASSERT(TestRaw("рф", "p1ai")); - UNIT_ASSERT(TestRaw("пример", "e1afmkfd")); - - { + UNIT_ASSERT(TestRaw("", "")); + UNIT_ASSERT(TestRaw(" ", " -")); + UNIT_ASSERT(TestRaw("-", "--")); + UNIT_ASSERT(TestRaw("!@#$%", "!@#$%-")); + UNIT_ASSERT(TestRaw("xn-", "xn--")); + UNIT_ASSERT(TestRaw("xn--", "xn---")); + UNIT_ASSERT(TestRaw("abc", "abc-")); + UNIT_ASSERT(TestRaw("Latin123", "Latin123-")); + + UNIT_ASSERT(TestRaw("München", "Mnchen-3ya")); + UNIT_ASSERT(TestRaw("bücher", "bcher-kva")); + UNIT_ASSERT(TestRaw("BüüchEr", "BchEr-kvaa")); + + UNIT_ASSERT(TestRaw("президент", "d1abbgf6aiiy")); + UNIT_ASSERT(TestRaw("Президент", "r0a6bcbig1bsy")); + UNIT_ASSERT(TestRaw("ПРЕЗИДЕНТ", "g0abbgf6aiiy")); + UNIT_ASSERT(TestRaw("рф", "p1ai")); + UNIT_ASSERT(TestRaw("пример", "e1afmkfd")); + + { const wchar16 tmp[] = {0x82, 0x81, 0x80, 0}; UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode - } - - UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError); - UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError); + } + + UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError); + UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError); UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError); - - { + + { TString longIn; - for (size_t i = 0; i < 1024; ++i) - longIn += "Qй"; - + for (size_t i = 0; i < 1024; ++i) + longIn += "Qй"; + TString longOut = "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ-lo11fbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; - UNIT_ASSERT(TestRaw(longIn, longOut)); - } - } - + UNIT_ASSERT(TestRaw(longIn, longOut)); + } + } + static bool TestHostName(const TString& utf8, const TString& punycode, bool canBePunycode = false) { TUtf16String unicode = UTF8ToWide(utf8); TString buf1; TUtf16String buf2; - //Cerr << "Testing " << utf8 << Endl; + //Cerr << "Testing " << utf8 << Endl; return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode; - } - + } + static bool TestForced(const TString& bad) { return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad); - } - + } + Y_UNIT_TEST(HostNameEncodeDecode) { - UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true)); + UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true)); UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true)); UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true)); UNIT_ASSERT(TestHostName("ascii.test", "ascii.test")); - - UNIT_ASSERT(TestHostName("", "")); - UNIT_ASSERT(TestHostName(".", ".")); + + UNIT_ASSERT(TestHostName("", "")); + UNIT_ASSERT(TestHostName(".", ".")); UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok - UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true)); - UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true)); - - UNIT_ASSERT(TestHostName("-", "-")); - UNIT_ASSERT(TestHostName("xn--", "xn--", true)); - UNIT_ASSERT(TestHostName("xn--aaa.-", "xn--aaa.-", true)); - UNIT_ASSERT(TestHostName("xn--xn--d1acpjx3f.xn--ru", "xn--xn--d1acpjx3f.xn--ru", true)); - - { - // non-ascii + UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true)); + UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true)); + + UNIT_ASSERT(TestHostName("-", "-")); + UNIT_ASSERT(TestHostName("xn--", "xn--", true)); + UNIT_ASSERT(TestHostName("xn--aaa.-", "xn--aaa.-", true)); + UNIT_ASSERT(TestHostName("xn--xn--d1acpjx3f.xn--ru", "xn--xn--d1acpjx3f.xn--ru", true)); + + { + // non-ascii TString bad = "президент.рф"; - UNIT_ASSERT_EXCEPTION(PunycodeToHostName("президент.рф"), TPunycodeError); - UNIT_ASSERT(ForcePunycodeToHostName(bad) == UTF8ToWide(bad)); - } - { - // too long domain label + UNIT_ASSERT_EXCEPTION(PunycodeToHostName("президент.рф"), TPunycodeError); + UNIT_ASSERT(ForcePunycodeToHostName(bad) == UTF8ToWide(bad)); + } + { + // too long domain label TString bad(500, 'a'); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); UNIT_ASSERT(TestForced(bad)); // but can decode it - } - { - // already has ACE prefix + } + { + // already has ACE prefix TString bad("xn--яндекс.xn--рф"); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); - UNIT_ASSERT(TestForced(bad)); - } - { - // empty non-root domain is not allowed (?) + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT(TestForced(bad)); + } + { + // empty non-root domain is not allowed (?) TString bad(".яндекс.рф"); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); - UNIT_ASSERT(TestForced(bad)); - } - - UNIT_ASSERT(CanBePunycodeHostName("xn--")); - UNIT_ASSERT(CanBePunycodeHostName("yandex.xn--p1ai")); - UNIT_ASSERT(CanBePunycodeHostName("xn--d1acpjx3f.xn--p1ai")); - UNIT_ASSERT(CanBePunycodeHostName("a.b.c.d.xn--e")); - UNIT_ASSERT(CanBePunycodeHostName("xn--a.b.c.xn--d.e")); - UNIT_ASSERT(!CanBePunycodeHostName("yandex.ru")); // no xn-- - UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii - UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii - UNIT_ASSERT(!CanBePunycodeHostName("")); + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT(TestForced(bad)); + } + + UNIT_ASSERT(CanBePunycodeHostName("xn--")); + UNIT_ASSERT(CanBePunycodeHostName("yandex.xn--p1ai")); + UNIT_ASSERT(CanBePunycodeHostName("xn--d1acpjx3f.xn--p1ai")); + UNIT_ASSERT(CanBePunycodeHostName("a.b.c.d.xn--e")); + UNIT_ASSERT(CanBePunycodeHostName("xn--a.b.c.xn--d.e")); + UNIT_ASSERT(!CanBePunycodeHostName("yandex.ru")); // no xn-- + UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii + UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii + UNIT_ASSERT(!CanBePunycodeHostName("")); UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here - } -} + } +} diff --git a/library/cpp/unicode/punycode/ut/ya.make b/library/cpp/unicode/punycode/ut/ya.make index 74272102a8..a9dd5f7c34 100644 --- a/library/cpp/unicode/punycode/ut/ya.make +++ b/library/cpp/unicode/punycode/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST_FOR(library/cpp/unicode/punycode) - -OWNER( + +OWNER( g:base g:middle g:upper -) - -SRCS( - punycode_ut.cpp -) - -END() +) + +SRCS( + punycode_ut.cpp +) + +END() diff --git a/library/cpp/unicode/punycode/ya.make b/library/cpp/unicode/punycode/ya.make index 62b41b07b7..4dce86fbf5 100644 --- a/library/cpp/unicode/punycode/ya.make +++ b/library/cpp/unicode/punycode/ya.make @@ -1,17 +1,17 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( g:base g:middle g:upper -) - -PEERDIR( - contrib/libs/libidn -) - -SRCS( - punycode.cpp -) - -END() +) + +PEERDIR( + contrib/libs/libidn +) + +SRCS( + punycode.cpp +) + +END() |