diff options
author | mowgli <mowgli@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
commit | 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch) | |
tree | c4772201af6215d48734691b8796e4cfc77c2ac8 /library | |
parent | 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff) | |
download | ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz |
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library')
60 files changed, 1704 insertions, 1704 deletions
diff --git a/library/cpp/accurate_accumulate/accurate_accumulate.h b/library/cpp/accurate_accumulate/accurate_accumulate.h index dacced17e9..21ed77a7ce 100644 --- a/library/cpp/accurate_accumulate/accurate_accumulate.h +++ b/library/cpp/accurate_accumulate/accurate_accumulate.h @@ -11,7 +11,7 @@ public: using TValueType = TAccumulateType; template <typename TFloatType> - explicit TKahanAccumulator(const TFloatType x) + explicit TKahanAccumulator(const TFloatType x) : Sum_(x) , Compensation_() { @@ -30,12 +30,12 @@ public: return *this; } - TValueType Get() const { + TValueType Get() const { return Sum_ + Compensation_; } template <typename TFloatType> - inline operator TFloatType() const { + inline operator TFloatType() const { return Get(); } @@ -91,31 +91,31 @@ private: }; template <typename TAccumulateType, typename TFloatType> -inline const TKahanAccumulator<TAccumulateType> +inline const TKahanAccumulator<TAccumulateType> operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs += rhs; } template <typename TAccumulateType, typename TFloatType> -inline const TKahanAccumulator<TAccumulateType> +inline const TKahanAccumulator<TAccumulateType> operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs -= rhs; } template <typename TAccumulateType, typename TFloatType> -inline const TKahanAccumulator<TAccumulateType> +inline const TKahanAccumulator<TAccumulateType> operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs *= rhs; } template <typename TAccumulateType, typename TFloatType> -inline const TKahanAccumulator<TAccumulateType> +inline const TKahanAccumulator<TAccumulateType> operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) { return lhs /= rhs; } template <typename TAccumulatorType, typename It> -static inline TAccumulatorType TypedFastAccumulate(It begin, It end) { +static inline TAccumulatorType TypedFastAccumulate(It begin, It end) { TAccumulatorType accumulator = TAccumulatorType(); for (; begin + 15 < end; begin += 16) { @@ -179,7 +179,7 @@ static inline TAccumulatorType TypedFastInnerProduct(It1 begin1, It1 end1, It2 b } template <typename It> -static inline double FastAccumulate(It begin, It end) { +static inline double FastAccumulate(It begin, It end) { return TypedFastAccumulate<double>(begin, end); } @@ -189,7 +189,7 @@ static inline double FastAccumulate(const TVector<T>& sequence) { } template <typename It> -static inline double FastKahanAccumulate(It begin, It end) { +static inline double FastKahanAccumulate(It begin, It end) { return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end); } @@ -199,7 +199,7 @@ static inline double FastKahanAccumulate(const TVector<T>& sequence) { } template <typename It1, typename It2> -static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) { +static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) { return TypedFastInnerProduct<double>(begin1, end1, begin2); } @@ -210,7 +210,7 @@ static inline double FastInnerProduct(const TVector<T>& lhs, const TVector<T>& r } template <typename It1, typename It2> -static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) { +static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) { return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2); } diff --git a/library/cpp/accurate_accumulate/ya.make b/library/cpp/accurate_accumulate/ya.make index 82630d19be..609a29fc35 100644 --- a/library/cpp/accurate_accumulate/ya.make +++ b/library/cpp/accurate_accumulate/ya.make @@ -1,6 +1,6 @@ LIBRARY() -OWNER(alex-sh) +OWNER(alex-sh) SRCS( accurate_accumulate.h diff --git a/library/cpp/charset/codepage.h b/library/cpp/charset/codepage.h index 30a02a4610..419f5746bc 100644 --- a/library/cpp/charset/codepage.h +++ b/library/cpp/charset/codepage.h @@ -199,7 +199,7 @@ struct Encoder { return 0; return (unsigned char)Table[(ch >> 8) & 255][ch & 255]; } - + inline char Tr(wchar32 ch) const { char code = Code(ch); if (code == 0 && ch != 0) @@ -211,7 +211,7 @@ struct Encoder { inline unsigned char operator[](wchar32 ch) const { return Tr(ch); } - + void Tr(const wchar32* in, char* out, size_t len) const; void Tr(const wchar32* in, char* out) const; char* DefaultPlane; diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478..7df4d27196 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -53,8 +53,8 @@ public: void TestToLower(); void TestToUpper(); - void TestCanEncode(); - + void TestCanEncode(); + inline void TestUpperLower() { const CodePage* cp = CodePageByCharset(CODES_ASCII); char tmp[100]; @@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() { ToUpper(data, n - 1); UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0); } - -static void TestCanEncodeEmpty() { - TWtringBuf empty; - UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); -} - -static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { - // char by char - for (size_t i = 0; i < text.size(); ++i) { - if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) - ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) + +static void TestCanEncodeEmpty() { + TWtringBuf empty; + UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); +} + +static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { + // char by char + for (size_t i = 0; i < text.size(); ++i) { + if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) + ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) << " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")"; - } - // whole text - UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); -} - -void TCodepageTest::TestCanEncode() { - TestCanEncodeEmpty(); - + } + // whole text + UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); +} + +void TCodepageTest::TestCanEncode() { + TestCanEncodeEmpty(); + const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"; - TestCanEncodeEach(lat, CODES_WIN, true); - TestCanEncodeEach(lat, CODES_YANDEX, true); - TestCanEncodeEach(lat, CODES_UTF8, true); - + TestCanEncodeEach(lat, CODES_WIN, true); + TestCanEncodeEach(lat, CODES_YANDEX, true); + TestCanEncodeEach(lat, CODES_UTF8, true); + const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя"; - TestCanEncodeEach(rus, CODES_WIN, true); - TestCanEncodeEach(rus, CODES_YANDEX, true); - TestCanEncodeEach(rus, CODES_UTF8, true); - + TestCanEncodeEach(rus, CODES_WIN, true); + TestCanEncodeEach(rus, CODES_YANDEX, true); + TestCanEncodeEach(rus, CODES_UTF8, true); + const TUtf16String ukr = u"ҐґЄєІіЇї"; - TestCanEncodeEach(ukr, CODES_WIN, true); - TestCanEncodeEach(ukr, CODES_YANDEX, true); - TestCanEncodeEach(ukr, CODES_UTF8, true); - + TestCanEncodeEach(ukr, CODES_WIN, true); + TestCanEncodeEach(ukr, CODES_YANDEX, true); + TestCanEncodeEach(ukr, CODES_UTF8, true); + const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż"; - TestCanEncodeEach(pol, CODES_WIN, false); - TestCanEncodeEach(pol, CODES_YANDEX, true); - TestCanEncodeEach(pol, CODES_UTF_16BE, true); - + TestCanEncodeEach(pol, CODES_WIN, false); + TestCanEncodeEach(pol, CODES_YANDEX, true); + TestCanEncodeEach(pol, CODES_UTF_16BE, true); + const TUtf16String ger = u"ÄäÖöÜüß"; - TestCanEncodeEach(ger, CODES_WIN, false); - TestCanEncodeEach(ger, CODES_YANDEX, true); - TestCanEncodeEach(ger, CODES_UTF_16LE, true); - + TestCanEncodeEach(ger, CODES_WIN, false); + TestCanEncodeEach(ger, CODES_YANDEX, true); + TestCanEncodeEach(ger, CODES_UTF_16LE, true); + const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ"; const TUtf16String fra3 = u"Æ挜"; - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); - TestCanEncodeEach(fra1, CODES_YANDEX, true); - TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); - + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); + TestCanEncodeEach(fra1, CODES_YANDEX, true); + TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); + const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ"; - TestCanEncodeEach(kaz, CODES_WIN, false); - TestCanEncodeEach(kaz, CODES_YANDEX, false); - TestCanEncodeEach(kaz, CODES_UTF8, true); - TestCanEncodeEach(kaz, CODES_KAZWIN, true); - + TestCanEncodeEach(kaz, CODES_WIN, false); + TestCanEncodeEach(kaz, CODES_YANDEX, false); + TestCanEncodeEach(kaz, CODES_UTF8, true); + TestCanEncodeEach(kaz, CODES_KAZWIN, true); + const TUtf16String tur1 = u"ĞİŞğş"; const TUtf16String tur = tur1 + u"ı"; - TestCanEncodeEach(tur, CODES_WIN, false); - TestCanEncodeEach(tur, CODES_YANDEX, false); - TestCanEncodeEach(tur, CODES_UTF8, true); - + TestCanEncodeEach(tur, CODES_WIN, false); + TestCanEncodeEach(tur, CODES_YANDEX, false); + TestCanEncodeEach(tur, CODES_UTF8, true); + const TUtf16String chi = u"新隶体新隸體"; - TestCanEncodeEach(chi, CODES_WIN, false); - TestCanEncodeEach(chi, CODES_YANDEX, false); - TestCanEncodeEach(chi, CODES_UTF8, true); - TestCanEncodeEach(chi, CODES_UTF_16LE, true); - + TestCanEncodeEach(chi, CODES_WIN, false); + TestCanEncodeEach(chi, CODES_YANDEX, false); + TestCanEncodeEach(chi, CODES_UTF8, true); + TestCanEncodeEach(chi, CODES_UTF_16LE, true); + const TUtf16String jap = u"漢字仮字交じり文"; - TestCanEncodeEach(jap, CODES_WIN, false); - TestCanEncodeEach(jap, CODES_YANDEX, false); - TestCanEncodeEach(jap, CODES_UTF8, true); - TestCanEncodeEach(jap, CODES_UTF_16BE, true); -} + TestCanEncodeEach(jap, CODES_WIN, false); + TestCanEncodeEach(jap, CODES_YANDEX, false); + TestCanEncodeEach(jap, CODES_UTF8, true); + TestCanEncodeEach(jap, CODES_UTF_16BE, true); +} diff --git a/library/cpp/charset/wide.cpp b/library/cpp/charset/wide.cpp index d12b293817..ae75f45355 100644 --- a/library/cpp/charset/wide.cpp +++ b/library/cpp/charset/wide.cpp @@ -1,18 +1,18 @@ #include "wide.h" -bool CanBeEncoded(TWtringBuf text, ECharset encoding) { - const size_t LEN = 16; - const size_t BUFSIZE = LEN * 4; - char encodeBuf[BUFSIZE]; - wchar16 decodeBuf[BUFSIZE]; - - while (!text.empty()) { - TWtringBuf src = text.NextTokAt(LEN); - TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding); - TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding); - if (decoded != src) - return false; - } - - return true; -} +bool CanBeEncoded(TWtringBuf text, ECharset encoding) { + const size_t LEN = 16; + const size_t BUFSIZE = LEN * 4; + char encodeBuf[BUFSIZE]; + wchar16 decodeBuf[BUFSIZE]; + + while (!text.empty()) { + TWtringBuf src = text.NextTokAt(LEN); + TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding); + TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding); + if (decoded != src) + return false; + } + + return true; +} diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e..c8f78a9eb4 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -47,61 +47,61 @@ inline void CharToWide(const char* text, size_t len, TCharType* dest, const Code } } -namespace NDetail { - namespace NBaseOps { - // Template interface base recoding drivers, do not perform any memory management, - // do not care about buffer size, so supplied @dst - // should have enough room for the result (with proper reserve for the worst case) - - // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. - +namespace NDetail { + namespace NBaseOps { + // Template interface base recoding drivers, do not perform any memory management, + // do not care about buffer size, so supplied @dst + // should have enough room for the result (with proper reserve for the worst case) + + // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::CharToWide(src.data(), src.size(), dst, cp); return TBasicStringBuf<TCharType>(dst, src.size()); - } - + } + template <typename TCharType> inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::WideToChar(src.data(), src.size(), dst, cp.CPEnum); return TStringBuf(dst, src.size()); - } - + } + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); - size_t read = 0; - size_t written = 0; + size_t read = 0; + size_t written = 0; ::NICONVPrivate::RecodeToUnicode(encoding, src.data(), dst, src.size(), src.size(), read, written); return TBasicStringBuf<TCharType>(dst, written); - } - + } + template <typename TCharType> inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); - size_t read = 0; - size_t written = 0; + size_t read = 0; + size_t written = 0; ::NICONVPrivate::RecodeFromUnicode(encoding, src.data(), dst, src.size(), src.size() * 3, read, written); - return TStringBuf(dst, written); - } - + return TStringBuf(dst, written); + } + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) { - size_t len = 0; + size_t len = 0; if (!::UTF8ToWide(src.data(), src.size(), dst, len)) - ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\""); + ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\""); return TBasicStringBuf<TCharType>(dst, len); - } - + } + template <typename TCharType> inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) { - size_t len = 0; + size_t len = 0; ::WideToUTF8(src.data(), src.size(), dst, len); - return TStringBuf(dst, len); - } - + return TStringBuf(dst, len); + } + // Select one of re-coding methods from above, based on provided @encoding template <typename TCharFrom, typename TCharTo> @@ -115,73 +115,73 @@ namespace NDetail { } } - - template <typename TCharFrom> - struct TRecodeTraits; - - template <> - struct TRecodeTraits<char> { + + template <typename TCharFrom> + struct TRecodeTraits; + + template <> + struct TRecodeTraits<char> { using TCharTo = wchar16; using TStringBufTo = TWtringBuf; using TStringTo = TUtf16String; enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case // Here an unicode character can be converted up to 4 bytes of UTF8 - }; - - template <> - struct TRecodeTraits<wchar16> { + }; + + template <> + struct TRecodeTraits<wchar16> { using TCharTo = char; using TStringBufTo = TStringBuf; using TStringTo = TString; enum { ReserveSize = 2 }; // possible surrogate pairs ? - }; - - // Operations with destination buffer where recoded string will be written - template <typename TResult> - struct TRecodeResultOps { + }; + + // Operations with destination buffer where recoded string will be written + template <typename TResult> + struct TRecodeResultOps { // default implementation will work with TString and TUtf16String - 99% of usage using TResultChar = typename TResult::char_type; - - static inline size_t Size(const TResult& dst) { - return dst.size(); - } - - static inline TResultChar* Reserve(TResult& dst, size_t len) { - dst.ReserveAndResize(len); - return dst.begin(); - } - - static inline void Truncate(TResult& dst, size_t len) { - dst.resize(len); - } - }; - - // Main template interface for recoding in both directions - - template <typename TCharFrom, typename TResult> + + static inline size_t Size(const TResult& dst) { + return dst.size(); + } + + static inline TResultChar* Reserve(TResult& dst, size_t len) { + dst.ReserveAndResize(len); + return dst.begin(); + } + + static inline void Truncate(TResult& dst, size_t len) { + dst.resize(len); + } + }; + + // Main template interface for recoding in both directions + + template <typename TCharFrom, typename TResult> typename TRecodeTraits<TCharFrom>::TStringBufTo Recode(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) { using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo; - // make enough room for re-coded string - TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize); - // do re-coding + // make enough room for re-coded string + TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize); + // do re-coding TBasicStringBuf<TCharTo> res = NBaseOps::Recode(src, dstbuf, encoding); - // truncate result back to proper size - TRecodeResultOps<TResult>::Truncate(dst, res.size()); - return res; - } - - // appending version of Recode() - template <typename TCharFrom, typename TResult> + // truncate result back to proper size + TRecodeResultOps<TResult>::Truncate(dst, res.size()); + return res; + } + + // appending version of Recode() + template <typename TCharFrom, typename TResult> typename TRecodeTraits<TCharFrom>::TStringBufTo RecodeAppend(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) { using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo; - size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst); - TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize); + size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst); + TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize); TBasicStringBuf<TCharTo> appended = NBaseOps::Recode(src, dstbuf + dstOrigSize, encoding); - size_t dstFinalSize = dstOrigSize + appended.size(); - TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize); + size_t dstFinalSize = dstOrigSize + appended.size(); + TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize); return TBasicStringBuf<TCharTo>(dstbuf, dstFinalSize); - } - + } + // special implementation for robust utf8 functions template <typename TResult> TWtringBuf RecodeUTF8Robust(const TStringBuf src, TResult& dst) { @@ -197,31 +197,31 @@ namespace NDetail { return TWtringBuf(dstbuf, written); } - template <typename TCharFrom> + template <typename TCharFrom> inline typename TRecodeTraits<TCharFrom>::TStringTo Recode(const TBasicStringBuf<TCharFrom> src, ECharset encoding) { - typename TRecodeTraits<TCharFrom>::TStringTo res; - Recode<TCharFrom>(src, res, encoding); - return res; - } + typename TRecodeTraits<TCharFrom>::TStringTo res; + Recode<TCharFrom>(src, res, encoding); + return res; + } } - -// Write result into @dst. Return string-buffer pointing to re-coded content of @dst. - + +// Write result into @dst. Return string-buffer pointing to re-coded content of @dst. + template <bool robust> inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) { if (robust && CODES_UTF8 == encoding) return ::NDetail::RecodeUTF8Robust(src, dst); return ::NDetail::Recode<char>(src, dst, encoding); -} - +} + inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) { return ::NDetail::Recode<char>(src, dst, encoding); } inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encoding) { return ::NDetail::Recode<wchar16>(src, dst, encoding); -} - +} + //! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { @@ -301,6 +301,6 @@ inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) { return CharToWide(s.data(), s.size(), cp); } -// true if @text can be fully encoded to specified @encoding, -// with possibility to recover exact original text after decoding -bool CanBeEncoded(TWtringBuf text, ECharset encoding); +// true if @text can be fully encoded to specified @encoding, +// with possibility to recover exact original text after decoding +bool CanBeEncoded(TWtringBuf text, ECharset encoding); diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51ba..63112f432c 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -9,7 +9,7 @@ #include <util/generic/hash_set.h> #include <algorithm> - + namespace { //! three UTF8 encoded russian letters (A, B, V) const char yandexCyrillicAlphabet[] = @@ -143,8 +143,8 @@ public: void TestCharToWide(); void TestWideToChar(); void TestYandexEncoding(); - void TestRecodeIntoString(); - void TestRecodeAppend(); + void TestRecodeIntoString(); + void TestRecodeAppend(); void TestRecode(); void TestUnicodeLimit(); }; @@ -228,114 +228,114 @@ void TConversionTest::TestYandexEncoding() { } } -void TConversionTest::TestRecodeIntoString() { +void TConversionTest::TestRecodeIntoString() { TString sYandex(UnicodeText.size() * 4, 'x'); const char* sdata = sYandex.data(); - TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); + TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); UNIT_ASSERT(sYandex == YandexText); // same content UNIT_ASSERT(sYandex.data() == sdata); // reserved buffer reused UNIT_ASSERT(sYandex.data() == sres.data()); // same buffer UNIT_ASSERT(sYandex.size() == sres.size()); // same size TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); - + TUtf16String sUnicode; - sUnicode.reserve(YandexText.size() * 4); + sUnicode.reserve(YandexText.size() * 4); const wchar16* wdata = sUnicode.data(); - TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); + TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); UNIT_ASSERT(sUnicode == UnicodeText); // same content UNIT_ASSERT(sUnicode.data() == wdata); // reserved buffer reused UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer UNIT_ASSERT(sUnicode.size() == wres.size()); // same size - + TString sUtf8 = " "; - size_t scap = sUtf8.capacity(); - sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); + size_t scap = sUtf8.capacity(); + sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); UNIT_ASSERT(sUtf8 == UTF8Text); // same content UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small) UNIT_ASSERT(sUtf8.data() == sres.data()); // same buffer UNIT_ASSERT(sUtf8.size() == sres.size()); // same size TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8); - - sUnicode.clear(); + + sUnicode.clear(); wdata = sUnicode.data(); TUtf16String copy = sUnicode; // increase ref-counter - wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); + wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); UNIT_ASSERT(sUnicode == UnicodeText); // same content #ifndef TSTRING_IS_STD_STRING UNIT_ASSERT(sUnicode.data() != wdata); // re-allocated (shared buffer supplied) UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer #endif UNIT_ASSERT(sUnicode.size() == wres.size()); // same content -} - +} + static TString GenerateJunk(size_t seed) { TString res; - size_t hash = NumericHash(seed); - size_t size = hash % 1024; - res.reserve(size); - for (size_t i = 0; i < size; ++i) - res += static_cast<char>(NumericHash(hash + i) % 256); - return res; -} - -void TConversionTest::TestRecodeAppend() { - { + size_t hash = NumericHash(seed); + size_t size = hash % 1024; + res.reserve(size); + for (size_t i = 0; i < size; ++i) + res += static_cast<char>(NumericHash(hash + i) % 256); + return res; +} + +void TConversionTest::TestRecodeAppend() { + { TString s1, s2; NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); - UNIT_ASSERT(s1.empty()); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); - s2 += WideToChar(UnicodeText, CODES_WIN); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX); - s2 += WideToChar(UnicodeText, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - + UNIT_ASSERT(s1.empty()); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); + s2 += WideToChar(UnicodeText, CODES_WIN); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX); + s2 += WideToChar(UnicodeText, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); s2 += WideToUTF8(UnicodeText); - UNIT_ASSERT_EQUAL(s1, s2); + UNIT_ASSERT_EQUAL(s1, s2); - for (size_t i = 0; i < 100; ++i) { + for (size_t i = 0; i < 100; ++i) { TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX); - NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); + NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); s2 += WideToUTF8(junk); - UNIT_ASSERT_EQUAL(s1, s2); - } - } - - { + UNIT_ASSERT_EQUAL(s1, s2); + } + } + + { TUtf16String s1, s2; NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); - UNIT_ASSERT(s1.empty()); - - NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); - s2 += CharToWide(YandexText, CODES_WIN); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX); - s2 += CharToWide(YandexText, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - + UNIT_ASSERT(s1.empty()); + + NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); + s2 += CharToWide(YandexText, CODES_WIN); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX); + s2 += CharToWide(YandexText, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); + UNIT_ASSERT_EQUAL(s1, s2); - NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); + NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); s2 += UTF8ToWide(UTF8Text); - UNIT_ASSERT_EQUAL(s1, s2); - - for (size_t i = 0; i < 100; ++i) { + UNIT_ASSERT_EQUAL(s1, s2); + + for (size_t i = 0; i < 100; ++i) { TString junk = GenerateJunk(i); - NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); - s2 += CharToWide(junk, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - } - } -} - + NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); + s2 += CharToWide(junk, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + } + } +} + template <> void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) { out << int(val); diff --git a/library/cpp/containers/comptrie/comptrie_impl.h b/library/cpp/containers/comptrie/comptrie_impl.h index f41c38311a..c36da12e13 100644 --- a/library/cpp/containers/comptrie/comptrie_impl.h +++ b/library/cpp/containers/comptrie/comptrie_impl.h @@ -26,10 +26,10 @@ namespace NCompactTrie { return (sizeof(T) - 1) * 8; } - static inline bool IsEpsilonLink(const char flags) { - return !(flags & (MT_FINAL | MT_NEXT)); - } - + static inline bool IsEpsilonLink(const char flags) { + return !(flags & (MT_FINAL | MT_NEXT)); + } + static inline void TraverseEpsilon(const char*& datapos) { const char flags = *datapos; if (!IsEpsilonLink(flags)) { @@ -41,14 +41,14 @@ namespace NCompactTrie { datapos += offset; } - static inline size_t LeftOffsetLen(const char flags) { - return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK; - } - - static inline size_t RightOffsetLen(const char flags) { - return flags & MT_SIZEMASK; - } - + static inline size_t LeftOffsetLen(const char flags) { + return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK; + } + + static inline size_t RightOffsetLen(const char flags) { + return flags & MT_SIZEMASK; + } + void ShowProgress(size_t n); // just print dots } @@ -100,82 +100,82 @@ namespace NCompactTrie { os.Write(buf, len); return len; } - - // Unpack the offset to the next node. The encoding scheme can store offsets - // up to 7 bytes; whether they fit into size_t is another issue. + + // Unpack the offset to the next node. The encoding scheme can store offsets + // up to 7 bytes; whether they fit into size_t is another issue. Y_FORCE_INLINE size_t UnpackOffset(const char* p, size_t len) { - size_t result = 0; - - while (len--) - result = ((result << 8) | (*(p++) & 0xFF)); - - return result; - } - - // Auxiliary function: consumes one character from the input. Advances the data pointer - // to the position immediately preceding the value for the link just traversed (if any); - // returns flags associated with the link. If no arc with the required label is present, - // zeroes the data pointer. + size_t result = 0; + + while (len--) + result = ((result << 8) | (*(p++) & 0xFF)); + + return result; + } + + // Auxiliary function: consumes one character from the input. Advances the data pointer + // to the position immediately preceding the value for the link just traversed (if any); + // returns flags associated with the link. If no arc with the required label is present, + // zeroes the data pointer. Y_FORCE_INLINE char LeapByte(const char*& datapos, const char* dataend, char label) { - while (datapos < dataend) { - size_t offsetlength, offset; - const char* startpos = datapos; - char flags = *(datapos++); - - if (IsEpsilonLink(flags)) { - // Epsilon link - jump to the specified offset without further checks. - // These links are created during minimization: original uncompressed - // tree does not need them. (If we find a way to package 3 offset lengths - // into 1 byte, we could get rid of them; but it looks like they do no harm. + while (datapos < dataend) { + size_t offsetlength, offset; + const char* startpos = datapos; + char flags = *(datapos++); + + if (IsEpsilonLink(flags)) { + // Epsilon link - jump to the specified offset without further checks. + // These links are created during minimization: original uncompressed + // tree does not need them. (If we find a way to package 3 offset lengths + // into 1 byte, we could get rid of them; but it looks like they do no harm. Y_ASSERT(datapos < dataend); - offsetlength = flags & MT_SIZEMASK; - offset = UnpackOffset(datapos, offsetlength); - if (!offset) - break; - datapos = startpos + offset; - - continue; - } - - char ch = *(datapos++); - - // Left branch - offsetlength = LeftOffsetLen(flags); - if ((unsigned char)label < (unsigned char)ch) { - offset = UnpackOffset(datapos, offsetlength); - if (!offset) - break; - - datapos = startpos + offset; - - continue; - } - - datapos += offsetlength; - - // Right branch - offsetlength = RightOffsetLen(flags); - if ((unsigned char)label > (unsigned char)ch) { - offset = UnpackOffset(datapos, offsetlength); - - if (!offset) - break; - - datapos = startpos + offset; - - continue; - } - - // Got a match; return position right before the contents for the label - datapos += offsetlength; - return flags; - } - - // if we got here, we're past the dataend - bail out ASAP + offsetlength = flags & MT_SIZEMASK; + offset = UnpackOffset(datapos, offsetlength); + if (!offset) + break; + datapos = startpos + offset; + + continue; + } + + char ch = *(datapos++); + + // Left branch + offsetlength = LeftOffsetLen(flags); + if ((unsigned char)label < (unsigned char)ch) { + offset = UnpackOffset(datapos, offsetlength); + if (!offset) + break; + + datapos = startpos + offset; + + continue; + } + + datapos += offsetlength; + + // Right branch + offsetlength = RightOffsetLen(flags); + if ((unsigned char)label > (unsigned char)ch) { + offset = UnpackOffset(datapos, offsetlength); + + if (!offset) + break; + + datapos = startpos + offset; + + continue; + } + + // Got a match; return position right before the contents for the label + datapos += offsetlength; + return flags; + } + + // if we got here, we're past the dataend - bail out ASAP datapos = nullptr; - return 0; - } - + return 0; + } + // Auxiliary function: consumes one (multibyte) symbol from the input. // Advances the data pointer to the root of the subtrie beginning after the symbol, // zeroes it if this subtrie is empty. diff --git a/library/cpp/containers/comptrie/comptrie_trie.h b/library/cpp/containers/comptrie/comptrie_trie.h index 40ec1e52b3..9bf4d61825 100644 --- a/library/cpp/containers/comptrie/comptrie_trie.h +++ b/library/cpp/containers/comptrie/comptrie_trie.h @@ -127,7 +127,7 @@ public: return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext); } - // Return trie, containing all tails for the given key + // Return trie, containing all tails for the given key inline TCompactTrie<T, D, S> FindTails(const TSymbol* key, size_t keylen) const; TCompactTrie<T, D, S> FindTails(const TKeyBuf& key) const { return FindTails(key.data(), key.size()); @@ -137,10 +137,10 @@ public: return FindTails(key.data(), key.size(), res); } - // same as FindTails(&key, 1), a bit faster - // return false, if no arc with @label exists + // same as FindTails(&key, 1), a bit faster + // return false, if no arc with @label exists inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const; - + class TConstIterator { private: typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator; @@ -343,10 +343,10 @@ void TCompactTrie<T, D, S>::FindPhrases(const TSymbol* key, size_t keylen, TPhra template <class T, class D, class S> inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen) const { TCompactTrie<T, D, S> ret; - FindTails(key, keylen, ret); - return ret; -} - + FindTails(key, keylen, ret); + return ret; +} + template <class T, class D, class S> bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompactTrie<T, D, S>& res) const { using namespace NCompactTrie; @@ -354,11 +354,11 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac size_t len = DataHolder.Length(); if (!key || !len) - return false; + return false; if (!keylen) { - res = *this; - return true; + res = *this; + return true; } const char* datastart = DataHolder.AsCharPtr(); @@ -386,35 +386,35 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac } } - return false; + return false; } template <class T, class D, class S> inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const { - using namespace NCompactTrie; - + using namespace NCompactTrie; + const size_t len = DataHolder.Length(); - if (!len) - return false; - + if (!len) + return false; + const char* datastart = DataHolder.AsCharPtr(); - const char* dataend = datastart + len; - const char* datapos = datastart; + const char* dataend = datastart + len; + const char* datapos = datastart; const char* value = nullptr; if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer)) return false; - + if (datapos) { Y_ASSERT(datapos >= datastart); res = TCompactTrie<T, D, S>(TBlob::NoCopy(datapos, dataend - datapos), value); } else { res = TCompactTrie<T, D, S>(value); - } - + } + return true; -} - +} + template <class T, class D, class S> typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::Begin() const { NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper); @@ -495,30 +495,30 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle const char* const dataend = datapos + len; - const T* keyend = key + keylen; + const T* keyend = key + keylen; while (key != keyend) { T label = *(key++); - for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { + for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { const char flags = LeapByte(datapos, dataend, (char)(label >> i)); - if (!datapos) { - return found; // no such arc - } + if (!datapos) { + return found; // no such arc + } Y_ASSERT(datapos <= dataend); - if ((flags & MT_FINAL)) { + if ((flags & MT_FINAL)) { prefixLen = keylen - (keyend - key) - (i ? 1 : 0); valuepos = datapos; hasNext = flags & MT_NEXT; found = true; - if (!i && key == keyend) { // last byte, and got a match - return found; - } - datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes - } + if (!i && key == keyend) { // last byte, and got a match + return found; + } + datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes + } - if (!(flags & MT_NEXT)) { - return found; // no further way + if (!(flags & MT_NEXT)) { + return found; // no further way } } } diff --git a/library/cpp/containers/comptrie/comptrie_ut.cpp b/library/cpp/containers/comptrie/comptrie_ut.cpp index 74bee09b5d..1a8dca293a 100644 --- a/library/cpp/containers/comptrie/comptrie_ut.cpp +++ b/library/cpp/containers/comptrie/comptrie_ut.cpp @@ -21,7 +21,7 @@ #include <util/string/cast.h> #include "comptrie.h" -#include "set.h" +#include "set.h" #include "first_symbol_iterator.h" #include "search_iterator.h" #include "pattern_searcher.h" @@ -74,7 +74,7 @@ private: UNIT_TEST(TestIterateEmptyKey); UNIT_TEST(TestTrieSet); - + UNIT_TEST(TestTrieForVectorInt64); UNIT_TEST(TestTrieForListInt64); UNIT_TEST(TestTrieForSetInt64); @@ -209,8 +209,8 @@ public: void TestClear(); void TestIterateEmptyKey(); - - void TestTrieSet(); + + void TestTrieSet(); void TestTrieForVectorInt64(); void TestTrieForListInt64(); @@ -1060,48 +1060,48 @@ void TCompactTrieTest::TestIterateEmptyKey() { UNIT_ASSERT(it.GetValue() == 1); } -void TCompactTrieTest::TestTrieSet() { - TBuffer buffer; - { - TCompactTrieSet<char>::TBuilder builder; - UNIT_ASSERT(builder.Add("a", 0)); - UNIT_ASSERT(builder.Add("ab", 1)); - UNIT_ASSERT(builder.Add("abc", 1)); - UNIT_ASSERT(builder.Add("abcd", 0)); - UNIT_ASSERT(!builder.Add("abcd", 1)); - - TBufferStream stream(buffer); - builder.Save(stream); - } - - TCompactTrieSet<char> set(TBlob::FromBuffer(buffer)); - UNIT_ASSERT(set.Has("a")); - UNIT_ASSERT(set.Has("ab")); - UNIT_ASSERT(set.Has("abc")); - UNIT_ASSERT(set.Has("abcd")); - UNIT_ASSERT(!set.Has("abcde")); - UNIT_ASSERT(!set.Has("aa")); - UNIT_ASSERT(!set.Has("b")); - UNIT_ASSERT(!set.Has("")); - - TCompactTrieSet<char> tails; - UNIT_ASSERT(set.FindTails("a", tails)); - UNIT_ASSERT(tails.Has("b")); - UNIT_ASSERT(tails.Has("bcd")); - UNIT_ASSERT(!tails.Has("ab")); - UNIT_ASSERT(!set.Has("")); - - TCompactTrieSet<char> empty; - UNIT_ASSERT(set.FindTails("abcd", empty)); - UNIT_ASSERT(!empty.Has("a")); - UNIT_ASSERT(!empty.Has("b")); - UNIT_ASSERT(!empty.Has("c")); - UNIT_ASSERT(!empty.Has("d")); - UNIT_ASSERT(!empty.Has("d")); - +void TCompactTrieTest::TestTrieSet() { + TBuffer buffer; + { + TCompactTrieSet<char>::TBuilder builder; + UNIT_ASSERT(builder.Add("a", 0)); + UNIT_ASSERT(builder.Add("ab", 1)); + UNIT_ASSERT(builder.Add("abc", 1)); + UNIT_ASSERT(builder.Add("abcd", 0)); + UNIT_ASSERT(!builder.Add("abcd", 1)); + + TBufferStream stream(buffer); + builder.Save(stream); + } + + TCompactTrieSet<char> set(TBlob::FromBuffer(buffer)); + UNIT_ASSERT(set.Has("a")); + UNIT_ASSERT(set.Has("ab")); + UNIT_ASSERT(set.Has("abc")); + UNIT_ASSERT(set.Has("abcd")); + UNIT_ASSERT(!set.Has("abcde")); + UNIT_ASSERT(!set.Has("aa")); + UNIT_ASSERT(!set.Has("b")); + UNIT_ASSERT(!set.Has("")); + + TCompactTrieSet<char> tails; + UNIT_ASSERT(set.FindTails("a", tails)); + UNIT_ASSERT(tails.Has("b")); + UNIT_ASSERT(tails.Has("bcd")); + UNIT_ASSERT(!tails.Has("ab")); + UNIT_ASSERT(!set.Has("")); + + TCompactTrieSet<char> empty; + UNIT_ASSERT(set.FindTails("abcd", empty)); + UNIT_ASSERT(!empty.Has("a")); + UNIT_ASSERT(!empty.Has("b")); + UNIT_ASSERT(!empty.Has("c")); + UNIT_ASSERT(!empty.Has("d")); + UNIT_ASSERT(!empty.Has("d")); + UNIT_ASSERT(empty.Has("")); // contains only empty string -} - +} + // Tests for trie with vector (list, set) values TVector<TUtf16String> TCompactTrieTest::GetSampleKeys(size_t nKeys) const { diff --git a/library/cpp/containers/comptrie/set.h b/library/cpp/containers/comptrie/set.h index acd43338f0..e165e8650f 100644 --- a/library/cpp/containers/comptrie/set.h +++ b/library/cpp/containers/comptrie/set.h @@ -1,40 +1,40 @@ #pragma once -#include "comptrie_trie.h" - -template <typename T = char> +#include "comptrie_trie.h" + +template <typename T = char> class TCompactTrieSet: public TCompactTrie<T, ui8, TNullPacker<ui8>> { -public: +public: typedef TCompactTrie<T, ui8, TNullPacker<ui8>> TBase; - + using typename TBase::TBuilder; - using typename TBase::TKey; - using typename TBase::TKeyBuf; + using typename TBase::TKey; + using typename TBase::TKeyBuf; using typename TBase::TSymbol; - + TCompactTrieSet() = default; - - explicit TCompactTrieSet(const TBlob& data) - : TBase(data) - { - } - - template <typename D> + + explicit TCompactTrieSet(const TBlob& data) + : TBase(data) + { + } + + template <typename D> explicit TCompactTrieSet(const TCompactTrie<T, D, TNullPacker<D>>& trie) : TBase(trie.Data()) // should be binary compatible for any D - { - } - - TCompactTrieSet(const char* data, size_t len) - : TBase(data, len) - { - } - - bool Has(const typename TBase::TKeyBuf& key) const { + { + } + + TCompactTrieSet(const char* data, size_t len) + : TBase(data, len) + { + } + + bool Has(const typename TBase::TKeyBuf& key) const { return TBase::Find(key.data(), key.size()); - } - - bool FindTails(const typename TBase::TKeyBuf& key, TCompactTrieSet<T>& res) const { - return TBase::FindTails(key, res); - } -}; + } + + bool FindTails(const typename TBase::TKeyBuf& key, TCompactTrieSet<T>& res) const { + return TBase::FindTails(key, res); + } +}; diff --git a/library/cpp/containers/comptrie/ya.make b/library/cpp/containers/comptrie/ya.make index 81352da4b2..7a83c353bd 100644 --- a/library/cpp/containers/comptrie/ya.make +++ b/library/cpp/containers/comptrie/ya.make @@ -11,7 +11,7 @@ SRCS( first_symbol_iterator.h key_selector.h leaf_skipper.h - set.h + set.h comptrie.cpp comptrie_builder.cpp comptrie_impl.cpp diff --git a/library/cpp/containers/ring_buffer/ring_buffer.h b/library/cpp/containers/ring_buffer/ring_buffer.h index 41220dcf6b..e1f232712c 100644 --- a/library/cpp/containers/ring_buffer/ring_buffer.h +++ b/library/cpp/containers/ring_buffer/ring_buffer.h @@ -1,81 +1,81 @@ -#pragma once - -#include <util/generic/vector.h> -#include <util/system/yassert.h> - -template <typename T> -class TSimpleRingBuffer { -public: - TSimpleRingBuffer(size_t maxSize) - : MaxSize(maxSize) - { - Items.reserve(MaxSize); - } - +#pragma once + +#include <util/generic/vector.h> +#include <util/system/yassert.h> + +template <typename T> +class TSimpleRingBuffer { +public: + TSimpleRingBuffer(size_t maxSize) + : MaxSize(maxSize) + { + Items.reserve(MaxSize); + } + TSimpleRingBuffer(const TSimpleRingBuffer&) = default; TSimpleRingBuffer(TSimpleRingBuffer&&) = default; TSimpleRingBuffer& operator=(const TSimpleRingBuffer&) = default; TSimpleRingBuffer& operator=(TSimpleRingBuffer&&) = default; - // First available item - size_t FirstIndex() const { - return Begin; - } - - size_t AvailSize() const { - return Items.size(); - } - - // Total number of items inserted - size_t TotalSize() const { - return FirstIndex() + AvailSize(); - } - - bool IsAvail(size_t index) const { - return index >= FirstIndex() && index < TotalSize(); - } - - const T& operator[](size_t index) const { + // First available item + size_t FirstIndex() const { + return Begin; + } + + size_t AvailSize() const { + return Items.size(); + } + + // Total number of items inserted + size_t TotalSize() const { + return FirstIndex() + AvailSize(); + } + + bool IsAvail(size_t index) const { + return index >= FirstIndex() && index < TotalSize(); + } + + const T& operator[](size_t index) const { Y_ASSERT(IsAvail(index)); - return Items[RealIndex(index)]; - } - - T& operator[](size_t index) { + return Items[RealIndex(index)]; + } + + T& operator[](size_t index) { Y_ASSERT(IsAvail(index)); - return Items[RealIndex(index)]; - } - - void PushBack(const T& t) { - if (Items.size() < MaxSize) { - Items.push_back(t); - } else { - Items[RealIndex(Begin)] = t; - Begin += 1; - } - } - + return Items[RealIndex(index)]; + } + + void PushBack(const T& t) { + if (Items.size() < MaxSize) { + Items.push_back(t); + } else { + Items[RealIndex(Begin)] = t; + Begin += 1; + } + } + void Clear() { Items.clear(); Begin = 0; } -private: - size_t RealIndex(size_t index) const { - return index % MaxSize; - } - -private: - size_t MaxSize; - size_t Begin = 0; +private: + size_t RealIndex(size_t index) const { + return index % MaxSize; + } + +private: + size_t MaxSize; + size_t Begin = 0; TVector<T> Items; -}; - -template <typename T, size_t maxSize> -class TStaticRingBuffer: public TSimpleRingBuffer<T> { -public: - TStaticRingBuffer() - : TSimpleRingBuffer<T>(maxSize) - { - } -}; +}; + +template <typename T, size_t maxSize> +class TStaticRingBuffer: public TSimpleRingBuffer<T> { +public: + TStaticRingBuffer() + : TSimpleRingBuffer<T>(maxSize) + { + } +}; diff --git a/library/cpp/enumbitset/enumbitset.h b/library/cpp/enumbitset/enumbitset.h index 41864c3a04..9e1ae2d938 100644 --- a/library/cpp/enumbitset/enumbitset.h +++ b/library/cpp/enumbitset/enumbitset.h @@ -8,9 +8,9 @@ #include <util/string/printf.h> #include <util/system/yassert.h> -// Stack memory bitmask for TEnum values [begin, end). -// @end value is not included in the mask and is not necessarily defined as enum value. -// For example: enum EType { A, B, C } ==> TEnumBitSet<EType, A, C + 1> +// Stack memory bitmask for TEnum values [begin, end). +// @end value is not included in the mask and is not necessarily defined as enum value. +// For example: enum EType { A, B, C } ==> TEnumBitSet<EType, A, C + 1> template <typename TEnum, int mbegin, int mend> class TEnumBitSet: private TBitMap<mend - mbegin> { public: @@ -227,10 +227,10 @@ public: using TParent::Count; using TParent::Empty; - explicit operator bool() const { - return !Empty(); - } - + explicit operator bool() const { + return !Empty(); + } + void Swap(TThis& bitmap) { TParent::Swap(bitmap); } diff --git a/library/cpp/enumbitset/enumbitset_ut.cpp b/library/cpp/enumbitset/enumbitset_ut.cpp index e55b3251c3..d80d50b5d8 100644 --- a/library/cpp/enumbitset/enumbitset_ut.cpp +++ b/library/cpp/enumbitset/enumbitset_ut.cpp @@ -34,18 +34,18 @@ Y_UNIT_TEST_SUITE(TEnumBitSetTest) { ebs.SafeSet(TE_OVERFLOW); UNIT_ASSERT(!ebs.SafeTest(TE_OVERFLOW)); - } + } Y_UNIT_TEST(TestEmpty) { - TTestBitSet mask; - UNIT_ASSERT(mask.Empty()); - if (mask) - UNIT_ASSERT(false && "should be empty"); - - mask.Set(TE_FIRST); - UNIT_ASSERT(!mask.Empty()); - UNIT_ASSERT(mask.Count() == 1); - if (!mask) + TTestBitSet mask; + UNIT_ASSERT(mask.Empty()); + if (mask) + UNIT_ASSERT(false && "should be empty"); + + mask.Set(TE_FIRST); + UNIT_ASSERT(!mask.Empty()); + UNIT_ASSERT(mask.Count() == 1); + if (!mask) UNIT_ASSERT(false && "should not be empty"); } diff --git a/library/cpp/json/writer/json.cpp b/library/cpp/json/writer/json.cpp index 02370c2d79..b646d459c1 100644 --- a/library/cpp/json/writer/json.cpp +++ b/library/cpp/json/writer/json.cpp @@ -28,8 +28,8 @@ namespace NJsonWriter { Stack.reserve(64); // should be enough for most cases StackPush(JE_OUTER_SPACE); - } - + } + static const char* EntityToStr(EJsonEntity e) { switch (e) { case JE_OUTER_SPACE: @@ -44,7 +44,7 @@ namespace NJsonWriter { return "JE_unknown"; } } - + inline void TBuf::StackPush(EJsonEntity e) { Stack.push_back(e); } @@ -74,7 +74,7 @@ namespace NJsonWriter { NeedComma = true; NeedNewline = true; } - + inline void TBuf::CheckAndPop(EJsonEntity e) { if (Y_UNLIKELY(StackTop() != e)) { ythrow TError() << "JSON writer: unexpected value " @@ -91,8 +91,8 @@ namespace NJsonWriter { return; PrintWhitespaces(Max(0, indentation), true); - } - + } + void TBuf::PrintWhitespaces(size_t count, bool prependWithNewLine) { static constexpr TStringBuf whitespacesTemplate = "\n "; static_assert(whitespacesTemplate[0] == '\n'); @@ -112,13 +112,13 @@ namespace NJsonWriter { RawWriteChar(','); } NeedComma = true; - + if (NeedNewline) { PrintIndentation(false); } NeedNewline = true; } - + inline void TBuf::BeginValue() { if (Y_UNLIKELY(KeyExpected())) { ythrow TError() << "JSON writer: value written, " @@ -375,9 +375,9 @@ namespace NJsonWriter { return false; } - -#undef MATCH - + +#undef MATCH + static bool LessStrPtr(const TString* a, const TString* b) { return *a < *b; } diff --git a/library/cpp/json/writer/json.h b/library/cpp/json/writer/json.h index 0aae2531b9..07c3b9d0d1 100644 --- a/library/cpp/json/writer/json.h +++ b/library/cpp/json/writer/json.h @@ -43,8 +43,8 @@ namespace NJsonWriter { public: TBuf(EHtmlEscapeMode mode = HEM_DONT_ESCAPE_HTML, IOutputStream* stream = nullptr); - TValueContext WriteString(const TStringBuf& s, EHtmlEscapeMode hem); - TValueContext WriteString(const TStringBuf& s); + TValueContext WriteString(const TStringBuf& s, EHtmlEscapeMode hem); + TValueContext WriteString(const TStringBuf& s); TValueContext WriteInt(int i); TValueContext WriteLongLong(long long i); TValueContext WriteULongLong(unsigned long long i); @@ -58,15 +58,15 @@ namespace NJsonWriter { TBuf& EndList(); TPairContext BeginObject(); - TAfterColonContext WriteKey(const TStringBuf& key, EHtmlEscapeMode hem); - TAfterColonContext WriteKey(const TStringBuf& key); + TAfterColonContext WriteKey(const TStringBuf& key, EHtmlEscapeMode hem); + TAfterColonContext WriteKey(const TStringBuf& key); TAfterColonContext UnsafeWriteKey(const TStringBuf& key); - bool KeyExpected() const { - return Stack.back() == JE_OBJECT; - } + bool KeyExpected() const { + return Stack.back() == JE_OBJECT; + } //! deprecated, do not use in new code - TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& key); + TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& key); TBuf& EndObject(); @@ -102,7 +102,7 @@ namespace NJsonWriter { * j.UnsafeWriteValue("[1, 2, 3, \"o'clock\", 4, \"o'clock rock\"]"); * * As in all of the Unsafe* functions, no escaping is done. */ - void UnsafeWriteValue(const TStringBuf& s); + void UnsafeWriteValue(const TStringBuf& s); void UnsafeWriteValue(const char* s, size_t len); /*** When in the context of an object, write a literal string @@ -114,10 +114,10 @@ namespace NJsonWriter { * j.EndObject(); * * As in all of the Unsafe* functions, no escaping is done. */ - TPairContext UnsafeWritePair(const TStringBuf& s); + TPairContext UnsafeWritePair(const TStringBuf& s); /*** Copy the supplied string directly into the output stream. */ - void UnsafeWriteRawBytes(const TStringBuf& s); + void UnsafeWriteRawBytes(const TStringBuf& s); void UnsafeWriteRawBytes(const char* c, size_t len); TBufState State() const; @@ -129,21 +129,21 @@ namespace NJsonWriter { void EndValue(); void BeginKey(); void RawWriteChar(char c); - bool EscapedWriteChar(const char* b, const char* c, EHtmlEscapeMode hem); + bool EscapedWriteChar(const char* b, const char* c, EHtmlEscapeMode hem); void WriteBareString(const TStringBuf s, EHtmlEscapeMode hem); void WriteComma(); void PrintIndentation(bool closing); void PrintWhitespaces(size_t count, bool prependWithNewLine); void WriteHexEscape(unsigned char c); - void StackPush(EJsonEntity e); - void StackPop(); - void CheckAndPop(EJsonEntity e); - EJsonEntity StackTop() const; - + void StackPush(EJsonEntity e); + void StackPop(); + void CheckAndPop(EJsonEntity e); + EJsonEntity StackTop() const; + template <class TFloat> TValueContext WriteFloatImpl(TFloat f, EFloatToStringMode mode, int ndigits); - + private: IOutputStream* Stream; THolder<TStringStream> StringStream; @@ -160,8 +160,8 @@ namespace NJsonWriter { // Please don't try to instantiate the classes declared below this point. - template <typename TOutContext> - class TValueWriter { + template <typename TOutContext> + class TValueWriter { public: TOutContext WriteNull(); TOutContext WriteString(const TStringBuf&); @@ -221,19 +221,19 @@ namespace NJsonWriter { class TPairContext { public: - TAfterColonContext WriteKey(const TStringBuf& s, EHtmlEscapeMode hem) { + TAfterColonContext WriteKey(const TStringBuf& s, EHtmlEscapeMode hem) { return Buf.WriteKey(s, hem); } - TAfterColonContext WriteKey(const TStringBuf& s) { + TAfterColonContext WriteKey(const TStringBuf& s) { return Buf.WriteKey(s); } TAfterColonContext UnsafeWriteKey(const TStringBuf& s) { return Buf.UnsafeWriteKey(s); } - TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& s) { + TAfterColonContext CompatWriteKeyWithoutQuotes(const TStringBuf& s) { return Buf.CompatWriteKeyWithoutQuotes(s); } - TPairContext UnsafeWritePair(const TStringBuf& s) { + TPairContext UnsafeWritePair(const TStringBuf& s) { return Buf.UnsafeWritePair(s); } TBuf& EndObject() { @@ -243,8 +243,8 @@ namespace NJsonWriter { private: TPairContext(TBuf& buf) : Buf(buf) - { - } + { + } friend class TBuf; friend class TValueWriter<TPairContext>; diff --git a/library/cpp/json/writer/json_value.cpp b/library/cpp/json/writer/json_value.cpp index c61e8d1dc4..e92099e44c 100644 --- a/library/cpp/json/writer/json_value.cpp +++ b/library/cpp/json/writer/json_value.cpp @@ -3,7 +3,7 @@ #include <util/generic/ymath.h> #include <util/generic/ylimits.h> -#include <util/generic/utility.h> +#include <util/generic/utility.h> #include <util/generic/singleton.h> #include <util/stream/str.h> #include <util/stream/output.h> @@ -286,8 +286,8 @@ namespace NJson { TArray::iterator it = Value.Array->begin() + index; Value.Array->erase(it); } - } - + } + void TJsonValue::Clear() noexcept { switch (Type) { case JSON_STRING: @@ -714,8 +714,8 @@ namespace NJson { return false; *value = Value.Array; - return true; - } + return true; + } bool TJsonValue::GetValue(const size_t index, TJsonValue* value) const { const TJsonValue* tmp = nullptr; @@ -724,7 +724,7 @@ namespace NJson { return true; } return false; - } + } bool TJsonValue::GetValue(const TStringBuf key, TJsonValue* value) const { const TJsonValue* tmp = nullptr; @@ -733,15 +733,15 @@ namespace NJson { return true; } return false; - } + } bool TJsonValue::GetValuePointer(const size_t index, const TJsonValue** value) const noexcept { if (Type == JSON_ARRAY && index < Value.Array->size()) { *value = &(*Value.Array)[index]; - return true; - } + return true; + } return false; - } + } bool TJsonValue::GetValuePointer(const TStringBuf key, const TJsonValue** value) const noexcept { if (Type == JSON_MAP) { @@ -882,8 +882,8 @@ namespace NJson { const TJsonValue* TJsonValue::GetValueByPath(const TStringBuf key, char delim) const noexcept { return GetValuePtrByPath<false>(this, key, delim); - } - + } + TJsonValue* TJsonValue::GetValueByPath(const TStringBuf key, char delim) noexcept { return GetValuePtrByPath<false>(this, key, delim); } @@ -945,11 +945,11 @@ namespace NJson { case JSON_INTEGER: { return (rhs.IsInteger() && GetInteger() == rhs.GetInteger()); } - + case JSON_UINTEGER: { return (rhs.IsUInteger() && GetUInteger() == rhs.GetUInteger()); } - + case JSON_STRING: { return (rhs.IsString() && Value.String == rhs.Value.String); } @@ -978,7 +978,7 @@ namespace NJson { } else { std::memcpy(&output.Value, &Value, sizeof(Value)); } - + output.Type = Type; Type = JSON_UNDEFINED; } diff --git a/library/cpp/json/writer/json_value.h b/library/cpp/json/writer/json_value.h index 3f0f50bc4c..e625c79eb6 100644 --- a/library/cpp/json/writer/json_value.h +++ b/library/cpp/json/writer/json_value.h @@ -99,12 +99,12 @@ namespace NJson { void EraseValue(TStringBuf key); void EraseValue(size_t index); - + TJsonValue& operator[](size_t idx); TJsonValue& operator[](const TStringBuf& key); const TJsonValue& operator[](size_t idx) const noexcept; const TJsonValue& operator[](const TStringBuf& key) const noexcept; - + bool GetBoolean() const; long long GetInteger() const; unsigned long long GetUInteger() const; @@ -164,7 +164,7 @@ namespace NJson { bool IsString() const noexcept; bool IsMap() const noexcept; bool IsArray() const noexcept; - + /// @return true if JSON_INTEGER or (JSON_UINTEGER and Value <= Max<long long>) bool IsInteger() const noexcept; @@ -192,7 +192,7 @@ namespace NJson { void Load(IInputStream* s); static const TJsonValue UNDEFINED; - + private: EJsonValueType Type = JSON_UNDEFINED; union TValueUnion { @@ -218,7 +218,7 @@ namespace NJson { @throw yexception if Back shouldn't be called on the object. */ void BackChecks() const; - }; + }; inline bool GetBoolean(const TJsonValue& jv, size_t index, bool* value) noexcept { return jv[index].GetBoolean(value); diff --git a/library/cpp/json/writer/json_value_ut.cpp b/library/cpp/json/writer/json_value_ut.cpp index dc7f6affdf..31540983c0 100644 --- a/library/cpp/json/writer/json_value_ut.cpp +++ b/library/cpp/json/writer/json_value_ut.cpp @@ -8,34 +8,34 @@ using namespace NJson; Y_UNIT_TEST_SUITE(TJsonValueTest) { Y_UNIT_TEST(UndefTest) { - TJsonValue undef; - TJsonValue null(JSON_NULL); - TJsonValue _false(false); - TJsonValue zeroInt(0); - TJsonValue zeroDouble(0.0); - TJsonValue emptyStr(""); - TJsonValue emptyArray(JSON_ARRAY); - TJsonValue emptyMap(JSON_MAP); - - UNIT_ASSERT(!undef.IsDefined()); + TJsonValue undef; + TJsonValue null(JSON_NULL); + TJsonValue _false(false); + TJsonValue zeroInt(0); + TJsonValue zeroDouble(0.0); + TJsonValue emptyStr(""); + TJsonValue emptyArray(JSON_ARRAY); + TJsonValue emptyMap(JSON_MAP); + + UNIT_ASSERT(!undef.IsDefined()); UNIT_ASSERT(!null.IsDefined()); // json NULL is undefined too! - UNIT_ASSERT(_false.IsDefined()); - UNIT_ASSERT(zeroInt.IsDefined()); - UNIT_ASSERT(zeroDouble.IsDefined()); - UNIT_ASSERT(emptyStr.IsDefined()); - UNIT_ASSERT(emptyArray.IsDefined()); - UNIT_ASSERT(emptyMap.IsDefined()); - - UNIT_ASSERT(undef == TJsonValue()); - UNIT_ASSERT(undef != null); - UNIT_ASSERT(undef != _false); - UNIT_ASSERT(undef != zeroInt); - UNIT_ASSERT(undef != zeroDouble); - UNIT_ASSERT(undef != emptyStr); - UNIT_ASSERT(undef != emptyArray); - UNIT_ASSERT(undef != emptyMap); - } - + UNIT_ASSERT(_false.IsDefined()); + UNIT_ASSERT(zeroInt.IsDefined()); + UNIT_ASSERT(zeroDouble.IsDefined()); + UNIT_ASSERT(emptyStr.IsDefined()); + UNIT_ASSERT(emptyArray.IsDefined()); + UNIT_ASSERT(emptyMap.IsDefined()); + + UNIT_ASSERT(undef == TJsonValue()); + UNIT_ASSERT(undef != null); + UNIT_ASSERT(undef != _false); + UNIT_ASSERT(undef != zeroInt); + UNIT_ASSERT(undef != zeroDouble); + UNIT_ASSERT(undef != emptyStr); + UNIT_ASSERT(undef != emptyArray); + UNIT_ASSERT(undef != emptyMap); + } + Y_UNIT_TEST(DefaultCompareTest) { { TJsonValue lhs; @@ -208,31 +208,31 @@ Y_UNIT_TEST_SUITE(TJsonValueTest) { UNIT_ASSERT(rhs != lhs); } } - + Y_UNIT_TEST(SwapTest) { - { - TJsonValue lhs; - lhs.InsertValue("a", "b"); - TJsonValue lhsCopy = lhs; - - TJsonValue rhs(JSON_NULL); - TJsonValue rhsCopy = rhs; - - UNIT_ASSERT(lhs == lhsCopy); - UNIT_ASSERT(rhs == rhsCopy); - - lhs.Swap(rhs); - - UNIT_ASSERT(rhs == lhsCopy); - UNIT_ASSERT(lhs == rhsCopy); - - lhs.Swap(rhs); - - UNIT_ASSERT(lhs == lhsCopy); - UNIT_ASSERT(rhs == rhsCopy); - } - } - + { + TJsonValue lhs; + lhs.InsertValue("a", "b"); + TJsonValue lhsCopy = lhs; + + TJsonValue rhs(JSON_NULL); + TJsonValue rhsCopy = rhs; + + UNIT_ASSERT(lhs == lhsCopy); + UNIT_ASSERT(rhs == rhsCopy); + + lhs.Swap(rhs); + + UNIT_ASSERT(rhs == lhsCopy); + UNIT_ASSERT(lhs == rhsCopy); + + lhs.Swap(rhs); + + UNIT_ASSERT(lhs == lhsCopy); + UNIT_ASSERT(rhs == rhsCopy); + } + } + Y_UNIT_TEST(GetValueByPathTest) { { TJsonValue lhs; @@ -250,15 +250,15 @@ Y_UNIT_TEST_SUITE(TJsonValueTest) { UNIT_ASSERT(!lhs.GetValueByPath("l/a/c/se", result, '/')); UNIT_ASSERT(lhs.GetValueByPath("l/a/c", result, '/')); UNIT_ASSERT(result.GetStringRobust() == "{\"e\":\"f\"}"); - - // faster TStringBuf version - UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l", '/'), last); - UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a", '/'), second); - UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a/c", '/'), first); - UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l.a.c.e", '.'), "f"); - UNIT_ASSERT_EQUAL(lhs.GetValueByPath("l/a/c/e/x", '/'), NULL); - UNIT_ASSERT_EQUAL(lhs.GetValueByPath("a/c/e/x", '/'), NULL); - UNIT_ASSERT_EQUAL(lhs.GetValueByPath("nokey", '/'), NULL); + + // faster TStringBuf version + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l", '/'), last); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a", '/'), second); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l/a/c", '/'), first); + UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("l.a.c.e", '.'), "f"); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("l/a/c/e/x", '/'), NULL); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("a/c/e/x", '/'), NULL); + UNIT_ASSERT_EQUAL(lhs.GetValueByPath("nokey", '/'), NULL); UNIT_ASSERT_EQUAL(*lhs.GetValueByPath("", '/'), lhs); // itself TJsonValue array; diff --git a/library/cpp/packers/packers.h b/library/cpp/packers/packers.h index 1bde1b59aa..43a0fa8b5d 100644 --- a/library/cpp/packers/packers.h +++ b/library/cpp/packers/packers.h @@ -132,21 +132,21 @@ namespace NPackers { } extern const ui8 SkipTable[]; - + template <> inline void TIntegralPacker<ui64>::UnpackLeaf(const char* p, ui64& result) const { unsigned char ch = *(p++); size_t taillen = SkipTable[ch] - 1; - + result = (ch & (0x7F >> taillen)); - + while (taillen--) result = ((result << 8) | (*(p++) & 0xFF)); } template <> inline size_t TIntegralPacker<ui64>::SkipLeaf(const char* p) const { - return SkipTable[(ui8)*p]; + return SkipTable[(ui8)*p]; } namespace NImpl { @@ -182,17 +182,17 @@ namespace NPackers { } template <class T> - inline void TIntegralPacker<T>::PackLeaf(char* buffer, const T& data, size_t size) const { + inline void TIntegralPacker<T>::PackLeaf(char* buffer, const T& data, size_t size) const { TIntegralPacker<ui64>().PackLeaf(buffer, ConvertIntegral<T>(data), size); } template <class T> - inline size_t TIntegralPacker<T>::MeasureLeaf(const T& data) const { + inline size_t TIntegralPacker<T>::MeasureLeaf(const T& data) const { return TIntegralPacker<ui64>().MeasureLeaf(ConvertIntegral<T>(data)); } template <class T> - inline size_t TIntegralPacker<T>::SkipLeaf(const char* p) const { + inline size_t TIntegralPacker<T>::SkipLeaf(const char* p) const { return TIntegralPacker<ui64>().SkipLeaf(p); } diff --git a/library/cpp/protobuf/json/inline.h b/library/cpp/protobuf/json/inline.h index e2d7bb6ef0..31dbf6a16c 100644 --- a/library/cpp/protobuf/json/inline.h +++ b/library/cpp/protobuf/json/inline.h @@ -1,66 +1,66 @@ -#pragma once - -// A printer from protobuf to json string, with ability to inline some string fields of given protobuf message -// into output as ready json without additional escaping. These fields should be marked using special field option. -// An example of usage: -// 1) Define a field option in your .proto to identify fields which should be inlined, e.g. -// +#pragma once + +// A printer from protobuf to json string, with ability to inline some string fields of given protobuf message +// into output as ready json without additional escaping. These fields should be marked using special field option. +// An example of usage: +// 1) Define a field option in your .proto to identify fields which should be inlined, e.g. +// // import "google/protobuf/descriptor.proto"; -// extend google.protobuf.FieldOptions { -// optional bool this_is_json = 58253; // do not forget assign some more or less unique tag -// } -// -// 2) Mark some fields of your protobuf message with this option, e.g.: -// -// message TMyObject { -// optional string A = 1 [(this_is_json) = true]; -// } -// -// 3) In the C++ code you prepare somehow an object of TMyObject type -// -// TMyObject o; -// o.Set("{\"inner\":\"value\"}"); -// -// 4) And then serialize it to json string with inlining, e.g.: -// +// extend google.protobuf.FieldOptions { +// optional bool this_is_json = 58253; // do not forget assign some more or less unique tag +// } +// +// 2) Mark some fields of your protobuf message with this option, e.g.: +// +// message TMyObject { +// optional string A = 1 [(this_is_json) = true]; +// } +// +// 3) In the C++ code you prepare somehow an object of TMyObject type +// +// TMyObject o; +// o.Set("{\"inner\":\"value\"}"); +// +// 4) And then serialize it to json string with inlining, e.g.: +// // Cout << NProtobufJson::PrintInlined(o, MakeFieldOptionFunctor(this_is_json)) << Endl; -// +// // 5) Alternatively you can specify a some more abstract functor for defining raw json fields // -// which will print following json to stdout: -// {"A":{"inner":"value"}} -// instead of -// {"A":"{\"inner\":\"value\"}"} -// which would be printed with normal Proto2Json printer. -// -// See ut/inline_ut.cpp for additional examples of usage. - +// which will print following json to stdout: +// {"A":{"inner":"value"}} +// instead of +// {"A":"{\"inner\":\"value\"}"} +// which would be printed with normal Proto2Json printer. +// +// See ut/inline_ut.cpp for additional examples of usage. + #include "config.h" #include "proto2json_printer.h" #include "json_output_create.h" - + #include <library/cpp/protobuf/util/simple_reflection.h> - + #include <util/generic/maybe.h> #include <util/generic/yexception.h> #include <util/generic/utility.h> - + #include <functional> - -namespace NProtobufJson { + +namespace NProtobufJson { template <typename TBasePrinter = TProto2JsonPrinter> // TBasePrinter is assumed to be a TProto2JsonPrinter descendant class TInliningPrinter: public TBasePrinter { public: using TFieldPredicate = std::function<bool(const NProtoBuf::Message&, const NProtoBuf::FieldDescriptor*)>; - + template <typename... TArgs> TInliningPrinter(TFieldPredicate isInlined, TArgs&&... args) : TBasePrinter(std::forward<TArgs>(args)...) , IsInlined(std::move(isInlined)) { } - + virtual void PrintField(const NProtoBuf::Message& proto, const NProtoBuf::FieldDescriptor& field, IJsonOutput& json, @@ -77,12 +77,12 @@ namespace NProtobufJson { json.WriteRawJson(f.Get<TString>(i)); json.EndList(); } - - } else { + + } else { TBasePrinter::PrintField(proto, field, json, key); - } + } } - + private: bool ShouldPrint(const NProtoBuf::TConstField& f) const { if (!f.IsString()) @@ -95,8 +95,8 @@ namespace NProtobufJson { // we may want write default value for given field in case of its absence const auto& cfg = this->GetConfig(); return (f.Field()->is_repeated() ? cfg.MissingRepeatedKeyMode : cfg.MissingSingleKeyMode) == TProto2JsonConfig::MissingKeyDefault; - } - + } + private: TFieldPredicate IsInlined; }; @@ -105,11 +105,11 @@ namespace NProtobufJson { TInliningPrinter<> printer(std::move(isInlined), config); printer.Print(msg, output); } - + inline TString PrintInlined(const NProtoBuf::Message& msg, TInliningPrinter<>::TFieldPredicate isInlined, const TProto2JsonConfig& config = TProto2JsonConfig()) { TString ret; PrintInlined(msg, std::move(isInlined), *CreateJsonMapOutput(ret, config), config); return ret; - } - -} + } + +} diff --git a/library/cpp/protobuf/json/proto2json.cpp b/library/cpp/protobuf/json/proto2json.cpp index 3d76a91686..f0f8a0c4d7 100644 --- a/library/cpp/protobuf/json/proto2json.cpp +++ b/library/cpp/protobuf/json/proto2json.cpp @@ -24,7 +24,7 @@ namespace NProtobufJson { const TProto2JsonConfig& config) { Proto2Json(proto, *CreateJsonMapOutput(json), config); } - + void Proto2Json(const NProtoBuf::Message& proto, NJson::TJsonWriter& writer, const TProto2JsonConfig& config) { Proto2Json(proto, *CreateJsonMapOutput(writer), config); diff --git a/library/cpp/protobuf/json/ut/inline_ut.cpp b/library/cpp/protobuf/json/ut/inline_ut.cpp index c29ad32e7d..a40ec4bfec 100644 --- a/library/cpp/protobuf/json/ut/inline_ut.cpp +++ b/library/cpp/protobuf/json/ut/inline_ut.cpp @@ -4,11 +4,11 @@ #include <library/cpp/protobuf/json/field_option.h> #include <library/cpp/protobuf/json/proto2json.h> #include <library/cpp/testing/unittest/registar.h> - + #include <util/generic/string.h> - -using namespace NProtobufJson; - + +using namespace NProtobufJson; + static NProtobufJsonUt::TInlineTest GetTestMsg() { NProtobufJsonUt::TInlineTest msg; msg.SetOptJson(R"({"a":1,"b":"000"})"); @@ -59,7 +59,7 @@ Y_UNIT_TEST(TestNoValues) { NProtobufJsonUt::TInlineTest msg; msg.MutableInner()->AddNumber(100); msg.MutableInner()->AddNumber(200); - + TString expInlined = R"({"Inner":{"Number":[100,200]}})"; TString myInlined = PrintInlined(msg, MakeFieldOptionFunctor(NProtobufJsonUt::inline_test)); diff --git a/library/cpp/protobuf/json/ut/inline_ut.proto b/library/cpp/protobuf/json/ut/inline_ut.proto index 76bd10232d..d77fdd2537 100644 --- a/library/cpp/protobuf/json/ut/inline_ut.proto +++ b/library/cpp/protobuf/json/ut/inline_ut.proto @@ -1,22 +1,22 @@ import "google/protobuf/descriptor.proto"; - -package NProtobufJsonUt; - -extend google.protobuf.FieldOptions { - optional bool inline_test = 58253; -} - -message TInlineTest { - optional string OptJson = 1 [(inline_test) = true]; - optional string NotJson = 2; - repeated string RepJson = 3 [(inline_test) = true]; - - message TInner { - repeated uint32 Number = 1; - optional string InnerJson = 2 [(inline_test) = true]; - } - optional TInner Inner = 4; -} + +package NProtobufJsonUt; + +extend google.protobuf.FieldOptions { + optional bool inline_test = 58253; +} + +message TInlineTest { + optional string OptJson = 1 [(inline_test) = true]; + optional string NotJson = 2; + repeated string RepJson = 3 [(inline_test) = true]; + + message TInner { + repeated uint32 Number = 1; + optional string InnerJson = 2 [(inline_test) = true]; + } + optional TInner Inner = 4; +} message TInlineTestDefaultValues { optional string OptJson = 1 [(inline_test) = true, default = "{\"default\":1}"]; diff --git a/library/cpp/protobuf/json/ut/ya.make b/library/cpp/protobuf/json/ut/ya.make index b60a6d3c17..71c2318980 100644 --- a/library/cpp/protobuf/json/ut/ya.make +++ b/library/cpp/protobuf/json/ut/ya.make @@ -6,8 +6,8 @@ SRCS( filter_ut.cpp json2proto_ut.cpp proto2json_ut.cpp - inline_ut.proto - inline_ut.cpp + inline_ut.proto + inline_ut.cpp string_transform_ut.cpp filter_ut.proto test.proto diff --git a/library/cpp/protobuf/util/cast.h b/library/cpp/protobuf/util/cast.h index 83749dfcee..40076feac8 100644 --- a/library/cpp/protobuf/util/cast.h +++ b/library/cpp/protobuf/util/cast.h @@ -1,17 +1,17 @@ -#pragma once +#pragma once #include "traits.h" - + #include <google/protobuf/descriptor.h> #include <google/protobuf/message.h> - + #include <util/generic/cast.h> - -namespace NProtoBuf { + +namespace NProtoBuf { // C++ compatible conversions of FieldDescriptor::CppType's - + using ECppType = FieldDescriptor::CppType; - + namespace NCast { template <ECppType src, ECppType dst> struct TIsCompatibleCppType { @@ -19,67 +19,67 @@ namespace NProtoBuf { Result = src == dst || (TIsNumericCppType<src>::Result && TIsNumericCppType<dst>::Result) }; - }; - + }; + template <ECppType src, ECppType dst> struct TIsEnumToNumericCppType { enum { Result = (src == FieldDescriptor::CPPTYPE_ENUM && TIsNumericCppType<dst>::Result) }; - }; - + }; + template <ECppType src, ECppType dst, bool compatible> // compatible == true struct TCompatCastBase { static const bool IsCompatible = true; - + typedef typename TCppTypeTraits<src>::T TSrc; typedef typename TCppTypeTraits<dst>::T TDst; - + static inline TDst Cast(TSrc value) { return value; } }; - + template <ECppType src, ECppType dst> // compatible == false struct TCompatCastBase<src, dst, false> { static const bool IsCompatible = false; - + typedef typename TCppTypeTraits<src>::T TSrc; typedef typename TCppTypeTraits<dst>::T TDst; - + static inline TDst Cast(TSrc) { ythrow TBadCastException() << "Incompatible FieldDescriptor::CppType conversion: #" << (size_t)src << " to #" << (size_t)dst; } }; - + template <ECppType src, ECppType dst, bool isEnumToNum> // enum -> numeric struct TCompatCastImpl { static const bool IsCompatible = true; - + typedef typename TCppTypeTraits<dst>::T TDst; - + static inline TDst Cast(const EnumValueDescriptor* value) { Y_ASSERT(value != nullptr); return value->number(); } }; - + template <ECppType src, ECppType dst> struct TCompatCastImpl<src, dst, false>: public TCompatCastBase<src, dst, TIsCompatibleCppType<src, dst>::Result> { using TCompatCastBase<src, dst, TIsCompatibleCppType<src, dst>::Result>::IsCompatible; }; - + template <ECppType src, ECppType dst> struct TCompatCast: public TCompatCastImpl<src, dst, TIsEnumToNumericCppType<src, dst>::Result> { typedef TCompatCastImpl<src, dst, TIsEnumToNumericCppType<src, dst>::Result> TBase; - + typedef typename TCppTypeTraits<src>::T TSrc; typedef typename TCppTypeTraits<dst>::T TDst; - + using TBase::Cast; using TBase::IsCompatible; - + inline bool Try(TSrc value, TDst& res) { if (IsCompatible) { res = Cast(value); @@ -88,69 +88,69 @@ namespace NProtoBuf { return false; } }; - + } - template <ECppType src, ECppType dst> + template <ECppType src, ECppType dst> inline typename TCppTypeTraits<dst>::T CompatCast(typename TCppTypeTraits<src>::T value) { return NCast::TCompatCast<src, dst>::Cast(value); } - - template <ECppType src, ECppType dst> + + template <ECppType src, ECppType dst> inline bool TryCompatCast(typename TCppTypeTraits<src>::T value, typename TCppTypeTraits<dst>::T& res) { return NCast::TCompatCast<src, dst>::Try(value, res); } - + // Message static/dynamic checked casts - + template <typename TpMessage> inline const TpMessage* TryCast(const Message* msg) { if (!msg || TpMessage::descriptor() != msg->GetDescriptor()) return NULL; return CheckedCast<const TpMessage*>(msg); } - + template <typename TpMessage> inline const TpMessage* TryCast(const Message* msg, const TpMessage*& ret) { ret = TryCast<TpMessage>(msg); return ret; } - + template <typename TpMessage> inline TpMessage* TryCast(Message* msg) { if (!msg || TpMessage::descriptor() != msg->GetDescriptor()) return nullptr; return CheckedCast<TpMessage*>(msg); } - + template <typename TpMessage> inline TpMessage* TryCast(Message* msg, TpMessage*& ret) { ret = TryCast<TpMessage>(msg); return ret; } - + // specialize for Message itself - + template <> inline const Message* TryCast<Message>(const Message* msg) { return msg; } - + template <> inline Message* TryCast<Message>(Message* msg) { return msg; } - + // Binary serialization compatible conversion inline bool TryBinaryCast(const Message* from, Message* to, TString* buffer = nullptr) { TString tmpbuf; if (!buffer) buffer = &tmpbuf; - + if (!from->SerializeToString(buffer)) return false; - + return to->ParseFromString(*buffer); } - -} + +} diff --git a/library/cpp/protobuf/util/is_equal.cpp b/library/cpp/protobuf/util/is_equal.cpp index 227408006e..f191e8bfad 100644 --- a/library/cpp/protobuf/util/is_equal.cpp +++ b/library/cpp/protobuf/util/is_equal.cpp @@ -1,16 +1,16 @@ -#include "is_equal.h" -#include "traits.h" - +#include "is_equal.h" +#include "traits.h" + #include <google/protobuf/descriptor.h> - -#include <util/generic/yexception.h> + +#include <util/generic/yexception.h> #include <util/string/cast.h> #include <util/string/vector.h> - -namespace NProtoBuf { + +namespace NProtoBuf { template <bool useDefault> static bool IsEqualImpl(const Message& m1, const Message& m2, TVector<TString>* differentPath); - + namespace { template <FieldDescriptor::CppType CppType, bool useDefault> struct TCompareValue { @@ -26,12 +26,12 @@ namespace NProtoBuf { return NProtoBuf::IsEqualImpl<useDefault>(*value1, *value2, differentPath); } }; - + template <FieldDescriptor::CppType CppType, bool useDefault> class TCompareField { typedef TCppTypeTraits<CppType> TTraits; typedef TCompareValue<CppType, useDefault> TCompare; - + public: static inline bool IsEqual(const Message& m1, const Message& m2, const FieldDescriptor& field, TVector<TString>* differentPath) { if (field.is_repeated()) @@ -39,12 +39,12 @@ namespace NProtoBuf { else return IsEqualSingle(m1, m2, &field, differentPath); } - + private: static bool IsEqualSingle(const Message& m1, const Message& m2, const FieldDescriptor* field, TVector<TString>* differentPath) { bool has1 = m1.GetReflection()->HasField(m1, field); bool has2 = m2.GetReflection()->HasField(m2, field); - + if (has1 != has2) { if (!useDefault || field->is_required()) { return false; @@ -60,7 +60,7 @@ namespace NProtoBuf { static bool IsEqualRepeated(const Message& m1, const Message& m2, const FieldDescriptor* field, TVector<TString>* differentPath) { int fieldSize = m1.GetReflection()->FieldSize(m1, field); if (fieldSize != m2.GetReflection()->FieldSize(m2, field)) - return false; + return false; for (int i = 0; i < fieldSize; ++i) if (!IsEqualRepeatedValue(m1, m2, field, i, differentPath)) { if (!!differentPath) { @@ -68,16 +68,16 @@ namespace NProtoBuf { } return false; } - return true; + return true; } - + static inline bool IsEqualRepeatedValue(const Message& m1, const Message& m2, const FieldDescriptor* field, int index, TVector<TString>* differentPath) { return TCompare::IsEqual(TTraits::GetRepeated(m1, field, index), TTraits::GetRepeated(m2, field, index), differentPath); } }; - + template <bool useDefault> bool IsEqualField(const Message& m1, const Message& m2, const FieldDescriptor& field, TVector<TString>* differentPath) { #define CASE_CPPTYPE(cpptype) \ @@ -88,7 +88,7 @@ namespace NProtoBuf { } \ return r; \ } - + switch (field.cpp_type()) { CASE_CPPTYPE(INT32) CASE_CPPTYPE(INT64) @@ -105,10 +105,10 @@ namespace NProtoBuf { } #undef CASE_CPPTYPE - } + } } - - template <bool useDefault> + + template <bool useDefault> bool IsEqualImpl(const Message& m1, const Message& m2, TVector<TString>* differentPath) { const Descriptor* descr = m1.GetDescriptor(); if (descr != m2.GetDescriptor()) { @@ -120,7 +120,7 @@ namespace NProtoBuf { } return true; } - + bool IsEqual(const Message& m1, const Message& m2) { return IsEqualImpl<false>(m1, m2, nullptr); } @@ -131,14 +131,14 @@ namespace NProtoBuf { bool r = IsEqualImpl<false>(m1, m2, differentPathVectorPtr); if (!r && differentPath) { *differentPath = JoinStrings(differentPathVector.rbegin(), differentPathVector.rend(), "/"); - } + } return r; } - + bool IsEqualDefault(const Message& m1, const Message& m2) { return IsEqualImpl<true>(m1, m2, nullptr); - } - + } + template <bool useDefault> static bool IsEqualFieldImpl( const Message& m1, @@ -147,11 +147,11 @@ namespace NProtoBuf { TVector<TString>* differentPath) { const Descriptor* descr = m1.GetDescriptor(); if (descr != m2.GetDescriptor()) { - return false; + return false; } return IsEqualField<useDefault>(m1, m2, field, differentPath); } - + bool IsEqualField(const Message& m1, const Message& m2, const FieldDescriptor& field) { return IsEqualFieldImpl<false>(m1, m2, field, nullptr); } diff --git a/library/cpp/protobuf/util/is_equal.h b/library/cpp/protobuf/util/is_equal.h index 13c0aae63d..35515639d0 100644 --- a/library/cpp/protobuf/util/is_equal.h +++ b/library/cpp/protobuf/util/is_equal.h @@ -1,7 +1,7 @@ -#pragma once - +#pragma once + #include <util/generic/fwd.h> - + namespace google { namespace protobuf { class Message; @@ -9,11 +9,11 @@ namespace google { } } -namespace NProtoBuf { +namespace NProtoBuf { using ::google::protobuf::FieldDescriptor; using ::google::protobuf::Message; } - + namespace NProtoBuf { // Reflection-based equality check for arbitrary protobuf messages @@ -21,7 +21,7 @@ namespace NProtoBuf { // a field with explicitly set default value. bool IsEqual(const Message& m1, const Message& m2); bool IsEqual(const Message& m1, const Message& m2, TString* differentPath); - + bool IsEqualField(const Message& m1, const Message& m2, const FieldDescriptor& field); // Non-strict version: optional field without explicit value is compared @@ -29,5 +29,5 @@ namespace NProtoBuf { bool IsEqualDefault(const Message& m1, const Message& m2); bool IsEqualFieldDefault(const Message& m1, const Message& m2, const FieldDescriptor& field); - -} + +} diff --git a/library/cpp/protobuf/util/is_equal_ut.cpp b/library/cpp/protobuf/util/is_equal_ut.cpp index 3ca4c90dd5..b10be7bb18 100644 --- a/library/cpp/protobuf/util/is_equal_ut.cpp +++ b/library/cpp/protobuf/util/is_equal_ut.cpp @@ -49,7 +49,7 @@ Y_UNIT_TEST_SUITE(ProtobufIsEqual) { bool equalField = NProtoBuf::IsEqualField(a, b, *InnerDescr); UNIT_ASSERT(!equalField); } - + Y_UNIT_TEST(IsEqual3) { TSampleForIsEqual a; TSampleForIsEqual b; @@ -75,14 +75,14 @@ Y_UNIT_TEST_SUITE(ProtobufIsEqual) { } Y_UNIT_TEST(IsEqualDefault) { - TSampleForIsEqual a; - TSampleForIsEqual b; - - a.SetName(""); - UNIT_ASSERT(NProtoBuf::IsEqualDefault(a, b)); - UNIT_ASSERT(!NProtoBuf::IsEqual(a, b)); + TSampleForIsEqual a; + TSampleForIsEqual b; + + a.SetName(""); + UNIT_ASSERT(NProtoBuf::IsEqualDefault(a, b)); + UNIT_ASSERT(!NProtoBuf::IsEqual(a, b)); UNIT_ASSERT(!NProtoBuf::IsEqualField(a, b, *NameDescr)); UNIT_ASSERT(NProtoBuf::IsEqualFieldDefault(a, b, *NameDescr)); - } + } } diff --git a/library/cpp/protobuf/util/merge.cpp b/library/cpp/protobuf/util/merge.cpp index dc2b9cc806..4af4431d46 100644 --- a/library/cpp/protobuf/util/merge.cpp +++ b/library/cpp/protobuf/util/merge.cpp @@ -1,30 +1,30 @@ -#include "merge.h" +#include "merge.h" #include "simple_reflection.h" #include <google/protobuf/message.h> #include <library/cpp/protobuf/util/proto/merge.pb.h> - -namespace NProtoBuf { + +namespace NProtoBuf { void RewriteMerge(const Message& src, Message& dst) { const Descriptor* d = src.GetDescriptor(); Y_ASSERT(d == dst.GetDescriptor()); - + for (int i = 0; i < d->field_count(); ++i) { if (TConstField(src, d->field(i)).Has()) TMutableField(dst, d->field(i)).Clear(); } - + dst.MergeFrom(src); - } - + } + static void ClearNonMergeable(const Message& src, Message& dst) { const Descriptor* d = src.GetDescriptor(); if (d->options().GetExtension(DontMerge)) { dst.Clear(); return; } - + for (int i = 0; i < d->field_count(); ++i) { const FieldDescriptor* fd = d->field(i); TConstField srcField(src, fd); @@ -36,11 +36,11 @@ namespace NProtoBuf { ClearNonMergeable(*srcField.Get<const Message*>(), *dstField.MutableMessage()); } } - } - + } + void CustomMerge(const Message& src, Message& dst) { ClearNonMergeable(src, dst); dst.MergeFrom(src); - } - -} + } + +} diff --git a/library/cpp/protobuf/util/merge.h b/library/cpp/protobuf/util/merge.h index 924975f141..847b65dd0e 100644 --- a/library/cpp/protobuf/util/merge.h +++ b/library/cpp/protobuf/util/merge.h @@ -1,22 +1,22 @@ -#pragma once - +#pragma once + namespace google { namespace protobuf { class Message; } } - + namespace NProtoBuf { using Message = ::google::protobuf::Message; } - -namespace NProtoBuf { + +namespace NProtoBuf { // Similiar to Message::MergeFrom, overwrites existing repeated fields // and embedded messages completely instead of recursive merging. void RewriteMerge(const Message& src, Message& dst); - + // Does standard MergeFrom() by default, except messages/fields marked with DontMerge or DontMergeField option. // Such fields are merged using RewriteMerge() (i.e. destination is cleared before merging anything from source) void CustomMerge(const Message& src, Message& dst); - + } diff --git a/library/cpp/protobuf/util/merge_ut.cpp b/library/cpp/protobuf/util/merge_ut.cpp index 22217db183..44f4db69b7 100644 --- a/library/cpp/protobuf/util/merge_ut.cpp +++ b/library/cpp/protobuf/util/merge_ut.cpp @@ -1,83 +1,83 @@ -#include "merge.h" +#include "merge.h" #include <library/cpp/protobuf/util/ut/common_ut.pb.h> - + #include <library/cpp/testing/unittest/registar.h> - -using namespace NProtoBuf; - + +using namespace NProtoBuf; + Y_UNIT_TEST_SUITE(ProtobufMerge) { static void InitProto(NProtobufUtilUt::TMergeTest & p, bool isSrc) { - size_t start = isSrc ? 0 : 100; - - p.AddMergeInt(start + 1); - p.AddMergeInt(start + 2); - - p.AddNoMergeInt(start + 3); - p.AddNoMergeInt(start + 4); - - NProtobufUtilUt::TMergeTestMerge* m = p.MutableMergeSub(); - m->SetA(start + 5); - m->AddB(start + 6); - m->AddB(start + 7); - m->AddC(start + 14); - - if (!isSrc) { - // only for dst - NProtobufUtilUt::TMergeTestMerge* mm1 = p.AddNoMergeRepSub(); - mm1->SetA(start + 8); - mm1->AddB(start + 9); - mm1->AddB(start + 10); - } - - NProtobufUtilUt::TMergeTestNoMerge* mm3 = p.MutableNoMergeOptSub(); - mm3->SetA(start + 11); - mm3->AddB(start + 12); - mm3->AddB(start + 13); - } - + size_t start = isSrc ? 0 : 100; + + p.AddMergeInt(start + 1); + p.AddMergeInt(start + 2); + + p.AddNoMergeInt(start + 3); + p.AddNoMergeInt(start + 4); + + NProtobufUtilUt::TMergeTestMerge* m = p.MutableMergeSub(); + m->SetA(start + 5); + m->AddB(start + 6); + m->AddB(start + 7); + m->AddC(start + 14); + + if (!isSrc) { + // only for dst + NProtobufUtilUt::TMergeTestMerge* mm1 = p.AddNoMergeRepSub(); + mm1->SetA(start + 8); + mm1->AddB(start + 9); + mm1->AddB(start + 10); + } + + NProtobufUtilUt::TMergeTestNoMerge* mm3 = p.MutableNoMergeOptSub(); + mm3->SetA(start + 11); + mm3->AddB(start + 12); + mm3->AddB(start + 13); + } + Y_UNIT_TEST(CustomMerge) { - NProtobufUtilUt::TMergeTest src, dst; - InitProto(src, true); - InitProto(dst, false); - + NProtobufUtilUt::TMergeTest src, dst; + InitProto(src, true); + InitProto(dst, false); + // Cerr << "\nsrc: " << src.ShortDebugString() << Endl; // Cerr << "dst: " << dst.ShortDebugString() << Endl; - NProtoBuf::CustomMerge(src, dst); + NProtoBuf::CustomMerge(src, dst); // Cerr << "dst2:" << dst.ShortDebugString() << Endl; - - // repeated uint32 MergeInt = 1; - UNIT_ASSERT_EQUAL(dst.MergeIntSize(), 4); - UNIT_ASSERT_EQUAL(dst.GetMergeInt(0), 101); - UNIT_ASSERT_EQUAL(dst.GetMergeInt(1), 102); - UNIT_ASSERT_EQUAL(dst.GetMergeInt(2), 1); - UNIT_ASSERT_EQUAL(dst.GetMergeInt(3), 2); - - // repeated uint32 NoMergeInt = 2 [(DontMergeField)=true]; - UNIT_ASSERT_EQUAL(dst.NoMergeIntSize(), 2); - UNIT_ASSERT_EQUAL(dst.GetNoMergeInt(0), 3); - UNIT_ASSERT_EQUAL(dst.GetNoMergeInt(1), 4); - - // optional TMergeTestMerge MergeSub = 3; - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetA(), 5); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().BSize(), 4); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(0), 106); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(1), 107); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(2), 6); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(3), 7); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().CSize(), 1); - UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetC(0), 14); - - // repeated TMergeTestMerge NoMergeRepSub = 4 [(DontMergeField)=true]; - UNIT_ASSERT_EQUAL(dst.NoMergeRepSubSize(), 1); - UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetA(), 108); - UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).BSize(), 2); - UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetB(0), 109); - UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetB(1), 110); - - // optional TMergeTestNoMerge NoMergeOptSub = 5; - UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetA(), 11); - UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().BSize(), 2); - UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetB(0), 12); - UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetB(1), 13); - } -} + + // repeated uint32 MergeInt = 1; + UNIT_ASSERT_EQUAL(dst.MergeIntSize(), 4); + UNIT_ASSERT_EQUAL(dst.GetMergeInt(0), 101); + UNIT_ASSERT_EQUAL(dst.GetMergeInt(1), 102); + UNIT_ASSERT_EQUAL(dst.GetMergeInt(2), 1); + UNIT_ASSERT_EQUAL(dst.GetMergeInt(3), 2); + + // repeated uint32 NoMergeInt = 2 [(DontMergeField)=true]; + UNIT_ASSERT_EQUAL(dst.NoMergeIntSize(), 2); + UNIT_ASSERT_EQUAL(dst.GetNoMergeInt(0), 3); + UNIT_ASSERT_EQUAL(dst.GetNoMergeInt(1), 4); + + // optional TMergeTestMerge MergeSub = 3; + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetA(), 5); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().BSize(), 4); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(0), 106); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(1), 107); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(2), 6); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetB(3), 7); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().CSize(), 1); + UNIT_ASSERT_EQUAL(dst.GetMergeSub().GetC(0), 14); + + // repeated TMergeTestMerge NoMergeRepSub = 4 [(DontMergeField)=true]; + UNIT_ASSERT_EQUAL(dst.NoMergeRepSubSize(), 1); + UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetA(), 108); + UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).BSize(), 2); + UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetB(0), 109); + UNIT_ASSERT_EQUAL(dst.GetNoMergeRepSub(0).GetB(1), 110); + + // optional TMergeTestNoMerge NoMergeOptSub = 5; + UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetA(), 11); + UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().BSize(), 2); + UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetB(0), 12); + UNIT_ASSERT_EQUAL(dst.GetNoMergeOptSub().GetB(1), 13); + } +} diff --git a/library/cpp/protobuf/util/proto/merge.proto b/library/cpp/protobuf/util/proto/merge.proto index a937041c07..1adfa8db1e 100644 --- a/library/cpp/protobuf/util/proto/merge.proto +++ b/library/cpp/protobuf/util/proto/merge.proto @@ -1,11 +1,11 @@ import "google/protobuf/descriptor.proto"; - -// These meta-options are used for selecting proper merging method, see merge.h - -extend google.protobuf.MessageOptions { - optional bool DontMerge = 54287; -} - -extend google.protobuf.FieldOptions { - optional bool DontMergeField = 54288; -} + +// These meta-options are used for selecting proper merging method, see merge.h + +extend google.protobuf.MessageOptions { + optional bool DontMerge = 54287; +} + +extend google.protobuf.FieldOptions { + optional bool DontMergeField = 54288; +} diff --git a/library/cpp/protobuf/util/repeated_field_utils.h b/library/cpp/protobuf/util/repeated_field_utils.h index c07bd84647..8971b8f3d2 100644 --- a/library/cpp/protobuf/util/repeated_field_utils.h +++ b/library/cpp/protobuf/util/repeated_field_utils.h @@ -1,7 +1,7 @@ #pragma once #include <google/protobuf/repeated_field.h> -#include <util/generic/vector.h> +#include <util/generic/vector.h> template <typename T> void RemoveRepeatedPtrFieldElement(google::protobuf::RepeatedPtrField<T>* repeated, unsigned index) { @@ -15,8 +15,8 @@ void RemoveRepeatedPtrFieldElement(google::protobuf::RepeatedPtrField<T>* repeat } r.Swap(repeated); } - -namespace NProtoBuf { + +namespace NProtoBuf { /// Move item to specified position template <typename TRepeated> static void MoveRepeatedFieldItem(TRepeated* field, size_t indexFrom, size_t indexTo) { @@ -32,7 +32,7 @@ namespace NProtoBuf { field->SwapElements(i, i + 1); } } - + template <typename T> static T* InsertRepeatedFieldItem(NProtoBuf::RepeatedPtrField<T>* field, size_t index) { T* ret = field->Add(); @@ -44,13 +44,13 @@ namespace NProtoBuf { static void RemoveRepeatedFieldItem(TRepeated* field, size_t index) { if ((int)index >= field->size()) return; - + for (int i = index + 1; i < field->size(); ++i) field->SwapElements(i - 1, i); - + field->RemoveLast(); } - + template <typename TRepeated, typename TPred> // suitable both for RepeatedField and RepeatedPtrField static void RemoveRepeatedFieldItemIf(TRepeated* repeated, TPred p) { auto last = std::remove_if(repeated->begin(), repeated->end(), p); @@ -60,7 +60,7 @@ namespace NProtoBuf { repeated->RemoveLast(); } } - + namespace NImpl { template <typename TRepeated> static void ShiftLeft(TRepeated* field, int begIndex, int endIndex, size_t shiftSize) { @@ -73,24 +73,24 @@ namespace NProtoBuf { } // Remove several items at once, could be more efficient compared to calling RemoveRepeatedFieldItem several times - template <typename TRepeated> + template <typename TRepeated> static void RemoveRepeatedFieldItems(TRepeated* field, const TVector<size_t>& sortedIndices) { if (sortedIndices.empty()) return; - + size_t shift = 1; for (size_t i = 1; i < sortedIndices.size(); ++i, ++shift) NImpl::ShiftLeft(field, sortedIndices[i - 1] + 1, sortedIndices[i], shift); NImpl::ShiftLeft(field, sortedIndices.back() + 1, field->size(), shift); - + for (; shift > 0; --shift) field->RemoveLast(); } - + template <typename TRepeated> static void ReverseRepeatedFieldItems(TRepeated* field) { for (int i1 = 0, i2 = field->size() - 1; i1 < i2; ++i1, --i2) field->SwapElements(i1, i2); } - -} + +} diff --git a/library/cpp/protobuf/util/simple_reflection.h b/library/cpp/protobuf/util/simple_reflection.h index 61e877a787..baee5e4a53 100644 --- a/library/cpp/protobuf/util/simple_reflection.h +++ b/library/cpp/protobuf/util/simple_reflection.h @@ -1,18 +1,18 @@ -#pragma once - +#pragma once + #include "cast.h" #include "path.h" -#include "traits.h" - +#include "traits.h" + #include <google/protobuf/descriptor.h> #include <google/protobuf/message.h> - + #include <util/generic/maybe.h> #include <util/generic/typetraits.h> #include <util/generic/vector.h> #include <util/system/defaults.h> -namespace NProtoBuf { +namespace NProtoBuf { class TConstField { public: TConstField(const Message& msg, const FieldDescriptor* fd) @@ -21,11 +21,11 @@ namespace NProtoBuf { { Y_ASSERT(Fd && Fd->containing_type() == Msg.GetDescriptor()); } - + static TMaybe<TConstField> ByPath(const Message& msg, const TStringBuf& path); static TMaybe<TConstField> ByPath(const Message& msg, const TVector<const FieldDescriptor*>& fieldsPath); static TMaybe<TConstField> ByPath(const Message& msg, const TFieldPath& fieldsPath); - + const Message& Parent() const { return Msg; } @@ -33,50 +33,50 @@ namespace NProtoBuf { const FieldDescriptor* Field() const { return Fd; } - + bool HasValue() const { return IsRepeated() ? Refl().FieldSize(Msg, Fd) > 0 : Refl().HasField(Msg, Fd); } - + // deprecated, use HasValue() instead bool Has() const { return HasValue(); } - + size_t Size() const { return IsRepeated() ? Refl().FieldSize(Msg, Fd) : (Refl().HasField(Msg, Fd) ? 1 : 0); } - + template <typename T> inline typename TSelectCppType<T>::T Get(size_t index = 0) const; - + template <typename TMsg> inline const TMsg* GetAs(size_t index = 0) const { // casting version of Get return IsMessageInstance<TMsg>() ? CheckedCast<const TMsg*>(Get<const Message*>(index)) : nullptr; } - + template <typename T> bool IsInstance() const { return CppType() == TSelectCppType<T>::Result; } - + template <typename TMsg> bool IsMessageInstance() const { return IsMessage() && Fd->message_type() == TMsg::descriptor(); } - + template <typename TMsg> bool IsInstance(std::enable_if_t<std::is_base_of<Message, TMsg>::value && !std::is_same<Message, TMsg>::value, void>* = NULL) const { // template will be selected when specifying Message children types return IsMessage() && Fd->message_type() == TMsg::descriptor(); } - + bool IsString() const { return CppType() == FieldDescriptor::CPPTYPE_STRING; } - + bool IsMessage() const { return CppType() == FieldDescriptor::CPPTYPE_MESSAGE; } @@ -95,11 +95,11 @@ namespace NProtoBuf { bool IsRepeated() const { return Fd->is_repeated(); } - + FieldDescriptor::CppType CppType() const { return Fd->cpp_type(); } - + const Reflection& Refl() const { return *Msg.GetReflection(); } @@ -107,23 +107,23 @@ namespace NProtoBuf { [[noreturn]] void RaiseUnknown() const { ythrow yexception() << "Unknown field cpp-type: " << (size_t)CppType(); } - + bool IsSameField(const TConstField& other) const { return &Parent() == &other.Parent() && Field() == other.Field(); } - + protected: const Message& Msg; const FieldDescriptor* Fd; }; - + class TMutableField: public TConstField { public: TMutableField(Message& msg, const FieldDescriptor* fd) : TConstField(msg, fd) { } - + static TMaybe<TMutableField> ByPath(Message& msg, const TStringBuf& path, bool createPath = false); static TMaybe<TMutableField> ByPath(Message& msg, const TVector<const FieldDescriptor*>& fieldsPath, bool createPath = false); static TMaybe<TMutableField> ByPath(Message& msg, const TFieldPath& fieldsPath, bool createPath = false); @@ -131,26 +131,26 @@ namespace NProtoBuf { Message* MutableParent() { return Mut(); } - + template <typename T> inline void Set(T value, size_t index = 0); - + template <typename T> inline void Add(T value); inline void MergeFrom(const TConstField& src); - + inline void Clear() { Refl().ClearField(Mut(), Fd); } /* - void Swap(TMutableField& f) { + void Swap(TMutableField& f) { Y_ASSERT(Field() == f.Field()); - - // not implemented yet, TODO: implement when Reflection::Mutable(Ptr)RepeatedField - // is ported into arcadia protobuf library from up-stream. - } -*/ + + // not implemented yet, TODO: implement when Reflection::Mutable(Ptr)RepeatedField + // is ported into arcadia protobuf library from up-stream. + } +*/ inline void RemoveLast() { Y_ASSERT(HasValue()); if (IsRepeated()) @@ -158,7 +158,7 @@ namespace NProtoBuf { else Clear(); } - + inline void SwapElements(size_t index1, size_t index2) { Y_ASSERT(IsRepeated()); Y_ASSERT(index1 < Size()); @@ -188,7 +188,7 @@ namespace NProtoBuf { return Refl().MutableMessage(Mut(), Fd); } } - + template <typename TMsg> inline TMsg* AddMessage() { return CheckedCast<TMsg*>(AddMessage()); @@ -207,9 +207,9 @@ namespace NProtoBuf { template <typename T> inline void MergeValue(T srcValue); }; - + // template implementations - + template <typename T> inline typename TSelectCppType<T>::T TConstField::Get(size_t index) const { Y_ASSERT(index < Size() || !Fd->is_repeated() && index == 0); // Get for single fields is always allowed because of default values @@ -222,8 +222,8 @@ namespace NProtoBuf { RaiseUnknown(); } #undef TMP_MACRO_FOR_CPPTYPE - } - + } + template <typename T> inline void TMutableField::Set(T value, size_t index) { Y_ASSERT(!IsRepeated() && index == 0 || index < Size()); @@ -237,8 +237,8 @@ namespace NProtoBuf { RaiseUnknown(); } #undef TMP_MACRO_FOR_CPPTYPE - } - + } + template <typename T> inline void TMutableField::Add(T value) { #define TMP_MACRO_FOR_CPPTYPE(CPPTYPE) \ @@ -251,13 +251,13 @@ namespace NProtoBuf { RaiseUnknown(); } #undef TMP_MACRO_FOR_CPPTYPE - } - + } + template <typename T> inline void TMutableField::MergeValue(T srcValue) { Add(srcValue); } - + template <> inline void TMutableField::MergeValue<const Message*>(const Message* srcValue) { if (IsRepeated()) { diff --git a/library/cpp/protobuf/util/simple_reflection_ut.cpp b/library/cpp/protobuf/util/simple_reflection_ut.cpp index 169d4703c9..347fd8d980 100644 --- a/library/cpp/protobuf/util/simple_reflection_ut.cpp +++ b/library/cpp/protobuf/util/simple_reflection_ut.cpp @@ -156,21 +156,21 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { { TMaybe<TConstField> field = TConstField::ByPath(msg, "OneStr"); UNIT_ASSERT(field); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT_VALUES_EQUAL("1", (field->Get<TString>())); } { TMaybe<TConstField> field = TConstField::ByPath(msg, "OneMsg"); UNIT_ASSERT(field); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT(field->IsMessageInstance<TInnerSample>()); } { TMaybe<TConstField> field = TConstField::ByPath(msg, "/OneMsg/RepInt"); UNIT_ASSERT(field); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT_VALUES_EQUAL(2, field->Size()); UNIT_ASSERT_VALUES_EQUAL(2, field->Get<int>(0)); UNIT_ASSERT_VALUES_EQUAL(3, field->Get<int>(1)); @@ -179,7 +179,7 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { { TMaybe<TConstField> field = TConstField::ByPath(msg, "RepMsg/RepInt"); UNIT_ASSERT(field); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT_VALUES_EQUAL(2, field->Size()); UNIT_ASSERT_VALUES_EQUAL(4, field->Get<int>(0)); UNIT_ASSERT_VALUES_EQUAL(5, field->Get<int>(1)); @@ -194,13 +194,13 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { { TMaybe<TConstField> field = TConstField::ByPath(msg, "OneStr"); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); } { TMaybe<TConstField> field = TConstField::ByPath(msg, "OneMsg/RepInt"); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); } { @@ -217,25 +217,25 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { { TMaybe<TMutableField> field = TMutableField::ByPath(msg, "OneStr"); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); field->Set(TString("zz")); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT_VALUES_EQUAL("zz", msg.GetOneStr()); } { TMaybe<TMutableField> field = TMutableField::ByPath(msg, "OneStr"); UNIT_ASSERT(field); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); field->Set(TString("dd")); - UNIT_ASSERT(field->HasValue()); + UNIT_ASSERT(field->HasValue()); UNIT_ASSERT_VALUES_EQUAL("dd", msg.GetOneStr()); } { TMaybe<TMutableField> field = TMutableField::ByPath(msg, "OneMsg/RepInt"); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); field->Add(10); UNIT_ASSERT_VALUES_EQUAL(10, msg.GetOneMsg().GetRepInt(0)); } @@ -253,13 +253,13 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { { TMaybe<TMutableField> field = TMutableField::ByPath(msg, "OneStr", true); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); } { TMaybe<TMutableField> field = TMutableField::ByPath(msg, "OneMsg/RepInt", true); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); UNIT_ASSERT(msg.HasOneMsg()); field->Add(10); UNIT_ASSERT_VALUES_EQUAL(10, msg.GetOneMsg().GetRepInt(0)); @@ -270,7 +270,7 @@ Y_UNIT_TEST_SUITE(ProtobufSimpleReflection) { TMaybe<TMutableField> fieldCopy = TMutableField::ByPath(msg, "RepMsg/RepInt", true); Y_UNUSED(fieldCopy); UNIT_ASSERT(field); - UNIT_ASSERT(!field->HasValue()); + UNIT_ASSERT(!field->HasValue()); UNIT_ASSERT_VALUES_EQUAL(1, msg.RepMsgSize()); field->Add(12); UNIT_ASSERT_VALUES_EQUAL(12, field->Get<int>()); diff --git a/library/cpp/protobuf/util/sort.h b/library/cpp/protobuf/util/sort.h index 985ba6f689..bd851b4e5a 100644 --- a/library/cpp/protobuf/util/sort.h +++ b/library/cpp/protobuf/util/sort.h @@ -1,15 +1,15 @@ -#pragma once - +#pragma once + #include <google/protobuf/message.h> - -#include <util/generic/vector.h> -#include <util/generic/algorithm.h> - -namespace NProtoBuf { + +#include <util/generic/vector.h> +#include <util/generic/algorithm.h> + +namespace NProtoBuf { // TComparePtr is something like: // typedef bool (*TComparePtr)(const Message* msg1, const Message* msg2); // typedef bool (*TComparePtr)(const TProto* msg1, const TProto* msg2); - + template <typename TProto, typename TComparePtr> void SortMessages(RepeatedPtrField<TProto>& msgs, TComparePtr cmp) { TVector<TProto*> ptrs; @@ -17,12 +17,12 @@ namespace NProtoBuf { while (msgs.size()) { ptrs.push_back(msgs.ReleaseLast()); } - + ::StableSort(ptrs.begin(), ptrs.end(), cmp); for (size_t i = 0; i < ptrs.size(); ++i) { msgs.AddAllocated(ptrs[i]); } - } - -} + } + +} diff --git a/library/cpp/protobuf/util/traits.h b/library/cpp/protobuf/util/traits.h index 50f036d0ea..4be520f22a 100644 --- a/library/cpp/protobuf/util/traits.h +++ b/library/cpp/protobuf/util/traits.h @@ -1,16 +1,16 @@ -#pragma once - -#include <util/generic/typetraits.h> - +#pragma once + +#include <util/generic/typetraits.h> + #include <google/protobuf/descriptor.h> #include <google/protobuf/message.h> - -namespace NProtoBuf { -// this nasty windows.h macro interfers with protobuf::Reflection::GetMessage() -#if defined(GetMessage) -#undef GetMessage -#endif - + +namespace NProtoBuf { +// this nasty windows.h macro interfers with protobuf::Reflection::GetMessage() +#if defined(GetMessage) +#undef GetMessage +#endif + struct TCppTypeTraitsBase { static inline bool Has(const Message& msg, const FieldDescriptor* field) { // non-repeated return msg.GetReflection()->HasField(msg, field); @@ -18,24 +18,24 @@ namespace NProtoBuf { static inline size_t Size(const Message& msg, const FieldDescriptor* field) { // repeated return msg.GetReflection()->FieldSize(msg, field); } - + static inline void Clear(Message& msg, const FieldDescriptor* field) { msg.GetReflection()->ClearField(&msg, field); } - + static inline void RemoveLast(Message& msg, const FieldDescriptor* field) { msg.GetReflection()->RemoveLast(&msg, field); } - + static inline void SwapElements(Message& msg, const FieldDescriptor* field, int index1, int index2) { msg.GetReflection()->SwapElements(&msg, field, index1, index2); } }; - + // default value accessor template <FieldDescriptor::CppType cpptype> struct TCppTypeTraitsDefault; - + #define DECLARE_CPPTYPE_DEFAULT(cpptype, method) \ template <> \ struct TCppTypeTraitsDefault<cpptype> { \ @@ -62,23 +62,23 @@ namespace NProtoBuf { template <FieldDescriptor::CppType cpptype> struct TCppTypeTraits : TCppTypeTraitsBase { static const FieldDescriptor::CppType CppType = cpptype; - + struct T {}; static T Get(const Message& msg, const FieldDescriptor* field); static T GetRepeated(const Message& msg, const FieldDescriptor* field, int index); static T GetDefault(const FieldDescriptor* field); - + static void Set(Message& msg, const FieldDescriptor* field, T value); static void AddRepeated(Message& msg, const FieldDescriptor* field, T value); static void SetRepeated(Message& msg, const FieldDescriptor* field, int index, T value); }; - + // any type T -> CppType template <typename T> struct TSelectCppType { //static const FieldDescriptor::CppType Result = FieldDescriptor::MAX_CPPTYPE; }; - + #define DECLARE_CPPTYPE_TRAITS(cpptype, type, method) \ template <> \ struct TCppTypeTraits<cpptype>: public TCppTypeTraitsBase { \ @@ -108,8 +108,8 @@ namespace NProtoBuf { struct TSelectCppType<type> { \ static const FieldDescriptor::CppType Result = cpptype; \ typedef type T; \ - }; - + }; + DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_INT32, i32, Int32); DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_INT64, i64, Int64); DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_UINT32, ui32, UInt32); @@ -120,15 +120,15 @@ namespace NProtoBuf { DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_ENUM, const EnumValueDescriptor*, Enum); DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_STRING, TString, String); //DECLARE_CPPTYPE_TRAITS(FieldDescriptor::CPPTYPE_MESSAGE, const Message&, Message); - -#undef DECLARE_CPPTYPE_TRAITS - + +#undef DECLARE_CPPTYPE_TRAITS + // specialization for message pointer template <> struct TCppTypeTraits<FieldDescriptor::CPPTYPE_MESSAGE>: public TCppTypeTraitsBase { typedef const Message* T; static const FieldDescriptor::CppType CppType = FieldDescriptor::CPPTYPE_MESSAGE; - + static inline T Get(const Message& msg, const FieldDescriptor* field) { return &(msg.GetReflection()->GetMessage(msg, field)); } @@ -151,29 +151,29 @@ namespace NProtoBuf { return ret; } }; - + template <> struct TSelectCppType<const Message*> { static const FieldDescriptor::CppType Result = FieldDescriptor::CPPTYPE_MESSAGE; typedef const Message* T; }; - + template <> struct TSelectCppType<Message> { static const FieldDescriptor::CppType Result = FieldDescriptor::CPPTYPE_MESSAGE; typedef const Message* T; }; - + template <FieldDescriptor::CppType CppType, bool Repeated> struct TFieldTraits { typedef TCppTypeTraits<CppType> TBaseTraits; typedef typename TBaseTraits::T T; - + static inline T Get(const Message& msg, const FieldDescriptor* field, size_t index = 0) { Y_ASSERT(index == 0); return TBaseTraits::Get(msg, field); } - + static inline T GetDefault(const FieldDescriptor* field) { return TBaseTraits::GetDefault(field); } @@ -181,11 +181,11 @@ namespace NProtoBuf { static inline bool Has(const Message& msg, const FieldDescriptor* field) { return TBaseTraits::Has(msg, field); } - + static inline size_t Size(const Message& msg, const FieldDescriptor* field) { return Has(msg, field); } - + static inline void Set(Message& msg, const FieldDescriptor* field, T value, size_t index = 0) { Y_ASSERT(index == 0); TBaseTraits::Set(msg, field, value); @@ -195,28 +195,28 @@ namespace NProtoBuf { TBaseTraits::Set(msg, field, value); } }; - + template <FieldDescriptor::CppType CppType> struct TFieldTraits<CppType, true> { typedef TCppTypeTraits<CppType> TBaseTraits; typedef typename TBaseTraits::T T; - + static inline T Get(const Message& msg, const FieldDescriptor* field, size_t index = 0) { return TBaseTraits::GetRepeated(msg, field, index); } - + static inline T GetDefault(const FieldDescriptor* field) { return TBaseTraits::GetDefault(field); } - + static inline size_t Size(const Message& msg, const FieldDescriptor* field) { return TBaseTraits::Size(msg, field); } - + static inline bool Has(const Message& msg, const FieldDescriptor* field) { return Size(msg, field) > 0; } - + static inline void Set(Message& msg, const FieldDescriptor* field, T value, size_t index = 0) { TBaseTraits::SetRepeated(msg, field, index, value); } @@ -225,28 +225,28 @@ namespace NProtoBuf { TBaseTraits::AddRepeated(msg, field, value); } }; - + // Simpler interface at the cost of checking is_repeated() on each call template <FieldDescriptor::CppType CppType> struct TSimpleFieldTraits { typedef TFieldTraits<CppType, true> TRepeated; typedef TFieldTraits<CppType, false> TSingle; typedef typename TRepeated::T T; - + static inline size_t Size(const Message& msg, const FieldDescriptor* field) { if (field->is_repeated()) return TRepeated::Size(msg, field); else return TSingle::Size(msg, field); } - + static inline bool Has(const Message& msg, const FieldDescriptor* field) { if (field->is_repeated()) return TRepeated::Has(msg, field); else return TSingle::Has(msg, field); } - + static inline T Get(const Message& msg, const FieldDescriptor* field, size_t index = 0) { Y_ASSERT(index < Size(msg, field) || !field->is_repeated() && index == 0); // Get for single fields is always allowed because of default values if (field->is_repeated()) @@ -254,11 +254,11 @@ namespace NProtoBuf { else return TSingle::Get(msg, field, index); } - + static inline T GetDefault(const FieldDescriptor* field) { return TSingle::GetDefault(field); } - + static inline void Set(Message& msg, const FieldDescriptor* field, T value, size_t index = 0) { Y_ASSERT(!field->is_repeated() && index == 0 || index < Size(msg, field)); if (field->is_repeated()) @@ -266,7 +266,7 @@ namespace NProtoBuf { else TSingle::Set(msg, field, value, index); } - + static inline void Add(Message& msg, const FieldDescriptor* field, T value) { if (field->is_repeated()) TRepeated::Add(msg, field, value); @@ -274,9 +274,9 @@ namespace NProtoBuf { TSingle::Add(msg, field, value); } }; - + // some cpp-type groups - + template <FieldDescriptor::CppType CppType> struct TIsIntegerCppType { enum { @@ -285,16 +285,16 @@ namespace NProtoBuf { CppType == FieldDescriptor::CPPTYPE_UINT32 || CppType == FieldDescriptor::CPPTYPE_UINT64 }; - }; - + }; + template <FieldDescriptor::CppType CppType> struct TIsFloatCppType { enum { Result = CppType == FieldDescriptor::CPPTYPE_FLOAT || CppType == FieldDescriptor::CPPTYPE_DOUBLE }; - }; - + }; + template <FieldDescriptor::CppType CppType> struct TIsNumericCppType { enum { @@ -302,19 +302,19 @@ namespace NProtoBuf { TIsIntegerCppType<CppType>::Result || TIsFloatCppType<CppType>::Result }; - }; - + }; + // a helper macro for splitting flow by cpp-type (e.g. in a switch) - + #define APPLY_TMP_MACRO_FOR_ALL_CPPTYPES() \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_INT32) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_INT64) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_UINT32) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_UINT64) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_DOUBLE) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_FLOAT) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_BOOL) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_ENUM) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_STRING) \ - TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_INT32) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_INT64) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_UINT32) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_UINT64) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_DOUBLE) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_FLOAT) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_BOOL) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_ENUM) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_STRING) \ + TMP_MACRO_FOR_CPPTYPE(NProtoBuf::FieldDescriptor::CPPTYPE_MESSAGE) } diff --git a/library/cpp/protobuf/util/ut/common_ut.proto b/library/cpp/protobuf/util/ut/common_ut.proto index 9cf803ffbf..871b1b9832 100644 --- a/library/cpp/protobuf/util/ut/common_ut.proto +++ b/library/cpp/protobuf/util/ut/common_ut.proto @@ -1,29 +1,29 @@ import "google/protobuf/descriptor.proto"; import "library/cpp/protobuf/util/proto/merge.proto"; - -package NProtobufUtilUt; - -extend google.protobuf.FieldOptions { - optional bool XXX = 53772; -} - -message TWalkTest { - optional uint32 OptInt = 1 [(XXX)=true]; - repeated uint32 RepInt = 2; - - optional string OptStr = 3; - repeated string RepStr = 4 [(XXX)=true]; - - optional TWalkTest OptSub = 5 [(XXX)=true]; - repeated TWalkTest RepSub = 6; -} - + +package NProtobufUtilUt; + +extend google.protobuf.FieldOptions { + optional bool XXX = 53772; +} + +message TWalkTest { + optional uint32 OptInt = 1 [(XXX)=true]; + repeated uint32 RepInt = 2; + + optional string OptStr = 3; + repeated string RepStr = 4 [(XXX)=true]; + + optional TWalkTest OptSub = 5 [(XXX)=true]; + repeated TWalkTest RepSub = 6; +} + message TWalkTestCyclic { optional TNested OptNested = 1; repeated uint64 OptInt64 = 2; optional TWalkTestCyclic OptSub = 3; optional TEnum OptEnum = 4; - + message TNested { optional uint32 OptInt32 = 1; optional TWalkTestCyclic OptSubNested = 2; @@ -37,27 +37,27 @@ message TWalkTestCyclic { } } -message TMergeTestNoMerge { - option (DontMerge) = true; - - optional uint32 A = 1; - repeated uint32 B = 2; -} - -message TMergeTestMerge { - optional uint32 A = 1; - repeated uint32 B = 2; - repeated uint32 C = 3 [(DontMergeField)=true]; -} - -message TMergeTest { - repeated uint32 MergeInt = 1; - repeated uint32 NoMergeInt = 2 [(DontMergeField)=true]; - - optional TMergeTestMerge MergeSub = 3; - repeated TMergeTestMerge NoMergeRepSub = 4 [(DontMergeField)=true]; - optional TMergeTestNoMerge NoMergeOptSub = 5; -} +message TMergeTestNoMerge { + option (DontMerge) = true; + + optional uint32 A = 1; + repeated uint32 B = 2; +} + +message TMergeTestMerge { + optional uint32 A = 1; + repeated uint32 B = 2; + repeated uint32 C = 3 [(DontMergeField)=true]; +} + +message TMergeTest { + repeated uint32 MergeInt = 1; + repeated uint32 NoMergeInt = 2 [(DontMergeField)=true]; + + optional TMergeTestMerge MergeSub = 3; + repeated TMergeTestMerge NoMergeRepSub = 4 [(DontMergeField)=true]; + optional TMergeTestNoMerge NoMergeOptSub = 5; +} message TTextTest { optional uint32 Foo = 1; diff --git a/library/cpp/protobuf/util/ut/sample_for_simple_reflection.proto b/library/cpp/protobuf/util/ut/sample_for_simple_reflection.proto index cca1dd869a..e9c5c569af 100644 --- a/library/cpp/protobuf/util/ut/sample_for_simple_reflection.proto +++ b/library/cpp/protobuf/util/ut/sample_for_simple_reflection.proto @@ -11,15 +11,15 @@ message TSample { repeated string RepStr = 4; optional string AnotherOneStr = 5; - optional int32 OneInt = 6; - repeated int32 RepInt = 7; - - enum EEnum { - V1 = 1; - V2 = 2; - } - optional EEnum OneEnum = 8; - repeated EEnum RepEnum = 9; - + optional int32 OneInt = 6; + repeated int32 RepInt = 7; + + enum EEnum { + V1 = 1; + V2 = 2; + } + optional EEnum OneEnum = 8; + repeated EEnum RepEnum = 9; + extensions 100 to 199; } diff --git a/library/cpp/protobuf/util/ut/ya.make b/library/cpp/protobuf/util/ut/ya.make index 701ba9a8c8..182871179b 100644 --- a/library/cpp/protobuf/util/ut/ya.make +++ b/library/cpp/protobuf/util/ut/ya.make @@ -6,14 +6,14 @@ SRCS( extensions.proto sample_for_is_equal.proto sample_for_simple_reflection.proto - common_ut.proto + common_ut.proto pb_io_ut.cpp is_equal_ut.cpp iterators_ut.cpp simple_reflection_ut.cpp repeated_field_utils_ut.cpp walk_ut.cpp - merge_ut.cpp + merge_ut.cpp ) END() diff --git a/library/cpp/protobuf/util/walk.h b/library/cpp/protobuf/util/walk.h index d15d76562d..944a80dc95 100644 --- a/library/cpp/protobuf/util/walk.h +++ b/library/cpp/protobuf/util/walk.h @@ -1,13 +1,13 @@ -#pragma once - -#include "simple_reflection.h" - +#pragma once + +#include "simple_reflection.h" + #include <google/protobuf/message.h> #include <google/protobuf/descriptor.h> - + #include <functional> -namespace NProtoBuf { +namespace NProtoBuf { // Apply @onField processor to each field in @msg (even empty) // Do not walk deeper the field if the field is an empty message // Returned bool defines if we should walk down deeper to current node children (true), or not (false) diff --git a/library/cpp/protobuf/util/walk_ut.cpp b/library/cpp/protobuf/util/walk_ut.cpp index 2ea6071b17..0597867b32 100644 --- a/library/cpp/protobuf/util/walk_ut.cpp +++ b/library/cpp/protobuf/util/walk_ut.cpp @@ -1,56 +1,56 @@ -#include "walk.h" -#include "simple_reflection.h" +#include "walk.h" +#include "simple_reflection.h" #include <library/cpp/protobuf/util/ut/common_ut.pb.h> - + #include <library/cpp/testing/unittest/registar.h> - -using namespace NProtoBuf; - + +using namespace NProtoBuf; + Y_UNIT_TEST_SUITE(ProtobufWalk) { static void InitProto(NProtobufUtilUt::TWalkTest & p, int level = 0) { - p.SetOptInt(1); - p.AddRepInt(2); - p.AddRepInt(3); - - p.SetOptStr("123"); - p.AddRepStr("*"); - p.AddRepStr("abcdef"); - p.AddRepStr("1234"); - - if (level == 0) { - InitProto(*p.MutableOptSub(), 1); - InitProto(*p.AddRepSub(), 1); - InitProto(*p.AddRepSub(), 1); - } - } - + p.SetOptInt(1); + p.AddRepInt(2); + p.AddRepInt(3); + + p.SetOptStr("123"); + p.AddRepStr("*"); + p.AddRepStr("abcdef"); + p.AddRepStr("1234"); + + if (level == 0) { + InitProto(*p.MutableOptSub(), 1); + InitProto(*p.AddRepSub(), 1); + InitProto(*p.AddRepSub(), 1); + } + } + static bool IncreaseInts(Message & msg, const FieldDescriptor* fd) { - TMutableField f(msg, fd); - if (f.IsInstance<ui32>()) { - for (size_t i = 0; i < f.Size(); ++i) + TMutableField f(msg, fd); + if (f.IsInstance<ui32>()) { + for (size_t i = 0; i < f.Size(); ++i) f.Set(f.Get<ui64>(i) + 1, i); // ui64 should be ok! - } - return true; - } - + } + return true; + } + static bool RepeatString1(Message & msg, const FieldDescriptor* fd) { - TMutableField f(msg, fd); - if (f.IsString()) { - for (size_t i = 0; i < f.Size(); ++i) + TMutableField f(msg, fd); + if (f.IsString()) { + for (size_t i = 0; i < f.Size(); ++i) if (f.Get<TString>(i).StartsWith('1')) f.Set(f.Get<TString>(i) + f.Get<TString>(i), i); - } - return true; - } - + } + return true; + } + static bool ClearXXX(Message & msg, const FieldDescriptor* fd) { - const FieldOptions& opt = fd->options(); - if (opt.HasExtension(NProtobufUtilUt::XXX) && opt.GetExtension(NProtobufUtilUt::XXX)) - TMutableField(msg, fd).Clear(); - - return true; - } - + const FieldOptions& opt = fd->options(); + if (opt.HasExtension(NProtobufUtilUt::XXX) && opt.GetExtension(NProtobufUtilUt::XXX)) + TMutableField(msg, fd).Clear(); + + return true; + } + struct TestStruct { bool Ok = false; @@ -62,67 +62,67 @@ Y_UNIT_TEST_SUITE(ProtobufWalk) { }; Y_UNIT_TEST(TestWalkRefl) { - NProtobufUtilUt::TWalkTest p; - InitProto(p); - - { - UNIT_ASSERT_EQUAL(p.GetOptInt(), 1); - UNIT_ASSERT_EQUAL(p.RepIntSize(), 2); - UNIT_ASSERT_EQUAL(p.GetRepInt(0), 2); - UNIT_ASSERT_EQUAL(p.GetRepInt(1), 3); - - WalkReflection(p, IncreaseInts); - - UNIT_ASSERT_EQUAL(p.GetOptInt(), 2); - UNIT_ASSERT_EQUAL(p.RepIntSize(), 2); - UNIT_ASSERT_EQUAL(p.GetRepInt(0), 3); - UNIT_ASSERT_EQUAL(p.GetRepInt(1), 4); - - UNIT_ASSERT_EQUAL(p.GetOptSub().GetOptInt(), 2); - UNIT_ASSERT_EQUAL(p.GetOptSub().RepIntSize(), 2); - UNIT_ASSERT_EQUAL(p.GetOptSub().GetRepInt(0), 3); - UNIT_ASSERT_EQUAL(p.GetOptSub().GetRepInt(1), 4); - - UNIT_ASSERT_EQUAL(p.RepSubSize(), 2); - UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetOptInt(), 2); - UNIT_ASSERT_EQUAL(p.GetRepSub(1).RepIntSize(), 2); - UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetRepInt(0), 3); - UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetRepInt(1), 4); - } - { - UNIT_ASSERT_EQUAL(p.GetOptStr(), "123"); - UNIT_ASSERT_EQUAL(p.GetRepStr(2), "1234"); - - WalkReflection(p, RepeatString1); - - UNIT_ASSERT_EQUAL(p.GetOptStr(), "123123"); - UNIT_ASSERT_EQUAL(p.RepStrSize(), 3); - UNIT_ASSERT_EQUAL(p.GetRepStr(0), "*"); - UNIT_ASSERT_EQUAL(p.GetRepStr(1), "abcdef"); - UNIT_ASSERT_EQUAL(p.GetRepStr(2), "12341234"); - - UNIT_ASSERT_EQUAL(p.RepSubSize(), 2); - UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetOptStr(), "123123"); - UNIT_ASSERT_EQUAL(p.GetRepSub(0).RepStrSize(), 3); - UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(0), "*"); - UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(1), "abcdef"); - UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(2), "12341234"); - } - { - UNIT_ASSERT(p.HasOptInt()); - UNIT_ASSERT(p.RepStrSize() == 3); - UNIT_ASSERT(p.HasOptSub()); - - WalkReflection(p, ClearXXX); - - UNIT_ASSERT(!p.HasOptInt()); - UNIT_ASSERT(p.RepIntSize() == 2); - UNIT_ASSERT(p.HasOptStr()); - UNIT_ASSERT(p.RepStrSize() == 0); - UNIT_ASSERT(!p.HasOptSub()); - UNIT_ASSERT(p.RepSubSize() == 2); - } - } + NProtobufUtilUt::TWalkTest p; + InitProto(p); + + { + UNIT_ASSERT_EQUAL(p.GetOptInt(), 1); + UNIT_ASSERT_EQUAL(p.RepIntSize(), 2); + UNIT_ASSERT_EQUAL(p.GetRepInt(0), 2); + UNIT_ASSERT_EQUAL(p.GetRepInt(1), 3); + + WalkReflection(p, IncreaseInts); + + UNIT_ASSERT_EQUAL(p.GetOptInt(), 2); + UNIT_ASSERT_EQUAL(p.RepIntSize(), 2); + UNIT_ASSERT_EQUAL(p.GetRepInt(0), 3); + UNIT_ASSERT_EQUAL(p.GetRepInt(1), 4); + + UNIT_ASSERT_EQUAL(p.GetOptSub().GetOptInt(), 2); + UNIT_ASSERT_EQUAL(p.GetOptSub().RepIntSize(), 2); + UNIT_ASSERT_EQUAL(p.GetOptSub().GetRepInt(0), 3); + UNIT_ASSERT_EQUAL(p.GetOptSub().GetRepInt(1), 4); + + UNIT_ASSERT_EQUAL(p.RepSubSize(), 2); + UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetOptInt(), 2); + UNIT_ASSERT_EQUAL(p.GetRepSub(1).RepIntSize(), 2); + UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetRepInt(0), 3); + UNIT_ASSERT_EQUAL(p.GetRepSub(1).GetRepInt(1), 4); + } + { + UNIT_ASSERT_EQUAL(p.GetOptStr(), "123"); + UNIT_ASSERT_EQUAL(p.GetRepStr(2), "1234"); + + WalkReflection(p, RepeatString1); + + UNIT_ASSERT_EQUAL(p.GetOptStr(), "123123"); + UNIT_ASSERT_EQUAL(p.RepStrSize(), 3); + UNIT_ASSERT_EQUAL(p.GetRepStr(0), "*"); + UNIT_ASSERT_EQUAL(p.GetRepStr(1), "abcdef"); + UNIT_ASSERT_EQUAL(p.GetRepStr(2), "12341234"); + + UNIT_ASSERT_EQUAL(p.RepSubSize(), 2); + UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetOptStr(), "123123"); + UNIT_ASSERT_EQUAL(p.GetRepSub(0).RepStrSize(), 3); + UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(0), "*"); + UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(1), "abcdef"); + UNIT_ASSERT_EQUAL(p.GetRepSub(0).GetRepStr(2), "12341234"); + } + { + UNIT_ASSERT(p.HasOptInt()); + UNIT_ASSERT(p.RepStrSize() == 3); + UNIT_ASSERT(p.HasOptSub()); + + WalkReflection(p, ClearXXX); + + UNIT_ASSERT(!p.HasOptInt()); + UNIT_ASSERT(p.RepIntSize() == 2); + UNIT_ASSERT(p.HasOptStr()); + UNIT_ASSERT(p.RepStrSize() == 0); + UNIT_ASSERT(!p.HasOptSub()); + UNIT_ASSERT(p.RepSubSize() == 2); + } + } Y_UNIT_TEST(TestMutableCallable) { TestStruct testStruct; @@ -155,4 +155,4 @@ Y_UNIT_TEST_SUITE(ProtobufWalk) { UNIT_ASSERT_STRINGS_EQUAL(printedSchema, schema); } -} +} diff --git a/library/cpp/protobuf/util/ya.make b/library/cpp/protobuf/util/ya.make index b62028af58..6908416823 100644 --- a/library/cpp/protobuf/util/ya.make +++ b/library/cpp/protobuf/util/ya.make @@ -1,7 +1,7 @@ LIBRARY() -OWNER(mowgli) - +OWNER(mowgli) + PEERDIR( contrib/libs/protobuf library/cpp/binsaver @@ -12,7 +12,7 @@ PEERDIR( SRCS( is_equal.cpp iterators.h - merge.cpp + merge.cpp path.cpp pb_io.cpp pb_utils.h diff --git a/library/cpp/streams/lz/lz.cpp b/library/cpp/streams/lz/lz.cpp index b65bb3ed96..bb6ca0e759 100644 --- a/library/cpp/streams/lz/lz.cpp +++ b/library/cpp/streams/lz/lz.cpp @@ -27,8 +27,8 @@ struct TCommonData { static const size_t overhead = sizeof(ui16) + sizeof(ui8); }; -const size_t SIGNATURE_SIZE = 4; - +const size_t SIGNATURE_SIZE = 4; + template <class TCompressor, class TBase> class TCompressorBase: public TAdditionalStorage<TCompressorBase<TCompressor, TBase>>, public TCompressor, public TCommonData { public: @@ -148,35 +148,35 @@ static inline T GLoad(IInputStream* input) { return LittleToHost(t); } -class TDecompressSignature { +class TDecompressSignature { public: inline TDecompressSignature(IInputStream* input) { if (input->Load(Buffer_, SIGNATURE_SIZE) != SIGNATURE_SIZE) { ythrow TDecompressorError() << "can not load stream signature"; - } + } } - + template <class TDecompressor> inline bool Check() const { static_assert(sizeof(TDecompressor::signature) - 1 == SIGNATURE_SIZE, "expect sizeof(TDecompressor::signature) - 1 == SIGNATURE_SIZE"); return memcmp(TDecompressor::signature, Buffer_, SIGNATURE_SIZE) == 0; } - + private: char Buffer_[SIGNATURE_SIZE]; -}; - -template <class TDecompressor> +}; + +template <class TDecompressor> static inline IInputStream* ConsumeSignature(IInputStream* input) { - TDecompressSignature sign(input); - if (!sign.Check<TDecompressor>()) { - ythrow TDecompressorError() << "incorrect signature"; - } - return input; -} - -template <class TDecompressor> -class TDecompressorBaseImpl: public TDecompressor, public TCommonData { + TDecompressSignature sign(input); + if (!sign.Check<TDecompressor>()) { + ythrow TDecompressorError() << "incorrect signature"; + } + return input; +} + +template <class TDecompressor> +class TDecompressorBaseImpl: public TDecompressor, public TCommonData { public: static inline ui32 CheckVer(ui32 v) { if (v != 1) { @@ -276,18 +276,18 @@ protected: char* Out_; }; -template <class TDecompressor, class TBase> -class TDecompressorBase: public TDecompressorBaseImpl<TDecompressor> { +template <class TDecompressor, class TBase> +class TDecompressorBase: public TDecompressorBaseImpl<TDecompressor> { public: inline TDecompressorBase(IInputStream* slave) : TDecompressorBaseImpl<TDecompressor>(ConsumeSignature<TDecompressor>(slave)) { } - + inline ~TDecompressorBase() { } -}; - +}; + #define DEF_COMPRESSOR_COMMON(rname, name) \ rname::~rname() { \ try { \ @@ -617,7 +617,7 @@ TLzqCompress::TLzqCompress(IOutputStream* slave, ui16 blockSize, EVersion ver, u DEF_COMPRESSOR_COMMON(TLzqCompress, TQuickLZCompress) DEF_DECOMPRESSOR(TLzqDecompress, TQuickLZDecompress) - + namespace { template <class T> struct TInputHolder { @@ -640,58 +640,58 @@ namespace { // Decompressing input streams without signature verification template <class TInput, class TDecompressor> class TLzDecompressInput: public TInputHolder<TInput>, public IInputStream { - public: + public: inline TLzDecompressInput(TInput in) : Impl_(this->Set(in)) - { - } - - private: + { + } + + private: size_t DoRead(void* buf, size_t len) override { - return Impl_.Read(buf, len); - } - - private: - TDecompressorBaseImpl<TDecompressor> Impl_; + return Impl_.Read(buf, len); + } + + private: + TDecompressorBaseImpl<TDecompressor> Impl_; }; } - + template <class T> static TAutoPtr<IInputStream> TryOpenLzDecompressorX(const TDecompressSignature& s, T input) { - if (s.Check<TLZ4>()) + if (s.Check<TLZ4>()) return new TLzDecompressInput<T, TLZ4>(input); - - if (s.Check<TSnappy>()) + + if (s.Check<TSnappy>()) return new TLzDecompressInput<T, TSnappy>(input); - - if (s.Check<TMiniLzo>()) + + if (s.Check<TMiniLzo>()) return new TLzDecompressInput<T, TMiniLzoDecompressor>(input); - - if (s.Check<TFastLZ>()) + + if (s.Check<TFastLZ>()) return new TLzDecompressInput<T, TFastLZ>(input); - - if (s.Check<TQuickLZDecompress>()) + + if (s.Check<TQuickLZDecompress>()) return new TLzDecompressInput<T, TQuickLZDecompress>(input); - + return nullptr; -} - +} + template <class T> static inline TAutoPtr<IInputStream> TryOpenLzDecompressorImpl(const TStringBuf& signature, T input) { - if (signature.size() == SIGNATURE_SIZE) { + if (signature.size() == SIGNATURE_SIZE) { TMemoryInput mem(signature.data(), signature.size()); - TDecompressSignature s(&mem); + TDecompressSignature s(&mem); return TryOpenLzDecompressorX(s, input); - } - + } + return nullptr; -} - +} + template <class T> static inline TAutoPtr<IInputStream> TryOpenLzDecompressorImpl(T input) { TDecompressSignature s(&*input); - + return TryOpenLzDecompressorX(s, input); } @@ -700,11 +700,11 @@ static inline TAutoPtr<IInputStream> OpenLzDecompressorImpl(T input) { TAutoPtr<IInputStream> ret = TryOpenLzDecompressorImpl(input); if (!ret) { - ythrow TDecompressorError() << "Unknown compression format"; + ythrow TDecompressorError() << "Unknown compression format"; } - - return ret; -} + + return ret; +} TAutoPtr<IInputStream> OpenLzDecompressor(IInputStream* input) { return OpenLzDecompressorImpl(input); diff --git a/library/cpp/streams/lz/lz.h b/library/cpp/streams/lz/lz.h index 3a2eaad88b..4cbd752421 100644 --- a/library/cpp/streams/lz/lz.h +++ b/library/cpp/streams/lz/lz.h @@ -222,7 +222,7 @@ private: }; /** @} */ - + /** * Reads a compression signature from the provided input stream and returns a * corresponding decompressing stream. diff --git a/library/cpp/streams/lz/lz_ut.cpp b/library/cpp/streams/lz/lz_ut.cpp index 6876f070fc..f2d91e2299 100644 --- a/library/cpp/streams/lz/lz_ut.cpp +++ b/library/cpp/streams/lz/lz_ut.cpp @@ -166,28 +166,28 @@ public: : Slave_(OpenLzDecompressor(input).Release()) { } - + private: size_t DoRead(void* buf, size_t len) override { return Slave_->Read(buf, len); } - + private: THolder<IInputStream> Slave_; -}; - -template <class C> -static inline void TestMixedDecompress() { - TestCompress<C>(); - TestDecompress<TMixedDecompress>(); -} - -template <class D, class C> -static inline void TestDecompressError() { - TestCompress<C>(); - UNIT_ASSERT_EXCEPTION(TestDecompress<D>(), TDecompressorError); -} - +}; + +template <class C> +static inline void TestMixedDecompress() { + TestCompress<C>(); + TestDecompress<TMixedDecompress>(); +} + +template <class D, class C> +static inline void TestDecompressError() { + TestCompress<C>(); + UNIT_ASSERT_EXCEPTION(TestDecompress<D>(), TDecompressorError); +} + Y_UNIT_TEST_SUITE(TLzTest) { Y_UNIT_TEST(TestLzo) { TestCompress<TLzoCompress>(); @@ -243,23 +243,23 @@ Y_UNIT_TEST_SUITE(TLzTest) { TestCompress<TSnappyCompress>(); TestDecompress<TSnappyDecompress>(); } - + Y_UNIT_TEST(TestGeneric) { - TestMixedDecompress<TLzoCompress>(); - TestMixedDecompress<TLzfCompress>(); - TestMixedDecompress<TLzqCompress>(); - TestMixedDecompress<TLz4Compress>(); - TestMixedDecompress<TSnappyCompress>(); - } - + TestMixedDecompress<TLzoCompress>(); + TestMixedDecompress<TLzfCompress>(); + TestMixedDecompress<TLzqCompress>(); + TestMixedDecompress<TLz4Compress>(); + TestMixedDecompress<TSnappyCompress>(); + } + Y_UNIT_TEST(TestDecompressorError) { - TestDecompressError<TLzoDecompress, TLzfCompress>(); - TestDecompressError<TLzfDecompress, TLzqCompress>(); - TestDecompressError<TLzqDecompress, TLz4Compress>(); - TestDecompressError<TLz4Decompress, TSnappyCompress>(); - TestDecompressError<TSnappyDecompress, TBufferedOutput>(); - TestDecompressError<TMixedDecompress, TBufferedOutput>(); - } + TestDecompressError<TLzoDecompress, TLzfCompress>(); + TestDecompressError<TLzfDecompress, TLzqCompress>(); + TestDecompressError<TLzqDecompress, TLz4Compress>(); + TestDecompressError<TLz4Decompress, TSnappyCompress>(); + TestDecompressError<TSnappyDecompress, TBufferedOutput>(); + TestDecompressError<TMixedDecompress, TBufferedOutput>(); + } Y_UNIT_TEST(TestFactory) { TStringStream ss; diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h index f778a6425a..cb2d201681 100644 --- a/library/cpp/string_utils/base64/base64.h +++ b/library/cpp/string_utils/base64/base64.h @@ -28,14 +28,14 @@ inline TStringBuf Base64Decode(const TStringBuf src, void* dst) { } inline void Base64Decode(const TStringBuf src, TString& dst) { - dst.ReserveAndResize(Base64DecodeBufSize(src.size())); - dst.resize(Base64Decode(src, dst.begin()).size()); -} - + dst.ReserveAndResize(Base64DecodeBufSize(src.size())); + dst.resize(Base64Decode(src, dst.begin()).size()); +} + //WARNING: can process not whole input silently, use Base64StrictDecode instead of this function inline TString Base64Decode(const TStringBuf s) { TString ret; - Base64Decode(s, ret); + Base64Decode(s, ret); return ret; } @@ -108,23 +108,23 @@ inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) { } inline void Base64Encode(const TStringBuf src, TString& dst) { - dst.ReserveAndResize(Base64EncodeBufSize(src.size())); - dst.resize(Base64Encode(src, dst.begin()).size()); -} - + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); + dst.resize(Base64Encode(src, dst.begin()).size()); +} + inline void Base64EncodeUrl(const TStringBuf src, TString& dst) { - dst.ReserveAndResize(Base64EncodeBufSize(src.size())); - dst.resize(Base64EncodeUrl(src, dst.begin()).size()); -} - + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); + dst.resize(Base64EncodeUrl(src, dst.begin()).size()); +} + inline TString Base64Encode(const TStringBuf s) { TString ret; - Base64Encode(s, ret); + Base64Encode(s, ret); return ret; } inline TString Base64EncodeUrl(const TStringBuf s) { TString ret; - Base64EncodeUrl(s, ret); + Base64EncodeUrl(s, ret); return ret; } diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp index bcc1e65879..6a54e010a6 100644 --- a/library/cpp/string_utils/base64/base64_ut.cpp +++ b/library/cpp/string_utils/base64/base64_ut.cpp @@ -165,20 +165,20 @@ void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64E static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { TString a, b; - - Base64Encode(plain, a); - UNIT_ASSERT_VALUES_EQUAL(a, encoded); - - Base64Decode(a, b); - UNIT_ASSERT_VALUES_EQUAL(b, plain); - - Base64EncodeUrl(plain, a); - UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl); - - Base64Decode(a, b); - UNIT_ASSERT_VALUES_EQUAL(b, plain); -} - + + Base64Encode(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encoded); + + Base64Decode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, plain); + + Base64EncodeUrl(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl); + + Base64Decode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, plain); +} + static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { TString a, b; @@ -204,11 +204,11 @@ Y_UNIT_TEST_SUITE(TBase64) { } Y_UNIT_TEST(TestIntoString) { - { + { TString str; - for (size_t i = 0; i < 256; ++i) - str += char(i); - + for (size_t i = 0; i < 256; ++i) + str += char(i); + const TString base64 = "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy" "gpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9Q" @@ -225,22 +225,22 @@ Y_UNIT_TEST_SUITE(TBase64) { "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc" "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy" "8_T19vf4-fr7_P3-_w,,"; - - TestEncodeDecodeIntoString(str, base64, base64Url); + + TestEncodeDecodeIntoString(str, base64, base64Url); TestEncodeStrictDecodeIntoString(str, base64, base64Url); - } - - { + } + + { const TString str = "http://yandex.ru:1234/request?param=value&lll=fff#fragment"; - + const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA=="; const TString base64Url = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,"; - - TestEncodeDecodeIntoString(str, base64, base64Url); + + TestEncodeDecodeIntoString(str, base64, base64Url); TestEncodeStrictDecodeIntoString(str, base64, base64Url); - } - } - + } + } + Y_UNIT_TEST(TestDecode) { UNIT_ASSERT_EXCEPTION(Base64Decode("a"), yexception); UNIT_ASSERT_EXCEPTION(Base64StrictDecode("a"), yexception); diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp index 85f4ac5d69..0744ae5640 100644 --- a/library/cpp/string_utils/url/url.cpp +++ b/library/cpp/string_utils/url/url.cpp @@ -264,17 +264,17 @@ TStringBuf GetDomain(const TStringBuf host) noexcept { } TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept { - size_t pos = host.size(); - for (size_t i = 0; i < level; ++i) { - pos = host.rfind('.', pos); + size_t pos = host.size(); + for (size_t i = 0; i < level; ++i) { + pos = host.rfind('.', pos); if (pos == TString::npos) - return host; - } - return host.SubStr(pos + 1); -} - + return host; + } + return host.SubStr(pos + 1); +} + TStringBuf GetZone(const TStringBuf host) noexcept { - return GetParentDomain(host, 1); + return GetParentDomain(host, 1); } TStringBuf CutWWWPrefix(const TStringBuf url) noexcept { diff --git a/library/cpp/string_utils/url/url_ut.cpp b/library/cpp/string_utils/url/url_ut.cpp index 1588013893..829fbe217f 100644 --- a/library/cpp/string_utils/url/url_ut.cpp +++ b/library/cpp/string_utils/url/url_ut.cpp @@ -49,19 +49,19 @@ Y_UNIT_TEST_SUITE(TUtilUrlTest) { } Y_UNIT_TEST(TestGetParentDomain) { - UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("www.ya.ru", 0)); - UNIT_ASSERT_VALUES_EQUAL("ru", GetParentDomain("www.ya.ru", 1)); - UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetParentDomain("www.ya.ru", 2)); - UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 3)); - UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 4)); - UNIT_ASSERT_VALUES_EQUAL("com", GetParentDomain("ya.com", 1)); - UNIT_ASSERT_VALUES_EQUAL("ya.com", GetParentDomain("ya.com", 2)); - UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 1)); - UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 2)); - UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 0)); - UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 1)); - } - + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("www.ya.ru", 0)); + UNIT_ASSERT_VALUES_EQUAL("ru", GetParentDomain("www.ya.ru", 1)); + UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetParentDomain("www.ya.ru", 2)); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 3)); + UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 4)); + UNIT_ASSERT_VALUES_EQUAL("com", GetParentDomain("ya.com", 1)); + UNIT_ASSERT_VALUES_EQUAL("ya.com", GetParentDomain("ya.com", 2)); + UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 1)); + UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 2)); + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 0)); + UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 1)); + } + Y_UNIT_TEST(TestGetZone) { UNIT_ASSERT_VALUES_EQUAL("ru", GetZone("www.ya.ru")); UNIT_ASSERT_VALUES_EQUAL("com", GetZone("ya.com")); diff --git a/library/cpp/tld/tld.cpp b/library/cpp/tld/tld.cpp index e31f3f0322..d67291676e 100644 --- a/library/cpp/tld/tld.cpp +++ b/library/cpp/tld/tld.cpp @@ -5,7 +5,7 @@ #include <util/generic/hash_set.h> #include <util/generic/singleton.h> -namespace NTld { +namespace NTld { namespace { #include <library/cpp/tld/tld.inc> @@ -18,18 +18,18 @@ namespace NTld { } } }; - + struct TVeryGoodTld: public TCiHash { TVeryGoodTld() { auto domains = { "am", "az", "biz", "by", "com", "cz", "de", "ec", "fr", "ge", "gov", "gr", "il", "info", "kg", "kz", "mobi", "net", "nu", "org", "lt", "lv", "md", "ru", "su", "tr", "ua", "uk", "uz", "ws", "xn--p1ai", "рф"}; - + for (auto d : domains) { insert(d); } - } + } }; } diff --git a/library/cpp/tld/tld.h b/library/cpp/tld/tld.h index 9e241de090..7c2061717a 100644 --- a/library/cpp/tld/tld.h +++ b/library/cpp/tld/tld.h @@ -17,12 +17,12 @@ namespace NTld { inline bool InTld(const TStringBuf& host) { return IsTld(FindTld(host)); } - + // check if @s belongs to a "good" subset of reliable TLDs, defined in tld.cpp bool IsVeryGoodTld(const TStringBuf& tld); - + inline bool InVeryGoodTld(const TStringBuf& host) { return IsVeryGoodTld(FindTld(host)); } - -} + +} diff --git a/library/cpp/tld/tld_ut.cpp b/library/cpp/tld/tld_ut.cpp index 733200f2b5..61a6779673 100644 --- a/library/cpp/tld/tld_ut.cpp +++ b/library/cpp/tld/tld_ut.cpp @@ -8,20 +8,20 @@ using namespace NTld; Y_UNIT_TEST_SUITE(TTldTest) { Y_UNIT_TEST(TestFindTld) { - UNIT_ASSERT(FindTld("yandex.ru") == "ru"); - UNIT_ASSERT(FindTld("YandeX.Ru") == "Ru"); - UNIT_ASSERT(FindTld("yandex.com.tr") == "tr"); - UNIT_ASSERT(FindTld("com.tr") == "tr"); - UNIT_ASSERT(FindTld("abc.def.ghi") == "ghi"); - UNIT_ASSERT(FindTld("abc.def.aaaaaaaaaa") == "aaaaaaaaaa"); - UNIT_ASSERT(FindTld("a.b.c.d.e.f.g") == "g"); - - UNIT_ASSERT(FindTld(".diff") == "diff"); - UNIT_ASSERT(FindTld(".") == ""); - UNIT_ASSERT(FindTld("ru") == ""); - UNIT_ASSERT(FindTld("") == ""); - } - + UNIT_ASSERT(FindTld("yandex.ru") == "ru"); + UNIT_ASSERT(FindTld("YandeX.Ru") == "Ru"); + UNIT_ASSERT(FindTld("yandex.com.tr") == "tr"); + UNIT_ASSERT(FindTld("com.tr") == "tr"); + UNIT_ASSERT(FindTld("abc.def.ghi") == "ghi"); + UNIT_ASSERT(FindTld("abc.def.aaaaaaaaaa") == "aaaaaaaaaa"); + UNIT_ASSERT(FindTld("a.b.c.d.e.f.g") == "g"); + + UNIT_ASSERT(FindTld(".diff") == "diff"); + UNIT_ASSERT(FindTld(".") == ""); + UNIT_ASSERT(FindTld("ru") == ""); + UNIT_ASSERT(FindTld("") == ""); + } + Y_UNIT_TEST(TestTLDs) { UNIT_ASSERT(IsTld("ru")); UNIT_ASSERT(IsTld("Ru")); @@ -36,24 +36,24 @@ Y_UNIT_TEST_SUITE(TTldTest) { UNIT_ASSERT(!InTld("ru.")); UNIT_ASSERT(!InTld("ru.xn")); } - + Y_UNIT_TEST(TestVeryGoodTlds) { - UNIT_ASSERT(IsVeryGoodTld("ru")); - UNIT_ASSERT(IsVeryGoodTld("Ru")); - UNIT_ASSERT(!IsVeryGoodTld("BMW")); - UNIT_ASSERT(!IsVeryGoodTld("TiReS")); - UNIT_ASSERT(IsVeryGoodTld("рф")); + UNIT_ASSERT(IsVeryGoodTld("ru")); + UNIT_ASSERT(IsVeryGoodTld("Ru")); + UNIT_ASSERT(!IsVeryGoodTld("BMW")); + UNIT_ASSERT(!IsVeryGoodTld("TiReS")); + UNIT_ASSERT(IsVeryGoodTld("рф")); UNIT_ASSERT(!IsVeryGoodTld("РФ")); // note that uppercase non-ascii tlds cannot be found UNIT_ASSERT(IsVeryGoodTld("xn--p1ai")); // "рф" UNIT_ASSERT(!IsVeryGoodTld("xn--p1ag")); // "ру" - UNIT_ASSERT(!IsVeryGoodTld("YaHOO")); - UNIT_ASSERT(!IsVeryGoodTld("xn")); - - UNIT_ASSERT(InVeryGoodTld("ru.ru")); - UNIT_ASSERT(InVeryGoodTld("яндекс.рф")); - UNIT_ASSERT(InVeryGoodTld("http://xn--d1acpjx3f.xn--p1ai")); - UNIT_ASSERT(!InVeryGoodTld("ru")); - UNIT_ASSERT(!InVeryGoodTld("ru.")); - UNIT_ASSERT(!InVeryGoodTld("ru.xn")); - } + UNIT_ASSERT(!IsVeryGoodTld("YaHOO")); + UNIT_ASSERT(!IsVeryGoodTld("xn")); + + UNIT_ASSERT(InVeryGoodTld("ru.ru")); + UNIT_ASSERT(InVeryGoodTld("яндекс.рф")); + UNIT_ASSERT(InVeryGoodTld("http://xn--d1acpjx3f.xn--p1ai")); + UNIT_ASSERT(!InVeryGoodTld("ru")); + UNIT_ASSERT(!InVeryGoodTld("ru.")); + UNIT_ASSERT(!InVeryGoodTld("ru.xn")); + } } diff --git a/library/cpp/unicode/punycode/punycode.cpp b/library/cpp/unicode/punycode/punycode.cpp index 800d1f19fe..fc13a55436 100644 --- a/library/cpp/unicode/punycode/punycode.cpp +++ b/library/cpp/unicode/punycode/punycode.cpp @@ -1,143 +1,143 @@ -#include "punycode.h" -#include <contrib/libs/libidn/idna.h> -#include <contrib/libs/libidn/punycode.h> -#include <util/charset/wide.h> -#include <util/generic/ptr.h> -#include <util/generic/vector.h> - -#include <cstdlib> - -static inline void CheckPunycodeResult(int rc) { - if (rc != PUNYCODE_SUCCESS) - ythrow TPunycodeError() << punycode_strerror(static_cast<Punycode_status>(rc)); -} - -static inline void CheckIdnaResult(int rc) { - if (rc != IDNA_SUCCESS) - ythrow TPunycodeError() << idna_strerror(static_cast<Idna_rc>(rc)); -} - -// UTF-32 helpers - +#include "punycode.h" +#include <contrib/libs/libidn/idna.h> +#include <contrib/libs/libidn/punycode.h> +#include <util/charset/wide.h> +#include <util/generic/ptr.h> +#include <util/generic/vector.h> + +#include <cstdlib> + +static inline void CheckPunycodeResult(int rc) { + if (rc != PUNYCODE_SUCCESS) + ythrow TPunycodeError() << punycode_strerror(static_cast<Punycode_status>(rc)); +} + +static inline void CheckIdnaResult(int rc) { + if (rc != IDNA_SUCCESS) + ythrow TPunycodeError() << idna_strerror(static_cast<Idna_rc>(rc)); +} + +// UTF-32 helpers + static inline void AppendWideToUtf32(const TWtringBuf& in, TVector<ui32>& out) { - out.reserve(out.size() + in.size() + 1); - - const wchar16* b = in.begin(); - const wchar16* e = in.end(); - while (b < e) { - out.push_back(ReadSymbolAndAdvance(b, e)); - } -} - + out.reserve(out.size() + in.size() + 1); + + const wchar16* b = in.begin(); + const wchar16* e = in.end(); + while (b < e) { + out.push_back(ReadSymbolAndAdvance(b, e)); + } +} + static inline void AppendUtf32ToWide(const ui32* in, size_t len, TUtf16String& out) { - out.reserve(out.size() + len); - + out.reserve(out.size() + len); + const ui32* b = in; const ui32* e = in + len; - for (; b != e; ++b) { + for (; b != e; ++b) { WriteSymbol(wchar32(*b), out); - } -} - + } +} + TStringBuf WideToPunycode(const TWtringBuf& in16, TString& out) { TVector<ui32> in32; - AppendWideToUtf32(in16, in32); - size_t outlen = in32.size(); - - int rc; - do { - outlen *= 2; - out.ReserveAndResize(outlen); + AppendWideToUtf32(in16, in32); + size_t outlen = in32.size(); + + int rc; + do { + outlen *= 2; + out.ReserveAndResize(outlen); rc = punycode_encode(in32.size(), in32.data(), nullptr, &outlen, out.begin()); - } while (rc == PUNYCODE_BIG_OUTPUT); - - CheckPunycodeResult(rc); - - out.resize(outlen); - return out; -} - + } while (rc == PUNYCODE_BIG_OUTPUT); + + CheckPunycodeResult(rc); + + out.resize(outlen); + return out; +} + TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out16) { - size_t outlen = in.size(); + size_t outlen = in.size(); TVector<ui32> out32(outlen); - + int rc = punycode_decode(in.size(), in.data(), &outlen, out32.begin(), nullptr); - CheckPunycodeResult(rc); - - AppendUtf32ToWide(out32.begin(), outlen, out16); - return out16; -} - -namespace { - template <typename TChar> - struct TIdnaResult { - TChar* Data = nullptr; - - ~TIdnaResult() { - free(Data); - } - }; -} - + CheckPunycodeResult(rc); + + AppendUtf32ToWide(out32.begin(), outlen, out16); + return out16; +} + +namespace { + template <typename TChar> + struct TIdnaResult { + TChar* Data = nullptr; + + ~TIdnaResult() { + free(Data); + } + }; +} + TString HostNameToPunycode(const TWtringBuf& unicodeHost) { TVector<ui32> in32; - AppendWideToUtf32(unicodeHost, in32); - in32.push_back(0); - - TIdnaResult<char> out; - int rc = idna_to_ascii_4z(in32.begin(), &out.Data, 0); - CheckIdnaResult(rc); - - return out.Data; -} - + AppendWideToUtf32(unicodeHost, in32); + in32.push_back(0); + + TIdnaResult<char> out; + int rc = idna_to_ascii_4z(in32.begin(), &out.Data, 0); + CheckIdnaResult(rc); + + return out.Data; +} + TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost) { - if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end())) - ythrow TPunycodeError() << "Non-ASCII punycode input"; - - size_t len = punycodeHost.size(); + if (!IsStringASCII(punycodeHost.begin(), punycodeHost.end())) + ythrow TPunycodeError() << "Non-ASCII punycode input"; + + size_t len = punycodeHost.size(); TVector<ui32> in32(len + 1, 0); - for (size_t i = 0; i < len; ++i) - in32[i] = static_cast<ui8>(punycodeHost[i]); - in32[len] = 0; - + for (size_t i = 0; i < len; ++i) + in32[i] = static_cast<ui8>(punycodeHost[i]); + in32[len] = 0; + TIdnaResult<ui32> out; - int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0); - CheckIdnaResult(rc); - + int rc = idna_to_unicode_4z4z(in32.begin(), &out.Data, 0); + CheckIdnaResult(rc); + TUtf16String decoded; AppendUtf32ToWide(out.Data, std::char_traits<ui32>::length(out.Data), decoded); - return decoded; -} - + return decoded; +} + TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost) { - try { - return HostNameToPunycode(unicodeHost); - } catch (const TPunycodeError&) { - return WideToUTF8(unicodeHost); - } -} - + try { + return HostNameToPunycode(unicodeHost); + } catch (const TPunycodeError&) { + return WideToUTF8(unicodeHost); + } +} + TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost) { - try { - return PunycodeToHostName(punycodeHost); - } catch (const TPunycodeError&) { - return UTF8ToWide(punycodeHost); - } -} - -bool CanBePunycodeHostName(const TStringBuf& host) { - if (!IsStringASCII(host.begin(), host.end())) - return false; - + try { + return PunycodeToHostName(punycodeHost); + } catch (const TPunycodeError&) { + return UTF8ToWide(punycodeHost); + } +} + +bool CanBePunycodeHostName(const TStringBuf& host) { + if (!IsStringASCII(host.begin(), host.end())) + return false; + static constexpr TStringBuf ACE = "xn--"; - - TStringBuf tail(host); - while (tail) { - const TStringBuf label = tail.NextTok('.'); + + TStringBuf tail(host); + while (tail) { + const TStringBuf label = tail.NextTok('.'); if (label.StartsWith(ACE)) - return true; - } - - return false; -} + return true; + } + + return false; +} diff --git a/library/cpp/unicode/punycode/punycode.h b/library/cpp/unicode/punycode/punycode.h index af4acc25c1..9d4517fede 100644 --- a/library/cpp/unicode/punycode/punycode.h +++ b/library/cpp/unicode/punycode/punycode.h @@ -1,46 +1,46 @@ -#pragma once +#pragma once #include <util/generic/string.h> -#include <util/generic/strbuf.h> -#include <util/generic/yexception.h> - -// Simplified arcadia wrappers for contrib/libs/libidn/ - -// Raw strings encoder/decoder: does not prepend with ACE prefix ("xn--"), -// does not limit input length. Throws TPunycodeError on any internal error. -// Returned strbuf points to @out data. +#include <util/generic/strbuf.h> +#include <util/generic/yexception.h> + +// Simplified arcadia wrappers for contrib/libs/libidn/ + +// Raw strings encoder/decoder: does not prepend with ACE prefix ("xn--"), +// does not limit input length. Throws TPunycodeError on any internal error. +// Returned strbuf points to @out data. TStringBuf WideToPunycode(const TWtringBuf& in, TString& out); TWtringBuf PunycodeToWide(const TStringBuf& in, TUtf16String& out); - + inline TString WideToPunycode(const TWtringBuf& in) { TString out; - WideToPunycode(in, out); - return out; -} - + WideToPunycode(in, out); + return out; +} + inline TUtf16String PunycodeToWide(const TStringBuf& in) { TUtf16String out; - PunycodeToWide(in, out); - return out; -} - -// Encode a sequence of point-separated domain labels -// into a sequence of corresponding punycode labels. -// Labels containing non-ASCII characters are prefixed with ACE prefix ("xn--"). -// Limits maximal encoded domain label length to IDNA_LABEL_MAX_LENGTH (255 by default). -// Throws TPunycodeError on failure. + PunycodeToWide(in, out); + return out; +} + +// Encode a sequence of point-separated domain labels +// into a sequence of corresponding punycode labels. +// Labels containing non-ASCII characters are prefixed with ACE prefix ("xn--"). +// Limits maximal encoded domain label length to IDNA_LABEL_MAX_LENGTH (255 by default). +// Throws TPunycodeError on failure. TString HostNameToPunycode(const TWtringBuf& unicodeHost); TUtf16String PunycodeToHostName(const TStringBuf& punycodeHost); - -// Robust versions: on failure return original input, converted to/from UTF8 + +// Robust versions: on failure return original input, converted to/from UTF8 TString ForceHostNameToPunycode(const TWtringBuf& unicodeHost); TUtf16String ForcePunycodeToHostName(const TStringBuf& punycodeHost); - -// True if @host looks like punycode domain label sequence, -// containing at least one ACE-prefixed label. -// Note that this function does not check all requied IDNA constraints -// (max label length, empty non-root domains, etc.) -bool CanBePunycodeHostName(const TStringBuf& host); - -class TPunycodeError: public yexception { -}; + +// True if @host looks like punycode domain label sequence, +// containing at least one ACE-prefixed label. +// Note that this function does not check all requied IDNA constraints +// (max label length, empty non-root domains, etc.) +bool CanBePunycodeHostName(const TStringBuf& host); + +class TPunycodeError: public yexception { +}; diff --git a/library/cpp/unicode/punycode/punycode_ut.cpp b/library/cpp/unicode/punycode/punycode_ut.cpp index 97271cf0d8..d8a2848d47 100644 --- a/library/cpp/unicode/punycode/punycode_ut.cpp +++ b/library/cpp/unicode/punycode/punycode_ut.cpp @@ -1,8 +1,8 @@ -#include "punycode.h" - +#include "punycode.h" + #include <library/cpp/testing/unittest/registar.h> -#include <util/charset/wide.h> - +#include <util/charset/wide.h> + namespace { template<typename T1, typename T2> inline bool HasSameBuffer(const T1& s1, const T2& s2) { @@ -16,111 +16,111 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { TString buf1; TUtf16String buf2; return HasSameBuffer(WideToPunycode(unicode, buf1), buf1) && buf1 == punycode && HasSameBuffer(PunycodeToWide(punycode, buf2), buf2) && buf2 == unicode && WideToPunycode(unicode) == punycode && PunycodeToWide(punycode) == unicode; - } - + } + Y_UNIT_TEST(RawEncodeDecode) { - UNIT_ASSERT(TestRaw("", "")); - UNIT_ASSERT(TestRaw(" ", " -")); - UNIT_ASSERT(TestRaw("-", "--")); - UNIT_ASSERT(TestRaw("!@#$%", "!@#$%-")); - UNIT_ASSERT(TestRaw("xn-", "xn--")); - UNIT_ASSERT(TestRaw("xn--", "xn---")); - UNIT_ASSERT(TestRaw("abc", "abc-")); - UNIT_ASSERT(TestRaw("Latin123", "Latin123-")); - - UNIT_ASSERT(TestRaw("München", "Mnchen-3ya")); - UNIT_ASSERT(TestRaw("bücher", "bcher-kva")); - UNIT_ASSERT(TestRaw("BüüchEr", "BchEr-kvaa")); - - UNIT_ASSERT(TestRaw("президент", "d1abbgf6aiiy")); - UNIT_ASSERT(TestRaw("Президент", "r0a6bcbig1bsy")); - UNIT_ASSERT(TestRaw("ПРЕЗИДЕНТ", "g0abbgf6aiiy")); - UNIT_ASSERT(TestRaw("рф", "p1ai")); - UNIT_ASSERT(TestRaw("пример", "e1afmkfd")); - - { + UNIT_ASSERT(TestRaw("", "")); + UNIT_ASSERT(TestRaw(" ", " -")); + UNIT_ASSERT(TestRaw("-", "--")); + UNIT_ASSERT(TestRaw("!@#$%", "!@#$%-")); + UNIT_ASSERT(TestRaw("xn-", "xn--")); + UNIT_ASSERT(TestRaw("xn--", "xn---")); + UNIT_ASSERT(TestRaw("abc", "abc-")); + UNIT_ASSERT(TestRaw("Latin123", "Latin123-")); + + UNIT_ASSERT(TestRaw("München", "Mnchen-3ya")); + UNIT_ASSERT(TestRaw("bücher", "bcher-kva")); + UNIT_ASSERT(TestRaw("BüüchEr", "BchEr-kvaa")); + + UNIT_ASSERT(TestRaw("президент", "d1abbgf6aiiy")); + UNIT_ASSERT(TestRaw("Президент", "r0a6bcbig1bsy")); + UNIT_ASSERT(TestRaw("ПРЕЗИДЕНТ", "g0abbgf6aiiy")); + UNIT_ASSERT(TestRaw("рф", "p1ai")); + UNIT_ASSERT(TestRaw("пример", "e1afmkfd")); + + { const wchar16 tmp[] = {0x82, 0x81, 0x80, 0}; UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode - } - - UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError); - UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError); + } + + UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError); + UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError); UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError); - - { + + { TString longIn; - for (size_t i = 0; i < 1024; ++i) - longIn += "Qй"; - + for (size_t i = 0; i < 1024; ++i) + longIn += "Qй"; + TString longOut = "QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQ-lo11fbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; - UNIT_ASSERT(TestRaw(longIn, longOut)); - } - } - + UNIT_ASSERT(TestRaw(longIn, longOut)); + } + } + static bool TestHostName(const TString& utf8, const TString& punycode, bool canBePunycode = false) { TUtf16String unicode = UTF8ToWide(utf8); TString buf1; TUtf16String buf2; - //Cerr << "Testing " << utf8 << Endl; + //Cerr << "Testing " << utf8 << Endl; return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode; - } - + } + static bool TestForced(const TString& bad) { return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad); - } - + } + Y_UNIT_TEST(HostNameEncodeDecode) { - UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true)); + UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true)); UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true)); UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true)); UNIT_ASSERT(TestHostName("ascii.test", "ascii.test")); - - UNIT_ASSERT(TestHostName("", "")); - UNIT_ASSERT(TestHostName(".", ".")); + + UNIT_ASSERT(TestHostName("", "")); + UNIT_ASSERT(TestHostName(".", ".")); UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok - UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true)); - UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true)); - - UNIT_ASSERT(TestHostName("-", "-")); - UNIT_ASSERT(TestHostName("xn--", "xn--", true)); - UNIT_ASSERT(TestHostName("xn--aaa.-", "xn--aaa.-", true)); - UNIT_ASSERT(TestHostName("xn--xn--d1acpjx3f.xn--ru", "xn--xn--d1acpjx3f.xn--ru", true)); - - { - // non-ascii + UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true)); + UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true)); + + UNIT_ASSERT(TestHostName("-", "-")); + UNIT_ASSERT(TestHostName("xn--", "xn--", true)); + UNIT_ASSERT(TestHostName("xn--aaa.-", "xn--aaa.-", true)); + UNIT_ASSERT(TestHostName("xn--xn--d1acpjx3f.xn--ru", "xn--xn--d1acpjx3f.xn--ru", true)); + + { + // non-ascii TString bad = "президент.рф"; - UNIT_ASSERT_EXCEPTION(PunycodeToHostName("президент.рф"), TPunycodeError); - UNIT_ASSERT(ForcePunycodeToHostName(bad) == UTF8ToWide(bad)); - } - { - // too long domain label + UNIT_ASSERT_EXCEPTION(PunycodeToHostName("президент.рф"), TPunycodeError); + UNIT_ASSERT(ForcePunycodeToHostName(bad) == UTF8ToWide(bad)); + } + { + // too long domain label TString bad(500, 'a'); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); UNIT_ASSERT(TestForced(bad)); // but can decode it - } - { - // already has ACE prefix + } + { + // already has ACE prefix TString bad("xn--яндекс.xn--рф"); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); - UNIT_ASSERT(TestForced(bad)); - } - { - // empty non-root domain is not allowed (?) + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT(TestForced(bad)); + } + { + // empty non-root domain is not allowed (?) TString bad(".яндекс.рф"); - UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); - UNIT_ASSERT(TestForced(bad)); - } - - UNIT_ASSERT(CanBePunycodeHostName("xn--")); - UNIT_ASSERT(CanBePunycodeHostName("yandex.xn--p1ai")); - UNIT_ASSERT(CanBePunycodeHostName("xn--d1acpjx3f.xn--p1ai")); - UNIT_ASSERT(CanBePunycodeHostName("a.b.c.d.xn--e")); - UNIT_ASSERT(CanBePunycodeHostName("xn--a.b.c.xn--d.e")); - UNIT_ASSERT(!CanBePunycodeHostName("yandex.ru")); // no xn-- - UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii - UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii - UNIT_ASSERT(!CanBePunycodeHostName("")); + UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); + UNIT_ASSERT(TestForced(bad)); + } + + UNIT_ASSERT(CanBePunycodeHostName("xn--")); + UNIT_ASSERT(CanBePunycodeHostName("yandex.xn--p1ai")); + UNIT_ASSERT(CanBePunycodeHostName("xn--d1acpjx3f.xn--p1ai")); + UNIT_ASSERT(CanBePunycodeHostName("a.b.c.d.xn--e")); + UNIT_ASSERT(CanBePunycodeHostName("xn--a.b.c.xn--d.e")); + UNIT_ASSERT(!CanBePunycodeHostName("yandex.ru")); // no xn-- + UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii + UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii + UNIT_ASSERT(!CanBePunycodeHostName("")); UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here - } -} + } +} diff --git a/library/cpp/unicode/punycode/ut/ya.make b/library/cpp/unicode/punycode/ut/ya.make index 74272102a8..a9dd5f7c34 100644 --- a/library/cpp/unicode/punycode/ut/ya.make +++ b/library/cpp/unicode/punycode/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST_FOR(library/cpp/unicode/punycode) - -OWNER( + +OWNER( g:base g:middle g:upper -) - -SRCS( - punycode_ut.cpp -) - -END() +) + +SRCS( + punycode_ut.cpp +) + +END() diff --git a/library/cpp/unicode/punycode/ya.make b/library/cpp/unicode/punycode/ya.make index 62b41b07b7..4dce86fbf5 100644 --- a/library/cpp/unicode/punycode/ya.make +++ b/library/cpp/unicode/punycode/ya.make @@ -1,17 +1,17 @@ -LIBRARY() - -OWNER( +LIBRARY() + +OWNER( g:base g:middle g:upper -) - -PEERDIR( - contrib/libs/libidn -) - -SRCS( - punycode.cpp -) - -END() +) + +PEERDIR( + contrib/libs/libidn +) + +SRCS( + punycode.cpp +) + +END() |