diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
commit | 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch) | |
tree | 625d5a673015d1df891e051033e9fcde5c7be4e5 /util/charset/wide.h | |
parent | 30d1ef3941e0dc835be7609de5ebee66958f215a (diff) | |
download | ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset/wide.h')
-rw-r--r-- | util/charset/wide.h | 282 |
1 files changed, 141 insertions, 141 deletions
diff --git a/util/charset/wide.h b/util/charset/wide.h index 04e6928aab..81e7282f90 100644 --- a/util/charset/wide.h +++ b/util/charset/wide.h @@ -5,12 +5,12 @@ #include "utf8.h" #include "wide_specific.h" -#include <util/generic/algorithm.h> +#include <util/generic/algorithm.h> #include <util/generic/string.h> #include <util/generic/yexception.h> #include <util/memory/tempbuf.h> -#include <util/system/compiler.h> -#include <util/system/cpu_id.h> +#include <util/system/compiler.h> +#include <util/system/cpu_id.h> #include <util/system/yassert.h> #include <cstring> @@ -79,7 +79,7 @@ inline const wchar32* SkipSymbol(const wchar32* begin, const wchar32* end) noexc } inline wchar32 ReadSymbol(const wchar16* begin, const wchar16* end) noexcept { - Y_ASSERT(begin < end); + Y_ASSERT(begin < end); if (IsW16SurrogateLead(*begin)) { if (begin + 1 < end && IsW16SurrogateTail(*(begin + 1))) return ::NDetail::ReadSurrogatePair(begin); @@ -99,10 +99,10 @@ inline wchar32 ReadSymbol(const wchar32* begin, const wchar32* end) noexcept { //! presuming input data is either big enought of null terminated inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin) noexcept { - Y_ASSERT(*begin); + Y_ASSERT(*begin); if (IsW16SurrogateLead(begin[0])) { if (IsW16SurrogateTail(begin[1])) { - Y_ASSERT(begin[1] != 0); + Y_ASSERT(begin[1] != 0); const wchar32 c = ::NDetail::ReadSurrogatePair(begin); begin += 2; return c; @@ -123,7 +123,7 @@ inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin) noexcept { } inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin, const wchar16* end) noexcept { - Y_ASSERT(begin < end); + Y_ASSERT(begin < end); if (IsW16SurrogateLead(begin[0])) { if (begin + 1 != end && IsW16SurrogateTail(begin[1])) { const wchar32 c = ::NDetail::ReadSurrogatePair(begin); @@ -140,7 +140,7 @@ inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin, const wchar16* end) n } inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin, const wchar32* end) noexcept { - Y_ASSERT(begin < end); + Y_ASSERT(begin < end); return *(begin++); } @@ -165,7 +165,7 @@ inline size_t WriteSymbol(wchar32 s, T& dest) noexcept { } inline bool WriteSymbol(wchar32 s, wchar16*& dest, const wchar16* destEnd) noexcept { - Y_ASSERT(dest < destEnd); + Y_ASSERT(dest < destEnd); if (s > 0xFFFF) { if (s >= NUnicode::UnicodeInstancesLimit()) { @@ -190,7 +190,7 @@ inline size_t WriteSymbol(wchar32 s, wchar32*& dest) noexcept { } inline bool WriteSymbol(wchar32 s, wchar32*& dest, const wchar32* destEnd) noexcept { - Y_ASSERT(dest < destEnd); + Y_ASSERT(dest < destEnd); *(dest++) = s; @@ -200,7 +200,7 @@ inline bool WriteSymbol(wchar32 s, wchar32*& dest, const wchar32* destEnd) noexc template <class T> inline void ::NDetail::WriteSurrogatePair(wchar32 s, T& dest) noexcept { const wchar32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10); - Y_ASSERT(s > 0xFFFF && s < ::NUnicode::UnicodeInstancesLimit()); + Y_ASSERT(s > 0xFFFF && s < ::NUnicode::UnicodeInstancesLimit()); wchar16 lead = LEAD_OFFSET + (static_cast<wchar16>(s >> 10)); wchar16 tail = 0xDC00 + static_cast<wchar16>(s & 0x3FF); @@ -327,7 +327,7 @@ inline TUtf16String UTF8ToWide(const char* text, size_t len) { size_t pos = UTF8ToWideImpl<robust>(text, len, w.begin(), written); if (pos != len) ythrow yexception() << "failed to decode UTF-8 string at pos " << pos << ::NDetail::InStringMsg(text, len); - Y_ASSERT(w.size() >= written); + Y_ASSERT(w.size() >= written); w.remove(written); return w; } @@ -400,7 +400,7 @@ inline void WideToUTF8(const TCharType* text, size_t len, char* dest, size_t& wr size_t runeLen; for (const TCharType* cur = text; cur != last;) { WriteUTF8Char(ReadSymbolAndAdvance(cur, last), runeLen, p); - Y_ASSERT(runeLen <= 4); + Y_ASSERT(runeLen <= 4); p += runeLen; } written = p - reinterpret_cast<unsigned char*>(dest); @@ -414,7 +414,7 @@ inline TStringBuf WideToUTF8(const TWtringBuf src, TString& dst) { dst.ReserveAndResize(WideToUTF8BufferSize(src.size())); size_t written = 0; WideToUTF8(src.data(), src.size(), dst.begin(), written); - Y_ASSERT(dst.size() >= written); + Y_ASSERT(dst.size() >= written); dst.remove(written); return dst; } @@ -423,7 +423,7 @@ inline TString WideToUTF8(const wchar16* text, size_t len) { TString s = TString::Uninitialized(WideToUTF8BufferSize(len)); size_t written = 0; WideToUTF8(text, len, s.begin(), written); - Y_ASSERT(s.size() >= written); + Y_ASSERT(s.size() >= written); s.remove(written); return s; } @@ -469,49 +469,49 @@ namespace NDetail { return !(reinterpret_cast<TMachineWord>(pointer) & kMachineWordAlignmentMask); } - template <typename T> + template <typename T> inline T* AlignToMachineWord(T* pointer) { return reinterpret_cast<T*>(reinterpret_cast<TMachineWord>(pointer) & ~kMachineWordAlignmentMask); } - template <size_t size, typename CharacterType> + template <size_t size, typename CharacterType> struct NonASCIIMask; - template <> - struct - NonASCIIMask<4, wchar16> { - static constexpr ui32 Value() { - return 0xFF80FF80U; - } + template <> + struct + NonASCIIMask<4, wchar16> { + static constexpr ui32 Value() { + return 0xFF80FF80U; + } }; - template <> - struct - NonASCIIMask<4, char> { - static constexpr ui32 Value() { - return 0x80808080U; - } + template <> + struct + NonASCIIMask<4, char> { + static constexpr ui32 Value() { + return 0x80808080U; + } }; - template <> - struct - NonASCIIMask<8, wchar16> { - static constexpr ui64 Value() { - return 0xFF80FF80FF80FF80ULL; - } + template <> + struct + NonASCIIMask<8, wchar16> { + static constexpr ui64 Value() { + return 0xFF80FF80FF80FF80ULL; + } }; - template <> - struct - NonASCIIMask<8, char> { - static constexpr ui64 Value() { - return 0x8080808080808080ULL; - } + template <> + struct + NonASCIIMask<8, char> { + static constexpr ui64 Value() { + return 0x8080808080808080ULL; + } }; template <typename TChar> inline bool DoIsStringASCIISlow(const TChar* first, const TChar* last) { - using TUnsignedChar = std::make_unsigned_t<TChar>; + using TUnsignedChar = std::make_unsigned_t<TChar>; Y_ASSERT(first <= last); for (; first != last; ++first) { if (static_cast<TUnsignedChar>(*first) > 0x7F) { @@ -573,7 +573,7 @@ namespace NDetail { int asciiMask = _mm_movemask_epi8(chunk); if (asciiMask) { return false; - } + } first += 16; } @@ -630,7 +630,7 @@ inline void Copy(const TChar1* first, size_t len, TChar2* result) { //! and the family of template member functions: append, assign, insert, replace. template <typename TStringType, typename TChar> inline TStringType CopyTo(const TChar* first, const TChar* last) { - Y_ASSERT(first <= last); + Y_ASSERT(first <= last); TStringType str = TStringType::Uninitialized(last - first); Copy(first, last, str.begin()); return str; @@ -644,12 +644,12 @@ inline TStringType CopyTo(const TChar* s, size_t n) { } inline TString WideToASCII(const TWtringBuf w) { - Y_ASSERT(IsStringASCII(w.begin(), w.end())); + Y_ASSERT(IsStringASCII(w.begin(), w.end())); return CopyTo<TString>(w.begin(), w.end()); } inline TUtf16String ASCIIToWide(const TStringBuf s) { - Y_ASSERT(IsStringASCII(s.begin(), s.end())); + Y_ASSERT(IsStringASCII(s.begin(), s.end())); return CopyTo<TUtf16String>(s.begin(), s.end()); } @@ -663,7 +663,7 @@ inline bool IsSpace(const wchar16* s, size_t n) { if (n == 0) return false; - Y_ASSERT(s); + Y_ASSERT(s); const wchar16* const e = s + n; for (const wchar16* p = s; p != e; ++p) { @@ -674,7 +674,7 @@ inline bool IsSpace(const wchar16* s, size_t n) { } //! returns @c true if string contains whitespace characters only -inline bool IsSpace(const TWtringBuf s) { +inline bool IsSpace(const TWtringBuf s) { return IsSpace(s.data(), s.length()); } @@ -684,61 +684,61 @@ void Collapse(TUtf16String& w); //! @return new length size_t Collapse(wchar16* s, size_t n); -//! Removes leading whitespace characters -TWtringBuf StripLeft(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; -void StripLeft(TUtf16String& text); - -//! Removes trailing whitespace characters -TWtringBuf StripRight(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; -void StripRight(TUtf16String& text); - -//! Removes leading and trailing whitespace characters -TWtringBuf Strip(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; -void Strip(TUtf16String& text); - -/* Check if given word is lowercase/uppercase. Will return false if string contains any - * non-alphabetical symbols. It is expected that `text` is a correct UTF-16 string. - * - * For example `IsLowerWord("hello")` will return `true`, when `IsLowerWord("hello there")` will - * return false because of the space in the middle of the string. Empty string is also considered - * lowercase. - */ -bool IsLowerWord(const TWtringBuf text) noexcept; -bool IsUpperWord(const TWtringBuf text) noexcept; - -/* Will check if given word starts with capital letter and the rest of the word is lowercase. Will - * return `false` for empty string. See also `IsLowerWord`. - */ -bool IsTitleWord(const TWtringBuf text) noexcept; - -/* Check if given string is lowercase/uppercase. Will return `true` if all alphabetic symbols are - * in proper case, all other symbols are ignored. It is expected that `text` is a correct UTF-16 - * string. - * - * For example `IsLowerWord("hello")` will return `true` and `IsLowerWord("hello there")` will - * also return true because. Empty string is also considered lowercase. - * - * NOTE: for any case where `IsLowerWord` returns `true` `IsLower` will also return `true`. - */ -bool IsLower(const TWtringBuf text) noexcept; -bool IsUpper(const TWtringBuf text) noexcept; - -/* Lowercase/uppercase given string inplace. Any alphabetic symbol will be converted to a proper - * case, the rest of the symbols will be kept the same. It is expected that `text` is a correct - * UTF-16 string. - * - * For example `ToLower("heLLo")` will return `"hello"`. - * - * @param text String to modify - * @param pos Position of the first character to modify - * @param count Length of the substring - * @returns `true` if `text` was changed - * - * NOTE: `pos` and `count` are measured in `wchar16`, not in codepoints. - */ -bool ToLower(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); -bool ToUpper(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); - +//! Removes leading whitespace characters +TWtringBuf StripLeft(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; +void StripLeft(TUtf16String& text); + +//! Removes trailing whitespace characters +TWtringBuf StripRight(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; +void StripRight(TUtf16String& text); + +//! Removes leading and trailing whitespace characters +TWtringBuf Strip(const TWtringBuf text) noexcept Y_WARN_UNUSED_RESULT; +void Strip(TUtf16String& text); + +/* Check if given word is lowercase/uppercase. Will return false if string contains any + * non-alphabetical symbols. It is expected that `text` is a correct UTF-16 string. + * + * For example `IsLowerWord("hello")` will return `true`, when `IsLowerWord("hello there")` will + * return false because of the space in the middle of the string. Empty string is also considered + * lowercase. + */ +bool IsLowerWord(const TWtringBuf text) noexcept; +bool IsUpperWord(const TWtringBuf text) noexcept; + +/* Will check if given word starts with capital letter and the rest of the word is lowercase. Will + * return `false` for empty string. See also `IsLowerWord`. + */ +bool IsTitleWord(const TWtringBuf text) noexcept; + +/* Check if given string is lowercase/uppercase. Will return `true` if all alphabetic symbols are + * in proper case, all other symbols are ignored. It is expected that `text` is a correct UTF-16 + * string. + * + * For example `IsLowerWord("hello")` will return `true` and `IsLowerWord("hello there")` will + * also return true because. Empty string is also considered lowercase. + * + * NOTE: for any case where `IsLowerWord` returns `true` `IsLower` will also return `true`. + */ +bool IsLower(const TWtringBuf text) noexcept; +bool IsUpper(const TWtringBuf text) noexcept; + +/* Lowercase/uppercase given string inplace. Any alphabetic symbol will be converted to a proper + * case, the rest of the symbols will be kept the same. It is expected that `text` is a correct + * UTF-16 string. + * + * For example `ToLower("heLLo")` will return `"hello"`. + * + * @param text String to modify + * @param pos Position of the first character to modify + * @param count Length of the substring + * @returns `true` if `text` was changed + * + * NOTE: `pos` and `count` are measured in `wchar16`, not in codepoints. + */ +bool ToLower(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); +bool ToUpper(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); + /* Lowercase/uppercase given string inplace. Any alphabetic symbol will be converted to a proper * case, the rest of the symbols will be kept the same. It is expected that `text` is a correct * UTF-32 string. @@ -755,54 +755,54 @@ bool ToUpper(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::np bool ToLower(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); bool ToUpper(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); -/* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. - */ -bool ToTitle(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); - +/* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. + */ +bool ToTitle(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); + /* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. */ bool ToTitle(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); -/* @param text Pointer to the string to modify - * @param length Length of the string to modify - * @param out Pointer to the character array to write to - * - * NOTE: [text, text+length) and [out, out+length) should not interleave. - * - * TODO(yazevnul): replace these functions with `bool(const TWtringBuf, const TArrayRef<wchar16>)` - * overload. - */ -bool ToLower(const wchar16* text, size_t length, wchar16* out) noexcept; -bool ToUpper(const wchar16* text, size_t length, wchar16* out) noexcept; -bool ToTitle(const wchar16* text, size_t length, wchar16* out) noexcept; - +/* @param text Pointer to the string to modify + * @param length Length of the string to modify + * @param out Pointer to the character array to write to + * + * NOTE: [text, text+length) and [out, out+length) should not interleave. + * + * TODO(yazevnul): replace these functions with `bool(const TWtringBuf, const TArrayRef<wchar16>)` + * overload. + */ +bool ToLower(const wchar16* text, size_t length, wchar16* out) noexcept; +bool ToUpper(const wchar16* text, size_t length, wchar16* out) noexcept; +bool ToTitle(const wchar16* text, size_t length, wchar16* out) noexcept; + bool ToLower(const wchar32* text, size_t length, wchar32* out) noexcept; bool ToUpper(const wchar32* text, size_t length, wchar32* out) noexcept; bool ToTitle(const wchar32* text, size_t length, wchar32* out) noexcept; -/* @param text Pointer to the string to modify - * @param length Length of the string to modify - * - * TODO(yazevnul): replace these functions with `bool(const TArrayRef<wchar16>)` overload. - */ -bool ToLower(wchar16* text, size_t length) noexcept; -bool ToUpper(wchar16* text, size_t length) noexcept; -bool ToTitle(wchar16* text, size_t length) noexcept; - +/* @param text Pointer to the string to modify + * @param length Length of the string to modify + * + * TODO(yazevnul): replace these functions with `bool(const TArrayRef<wchar16>)` overload. + */ +bool ToLower(wchar16* text, size_t length) noexcept; +bool ToUpper(wchar16* text, size_t length) noexcept; +bool ToTitle(wchar16* text, size_t length) noexcept; + bool ToLower(wchar32* text, size_t length) noexcept; bool ToUpper(wchar32* text, size_t length) noexcept; bool ToTitle(wchar32* text, size_t length) noexcept; -/* Convenience wrappers for `ToLower`, `ToUpper` and `ToTitle`. - */ -TUtf16String ToLowerRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; -TUtf16String ToUpperRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; -TUtf16String ToTitleRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; - -TUtf16String ToLowerRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; -TUtf16String ToUpperRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; -TUtf16String ToTitleRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; - +/* Convenience wrappers for `ToLower`, `ToUpper` and `ToTitle`. + */ +TUtf16String ToLowerRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; +TUtf16String ToUpperRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; +TUtf16String ToTitleRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; + +TUtf16String ToLowerRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; +TUtf16String ToUpperRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; +TUtf16String ToTitleRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; + TUtf32String ToLowerRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; TUtf32String ToUpperRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; @@ -823,7 +823,7 @@ inline size_t CountWideChars(const wchar16* b, const wchar16* e) { return count; } -inline size_t CountWideChars(const TWtringBuf str) { +inline size_t CountWideChars(const TWtringBuf str) { return CountWideChars(str.begin(), str.end()); } @@ -838,6 +838,6 @@ inline bool IsValidUTF16(const wchar16* b, const wchar16* e) { return true; } -inline bool IsValidUTF16(const TWtringBuf str) { +inline bool IsValidUTF16(const TWtringBuf str) { return IsValidUTF16(str.begin(), str.end()); } |