diff options
author | alzobnin <alzobnin@yandex-team.ru> | 2022-02-10 16:46:50 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:50 +0300 |
commit | c9317148cc3e9f1b0bc0ce95172f47e099f2c554 (patch) | |
tree | 1e426d905ba97d8c281c5cc53389faaced3832c7 /util/charset/utf8.cpp | |
parent | 6170310e8721e225f64ddabf7a7358253d7a1249 (diff) | |
download | ydb-c9317148cc3e9f1b0bc0ce95172f47e099f2c554.tar.gz |
Restoring authorship annotation for <alzobnin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset/utf8.cpp')
-rw-r--r-- | util/charset/utf8.cpp | 112 |
1 files changed, 56 insertions, 56 deletions
diff --git a/util/charset/utf8.cpp b/util/charset/utf8.cpp index efe3a52f61..0a1453274c 100644 --- a/util/charset/utf8.cpp +++ b/util/charset/utf8.cpp @@ -1,6 +1,6 @@ -#include "unidata.h" +#include "unidata.h" #include "utf8.h" - + namespace { enum class ECaseConversion { ToUpper, @@ -82,72 +82,72 @@ namespace { } } // namespace -extern const wchar32 BROKEN_RUNE = 0xFFFD; - -static const char* SkipUTF8Chars(const char* begin, const char* end, size_t numChars) { - const unsigned char* uEnd = reinterpret_cast<const unsigned char*>(end); - while (begin != end && numChars > 0) { - const unsigned char* uBegin = reinterpret_cast<const unsigned char*>(begin); - size_t runeLen; - if (GetUTF8CharLen(runeLen, uBegin, uEnd) != RECODE_OK) { - ythrow yexception() << "invalid UTF-8 char"; - } - begin += runeLen; +extern const wchar32 BROKEN_RUNE = 0xFFFD; + +static const char* SkipUTF8Chars(const char* begin, const char* end, size_t numChars) { + const unsigned char* uEnd = reinterpret_cast<const unsigned char*>(end); + while (begin != end && numChars > 0) { + const unsigned char* uBegin = reinterpret_cast<const unsigned char*>(begin); + size_t runeLen; + if (GetUTF8CharLen(runeLen, uBegin, uEnd) != RECODE_OK) { + ythrow yexception() << "invalid UTF-8 char"; + } + begin += runeLen; Y_ASSERT(begin <= end); - --numChars; - } - return begin; -} - + --numChars; + } + return begin; +} + TStringBuf SubstrUTF8(const TStringBuf str, size_t pos, size_t len) { - const char* start = SkipUTF8Chars(str.begin(), str.end(), pos); - const char* end = SkipUTF8Chars(start, str.end(), len); - return TStringBuf(start, end - start); -} - -EUTF8Detect UTF8Detect(const char* s, size_t len) { - const unsigned char* s0 = (const unsigned char*)s; - const unsigned char* send = s0 + len; - wchar32 rune; - size_t rune_len; - EUTF8Detect res = ASCII; - - while (s0 < send) { - RECODE_RESULT rr = SafeReadUTF8Char(rune, rune_len, s0, send); - - if (rr != RECODE_OK) { - return NotUTF8; - } - - if (rune_len > 1) { - res = UTF8; - } - - s0 += rune_len; - } - - return res; -} - + const char* start = SkipUTF8Chars(str.begin(), str.end(), pos); + const char* end = SkipUTF8Chars(start, str.end(), len); + return TStringBuf(start, end - start); +} + +EUTF8Detect UTF8Detect(const char* s, size_t len) { + const unsigned char* s0 = (const unsigned char*)s; + const unsigned char* send = s0 + len; + wchar32 rune; + size_t rune_len; + EUTF8Detect res = ASCII; + + while (s0 < send) { + RECODE_RESULT rr = SafeReadUTF8Char(rune, rune_len, s0, send); + + if (rr != RECODE_OK) { + return NotUTF8; + } + + if (rune_len > 1) { + res = UTF8; + } + + s0 += rune_len; + } + + return res; +} + bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString) { return ConvertCaseUTF8Impl(ECaseConversion::ToLower, beg, n, newString); -} - +} + TString ToLowerUTF8(const TString& s) { TString newString; bool changed = ToLowerUTF8Impl(s.data(), s.size(), newString); - return changed ? newString : s; -} - + return changed ? newString : s; +} + TString ToLowerUTF8(TStringBuf s) { TString newString; bool changed = ToLowerUTF8Impl(s.data(), s.size(), newString); return changed ? newString : TString(s.data(), s.size()); -} - +} + TString ToLowerUTF8(const char* s) { - return ToLowerUTF8(TStringBuf(s)); -} + return ToLowerUTF8(TStringBuf(s)); +} bool ToUpperUTF8Impl(const char* beg, size_t n, TString& newString) { return ConvertCaseUTF8Impl(ECaseConversion::ToUpper, beg, n, newString); |