about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/charset/wide.h
diff options
context:
space:
mode:
author: mowgli <mowgli@yandex-team.ru> 2022-02-10 16:49:25 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:25 +0300
commit: 56c39b3cf908e7202b1f7551a1653681e8015607 (patch)
tree: 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/charset/wide.h
parent: 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (diff)
download: ydb-56c39b3cf908e7202b1f7551a1653681e8015607.tar.gz
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/charset/wide.h')
-rw-r--r-- library/cpp/charset/wide.h 186
1 files changed, 93 insertions, 93 deletions
diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h
index c8f78a9eb4b..32d30e849e9 100644
--- a/library/cpp/charset/wide.h
+++ b/library/cpp/charset/wide.h
@@ -47,61 +47,61 @@ inline void CharToWide(const char* text, size_t len, TCharType* dest, const Code
}
}
-namespace NDetail {
- namespace NBaseOps {
- // Template interface base recoding drivers, do not perform any memory management,
- // do not care about buffer size, so supplied @dst
- // should have enough room for the result (with proper reserve for the worst case)
-
- // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
-
+namespace NDetail {
+ namespace NBaseOps {
+ // Template interface base recoding drivers, do not perform any memory management,
+ // do not care about buffer size, so supplied @dst
+ // should have enough room for the result (with proper reserve for the worst case)
+
+ // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::CharToWide(src.data(), src.size(), dst, cp);
return TBasicStringBuf<TCharType>(dst, src.size());
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::WideToChar(src.data(), src.size(), dst, cp.CPEnum);
return TStringBuf(dst, src.size());
- }
-
+ }
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
- size_t read = 0;
- size_t written = 0;
+ size_t read = 0;
+ size_t written = 0;
::NICONVPrivate::RecodeToUnicode(encoding, src.data(), dst, src.size(), src.size(), read, written);
return TBasicStringBuf<TCharType>(dst, written);
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
- size_t read = 0;
- size_t written = 0;
+ size_t read = 0;
+ size_t written = 0;
::NICONVPrivate::RecodeFromUnicode(encoding, src.data(), dst, src.size(), src.size() * 3, read, written);
- return TStringBuf(dst, written);
- }
-
+ return TStringBuf(dst, written);
+ }
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) {
- size_t len = 0;
+ size_t len = 0;
if (!::UTF8ToWide(src.data(), src.size(), dst, len))
- ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\"");
+ ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\"");
return TBasicStringBuf<TCharType>(dst, len);
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) {
- size_t len = 0;
+ size_t len = 0;
::WideToUTF8(src.data(), src.size(), dst, len);
- return TStringBuf(dst, len);
- }
-
+ return TStringBuf(dst, len);
+ }
+
// Select one of re-coding methods from above, based on provided @encoding
template <typename TCharFrom, typename TCharTo>
@@ -115,73 +115,73 @@ namespace NDetail {
}
}
-
- template <typename TCharFrom>
- struct TRecodeTraits;
-
- template <>
- struct TRecodeTraits<char> {
+
+ template <typename TCharFrom>
+ struct TRecodeTraits;
+
+ template <>
+ struct TRecodeTraits<char> {
using TCharTo = wchar16;
using TStringBufTo = TWtringBuf;
using TStringTo = TUtf16String;
enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case
// Here an unicode character can be converted up to 4 bytes of UTF8
- };
-
- template <>
- struct TRecodeTraits<wchar16> {
+ };
+
+ template <>
+ struct TRecodeTraits<wchar16> {
using TCharTo = char;
using TStringBufTo = TStringBuf;
using TStringTo = TString;
enum { ReserveSize = 2 }; // possible surrogate pairs ?
- };
-
- // Operations with destination buffer where recoded string will be written
- template <typename TResult>
- struct TRecodeResultOps {
+ };
+
+ // Operations with destination buffer where recoded string will be written
+ template <typename TResult>
+ struct TRecodeResultOps {
// default implementation will work with TString and TUtf16String - 99% of usage
using TResultChar = typename TResult::char_type;
-
- static inline size_t Size(const TResult& dst) {
- return dst.size();
- }
-
- static inline TResultChar* Reserve(TResult& dst, size_t len) {
- dst.ReserveAndResize(len);
- return dst.begin();
- }
-
- static inline void Truncate(TResult& dst, size_t len) {
- dst.resize(len);
- }
- };
-
- // Main template interface for recoding in both directions
-
- template <typename TCharFrom, typename TResult>
+
+ static inline size_t Size(const TResult& dst) {
+ return dst.size();
+ }
+
+ static inline TResultChar* Reserve(TResult& dst, size_t len) {
+ dst.ReserveAndResize(len);
+ return dst.begin();
+ }
+
+ static inline void Truncate(TResult& dst, size_t len) {
+ dst.resize(len);
+ }
+ };
+
+ // Main template interface for recoding in both directions
+
+ template <typename TCharFrom, typename TResult>
typename TRecodeTraits<TCharFrom>::TStringBufTo Recode(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
- // make enough room for re-coded string
- TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize);
- // do re-coding
+ // make enough room for re-coded string
+ TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize);
+ // do re-coding
TBasicStringBuf<TCharTo> res = NBaseOps::Recode(src, dstbuf, encoding);
- // truncate result back to proper size
- TRecodeResultOps<TResult>::Truncate(dst, res.size());
- return res;
- }
-
- // appending version of Recode()
- template <typename TCharFrom, typename TResult>
+ // truncate result back to proper size
+ TRecodeResultOps<TResult>::Truncate(dst, res.size());
+ return res;
+ }
+
+ // appending version of Recode()
+ template <typename TCharFrom, typename TResult>
typename TRecodeTraits<TCharFrom>::TStringBufTo RecodeAppend(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
- size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst);
- TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize);
+ size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst);
+ TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize);
TBasicStringBuf<TCharTo> appended = NBaseOps::Recode(src, dstbuf + dstOrigSize, encoding);
- size_t dstFinalSize = dstOrigSize + appended.size();
- TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize);
+ size_t dstFinalSize = dstOrigSize + appended.size();
+ TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize);
return TBasicStringBuf<TCharTo>(dstbuf, dstFinalSize);
- }
-
+ }
+
// special implementation for robust utf8 functions
template <typename TResult>
TWtringBuf RecodeUTF8Robust(const TStringBuf src, TResult& dst) {
@@ -197,31 +197,31 @@ namespace NDetail {
return TWtringBuf(dstbuf, written);
}
- template <typename TCharFrom>
+ template <typename TCharFrom>
inline typename TRecodeTraits<TCharFrom>::TStringTo Recode(const TBasicStringBuf<TCharFrom> src, ECharset encoding) {
- typename TRecodeTraits<TCharFrom>::TStringTo res;
- Recode<TCharFrom>(src, res, encoding);
- return res;
- }
+ typename TRecodeTraits<TCharFrom>::TStringTo res;
+ Recode<TCharFrom>(src, res, encoding);
+ return res;
+ }
}
-
-// Write result into @dst. Return string-buffer pointing to re-coded content of @dst.
-
+
+// Write result into @dst. Return string-buffer pointing to re-coded content of @dst.
+
template <bool robust>
inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
if (robust && CODES_UTF8 == encoding)
return ::NDetail::RecodeUTF8Robust(src, dst);
return ::NDetail::Recode<char>(src, dst, encoding);
-}
-
+}
+
inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
return ::NDetail::Recode<char>(src, dst, encoding);
}
inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encoding) {
return ::NDetail::Recode<wchar16>(src, dst, encoding);
-}
-
+}
+
//! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type
inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) {
if (NCodepagePrivate::NativeCodepage(enc)) {
@@ -301,6 +301,6 @@ inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) {
return CharToWide(s.data(), s.size(), cp);
}
-// true if @text can be fully encoded to specified @encoding,
-// with possibility to recover exact original text after decoding
-bool CanBeEncoded(TWtringBuf text, ECharset encoding);
+// true if @text can be fully encoded to specified @encoding,
+// with possibility to recover exact original text after decoding
+bool CanBeEncoded(TWtringBuf text, ECharset encoding);