diff options
author | Vlad Yaroslavlev <vladon@vladon.com> | 2022-02-10 16:46:23 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:23 +0300 |
commit | 706b83ed7de5a473436620367af31fc0ceecde07 (patch) | |
tree | 103305d30dec77e8f6367753367f59b3cd68f9f1 /library/cpp/charset | |
parent | 918e8a1574070d0ec733f0b76cfad8f8892ad2e5 (diff) | |
download | ydb-706b83ed7de5a473436620367af31fc0ceecde07.tar.gz |
Restoring authorship annotation for Vlad Yaroslavlev <vladon@vladon.com>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset')
-rw-r--r-- | library/cpp/charset/ci_string.cpp | 52 | ||||
-rw-r--r-- | library/cpp/charset/ci_string.h | 528 | ||||
-rw-r--r-- | library/cpp/charset/ci_string_ut.cpp | 42 | ||||
-rw-r--r-- | library/cpp/charset/codepage.cpp | 42 | ||||
-rw-r--r-- | library/cpp/charset/codepage.h | 8 | ||||
-rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 2 | ||||
-rw-r--r-- | library/cpp/charset/iconv_ut.cpp | 8 | ||||
-rw-r--r-- | library/cpp/charset/recyr.hh | 16 | ||||
-rw-r--r-- | library/cpp/charset/recyr_int.hh | 2 | ||||
-rw-r--r-- | library/cpp/charset/wide.h | 26 | ||||
-rw-r--r-- | library/cpp/charset/wide_ut.cpp | 62 | ||||
-rw-r--r-- | library/cpp/charset/ya.make | 2 |
12 files changed, 395 insertions, 395 deletions
diff --git a/library/cpp/charset/ci_string.cpp b/library/cpp/charset/ci_string.cpp index 6097e40131..9efa3e21cd 100644 --- a/library/cpp/charset/ci_string.cpp +++ b/library/cpp/charset/ci_string.cpp @@ -1,41 +1,41 @@ -#include "ci_string.h" - -int TCiString::compare(const TCiString& s1, const TCiString& s2, const CodePage& cp) { +#include "ci_string.h" + +int TCiString::compare(const TCiString& s1, const TCiString& s2, const CodePage& cp) { return cp.stricmp(s1.data(), s2.data()); -} - -int TCiString::compare(const char* p, const TCiString& s2, const CodePage& cp) { +} + +int TCiString::compare(const char* p, const TCiString& s2, const CodePage& cp) { return cp.stricmp(p, s2.data()); -} - -int TCiString::compare(const TCiString& s1, const char* p, const CodePage& cp) { +} + +int TCiString::compare(const TCiString& s1, const char* p, const CodePage& cp) { return cp.stricmp(s1.data(), p); -} - +} + int TCiString::compare(const TStringBuf& p1, const TStringBuf& p2, const CodePage& cp) { int rv = cp.strnicmp(p1.data(), p2.data(), Min(p1.size(), p2.size())); return rv ? rv : p1.size() < p2.size() ? -1 : p1.size() == p2.size() ? 0 : 1; -} - +} + bool TCiString::is_prefix(const TStringBuf& what, const TStringBuf& of, const CodePage& cp) { size_t len = what.size(); return len <= of.size() && cp.strnicmp(what.data(), of.data(), len) == 0; -} - +} + bool TCiString::is_suffix(const TStringBuf& what, const TStringBuf& of, const CodePage& cp) { size_t len = what.size(); size_t slen = of.size(); return (len <= slen) && (0 == cp.strnicmp(what.data(), of.data() + slen - len, len)); -} - -size_t TCiString::hashVal(const char* s, size_t len, const CodePage& cp) { - size_t h = len; - for (; /* (*s) && */ len--; ++s) - h = 5 * h + cp.ToLower(*s); - return h; -} - -template <> +} + +size_t TCiString::hashVal(const char* s, size_t len, const CodePage& cp) { + size_t h = len; + for (; /* (*s) && */ len--; ++s) + h = 5 * h + cp.ToLower(*s); + return h; +} + +template <> void Out<TCiString>(IOutputStream& o, const TCiString& p) { o.Write(p.data(), p.size()); -} +} diff --git a/library/cpp/charset/ci_string.h b/library/cpp/charset/ci_string.h index edf24c1b6f..aaa1e5627c 100644 --- a/library/cpp/charset/ci_string.h +++ b/library/cpp/charset/ci_string.h @@ -1,280 +1,280 @@ -#pragma once - -#include "codepage.h" - -#include <util/generic/string.h> -#include <util/str_stl.h> - -// Same as TString but uses CASE INSENSITIVE comparator and hash. Use with care. -class TCiString: public TString { -public: - TCiString() { - } - - TCiString(const TString& s) - : TString(s) - { - } - - TCiString(const TString& s, size_t pos, size_t n) - : TString(s, pos, n) - { - } - - TCiString(const char* pc) - : TString(pc) - { - } - - TCiString(const char* pc, size_t n) - : TString(pc, n) - { - } - - TCiString(const char* pc, size_t pos, size_t n) - : TString(pc, pos, n) - { - } - - TCiString(size_t n, char c) - : TString(n, c) - { - } - - TCiString(const TUninitialized& uninitialized) - : TString(uninitialized) - { - } - - TCiString(const char* b, const char* e) - : TString(b, e) - { - } - +#pragma once + +#include "codepage.h" + +#include <util/generic/string.h> +#include <util/str_stl.h> + +// Same as TString but uses CASE INSENSITIVE comparator and hash. Use with care. +class TCiString: public TString { +public: + TCiString() { + } + + TCiString(const TString& s) + : TString(s) + { + } + + TCiString(const TString& s, size_t pos, size_t n) + : TString(s, pos, n) + { + } + + TCiString(const char* pc) + : TString(pc) + { + } + + TCiString(const char* pc, size_t n) + : TString(pc, n) + { + } + + TCiString(const char* pc, size_t pos, size_t n) + : TString(pc, pos, n) + { + } + + TCiString(size_t n, char c) + : TString(n, c) + { + } + + TCiString(const TUninitialized& uninitialized) + : TString(uninitialized) + { + } + + TCiString(const char* b, const char* e) + : TString(b, e) + { + } + explicit TCiString(const TStringBuf& s) - : TString(s) - { - } - - // ~~~ Comparison ~~~ : FAMILY0(int, compare) - static int compare(const TCiString& s1, const TCiString& s2, const CodePage& cp = csYandex); - static int compare(const char* p, const TCiString& s2, const CodePage& cp = csYandex); - static int compare(const TCiString& s1, const char* p, const CodePage& cp = csYandex); + : TString(s) + { + } + + // ~~~ Comparison ~~~ : FAMILY0(int, compare) + static int compare(const TCiString& s1, const TCiString& s2, const CodePage& cp = csYandex); + static int compare(const char* p, const TCiString& s2, const CodePage& cp = csYandex); + static int compare(const TCiString& s1, const char* p, const CodePage& cp = csYandex); static int compare(const TStringBuf& p1, const TStringBuf& p2, const CodePage& cp = csYandex); - + // TODO: implement properly in TString via enum ECaseSensitivity static bool is_prefix(const TStringBuf& what, const TStringBuf& of, const CodePage& cp = csYandex); static bool is_suffix(const TStringBuf& what, const TStringBuf& of, const CodePage& cp = csYandex); - + bool StartsWith(const TStringBuf& s, const CodePage& cp = csYandex) const { - return is_prefix(s, *this, cp); - } - + return is_prefix(s, *this, cp); + } + bool EndsWith(const TStringBuf& s, const CodePage& cp = csYandex) const { - return is_suffix(s, *this, cp); - } - - friend bool operator==(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) == 0; - } - - friend bool operator==(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) == 0; - } - - friend bool operator==(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) == 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator==(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { - return TCiString::compare(s, pc) == 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator==(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { - return TCiString::compare(pc, s) == 0; - } - - friend bool operator!=(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) != 0; - } - - friend bool operator!=(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) != 0; - } - - friend bool operator!=(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) != 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator!=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { - return TCiString::compare(s, pc) != 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator!=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { - return TCiString::compare(pc, s) != 0; - } - - friend bool operator<(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) < 0; - } - - friend bool operator<(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) < 0; - } - - friend bool operator<(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) < 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator<(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { - return TCiString::compare(s, pc) < 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator<(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { - return TCiString::compare(pc, s) < 0; - } - - friend bool operator<=(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) <= 0; - } - - friend bool operator<=(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) <= 0; - } - - friend bool operator<=(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) <= 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator<=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { - return TCiString::compare(s, pc) <= 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator<=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { - return TCiString::compare(pc, s) <= 0; - } - - friend bool operator>(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) > 0; - } - - friend bool operator>(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) > 0; - } - - friend bool operator>(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) > 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator>(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) noexcept { - return TCiString::compare(s, pc) > 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator>(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) noexcept { - return TCiString::compare(pc, s) > 0; - } - - friend bool operator>=(const TCiString& s1, const TCiString& s2) { - return TCiString::compare(s1, s2) >= 0; - } - - friend bool operator>=(const TCiString& s, const char* pc) { - return TCiString::compare(s, pc) >= 0; - } - - friend bool operator>=(const char* pc, const TCiString& s) { - return TCiString::compare(pc, s) >= 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator>=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { - return TCiString::compare(s, pc) >= 0; - } - - template <typename TDerived2, typename TTraits2> - friend bool operator>=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { - return TCiString::compare(pc, s) >= 0; - } - - static size_t hashVal(const char* pc, size_t len, const CodePage& cp = csYandex); - - size_t hash() const { + return is_suffix(s, *this, cp); + } + + friend bool operator==(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) == 0; + } + + friend bool operator==(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) == 0; + } + + friend bool operator==(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) == 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator==(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { + return TCiString::compare(s, pc) == 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator==(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { + return TCiString::compare(pc, s) == 0; + } + + friend bool operator!=(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) != 0; + } + + friend bool operator!=(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) != 0; + } + + friend bool operator!=(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) != 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator!=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { + return TCiString::compare(s, pc) != 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator!=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { + return TCiString::compare(pc, s) != 0; + } + + friend bool operator<(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) < 0; + } + + friend bool operator<(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) < 0; + } + + friend bool operator<(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) < 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator<(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { + return TCiString::compare(s, pc) < 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator<(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { + return TCiString::compare(pc, s) < 0; + } + + friend bool operator<=(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) <= 0; + } + + friend bool operator<=(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) <= 0; + } + + friend bool operator<=(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) <= 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator<=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { + return TCiString::compare(s, pc) <= 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator<=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { + return TCiString::compare(pc, s) <= 0; + } + + friend bool operator>(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) > 0; + } + + friend bool operator>(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) > 0; + } + + friend bool operator>(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) > 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator>(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) noexcept { + return TCiString::compare(s, pc) > 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator>(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) noexcept { + return TCiString::compare(pc, s) > 0; + } + + friend bool operator>=(const TCiString& s1, const TCiString& s2) { + return TCiString::compare(s1, s2) >= 0; + } + + friend bool operator>=(const TCiString& s, const char* pc) { + return TCiString::compare(s, pc) >= 0; + } + + friend bool operator>=(const char* pc, const TCiString& s) { + return TCiString::compare(pc, s) >= 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator>=(const TCiString& s, const TStringBase<TDerived2, TChar, TTraits2>& pc) { + return TCiString::compare(s, pc) >= 0; + } + + template <typename TDerived2, typename TTraits2> + friend bool operator>=(const TStringBase<TDerived2, TChar, TTraits2>& pc, const TCiString& s) { + return TCiString::compare(pc, s) >= 0; + } + + static size_t hashVal(const char* pc, size_t len, const CodePage& cp = csYandex); + + size_t hash() const { return TCiString::hashVal(data(), length()); - } -}; - -struct ci_hash { - inline size_t operator()(const char* s) const { - return TCiString::hashVal(s, strlen(s)); - } + } +}; + +struct ci_hash { + inline size_t operator()(const char* s) const { + return TCiString::hashVal(s, strlen(s)); + } inline size_t operator()(const TStringBuf& s) const { return TCiString::hashVal(s.data(), s.size()); - } -}; - -struct ci_hash32 { // not the same as ci_hash under 64-bit - inline ui32 operator()(const char* s) const { - return (ui32)TCiString::hashVal(s, strlen(s)); - } -}; - -//template <class T> struct hash; - -template <> -struct hash<TCiString>: public ci_hash { -}; - -template <class T> -struct TCIHash { -}; - -template <> -struct TCIHash<const char*> { + } +}; + +struct ci_hash32 { // not the same as ci_hash under 64-bit + inline ui32 operator()(const char* s) const { + return (ui32)TCiString::hashVal(s, strlen(s)); + } +}; + +//template <class T> struct hash; + +template <> +struct hash<TCiString>: public ci_hash { +}; + +template <class T> +struct TCIHash { +}; + +template <> +struct TCIHash<const char*> { inline size_t operator()(const TStringBuf& s) const { return TCiString::hashVal(s.data(), s.size()); - } -}; - -template <> -struct TCIHash<TStringBuf> { - inline size_t operator()(const TStringBuf& s) const { + } +}; + +template <> +struct TCIHash<TStringBuf> { + inline size_t operator()(const TStringBuf& s) const { return TCiString::hashVal(s.data(), s.size()); - } -}; - -template <> -struct TCIHash<TString> { - inline size_t operator()(const TString& s) const { + } +}; + +template <> +struct TCIHash<TString> { + inline size_t operator()(const TString& s) const { return TCiString::hashVal(s.data(), s.size()); - } -}; - -struct ci_less { - inline bool operator()(const char* x, const char* y) const { - return csYandex.stricmp(x, y) < 0; - } -}; - -struct ci_equal_to { - inline bool operator()(const char* x, const char* y) const { - return csYandex.stricmp(x, y) == 0; - } - // this implementation is not suitable for strings with zero characters inside, sorry + } +}; + +struct ci_less { + inline bool operator()(const char* x, const char* y) const { + return csYandex.stricmp(x, y) < 0; + } +}; + +struct ci_equal_to { + inline bool operator()(const char* x, const char* y) const { + return csYandex.stricmp(x, y) == 0; + } + // this implementation is not suitable for strings with zero characters inside, sorry bool operator()(const TStringBuf& x, const TStringBuf& y) const { return x.size() == y.size() && csYandex.strnicmp(x.data(), y.data(), y.size()) == 0; - } -}; - -template <> -struct TEqualTo<TCiString>: public ci_equal_to { -}; + } +}; + +template <> +struct TEqualTo<TCiString>: public ci_equal_to { +}; diff --git a/library/cpp/charset/ci_string_ut.cpp b/library/cpp/charset/ci_string_ut.cpp index 3d2a53d5fe..693427184d 100644 --- a/library/cpp/charset/ci_string_ut.cpp +++ b/library/cpp/charset/ci_string_ut.cpp @@ -1,23 +1,23 @@ -#include "ci_string.h" - +#include "ci_string.h" + #include <util/generic/hash.h> -#include <util/generic/string_ut.h> - -class TCaseStringTest: public TTestBase, private TStringTestImpl<TCiString, TTestData<char>> { -public: - void TestSpecial() { - TCiString ss = Data._0123456(); // type 'TCiString' is used as is +#include <util/generic/string_ut.h> + +class TCaseStringTest: public TTestBase, private TStringTestImpl<TCiString, TTestData<char>> { +public: + void TestSpecial() { + TCiString ss = Data._0123456(); // type 'TCiString' is used as is size_t hash_val = ComputeHash(ss); - UNIT_ASSERT(hash_val == 1489244); - } - -public: - UNIT_TEST_SUITE(TCaseStringTest); - UNIT_TEST(TestOperators); - UNIT_TEST(TestOperatorsCI); - - UNIT_TEST(TestSpecial); - UNIT_TEST_SUITE_END(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TCaseStringTest); + UNIT_ASSERT(hash_val == 1489244); + } + +public: + UNIT_TEST_SUITE(TCaseStringTest); + UNIT_TEST(TestOperators); + UNIT_TEST(TestOperatorsCI); + + UNIT_TEST(TestSpecial); + UNIT_TEST_SUITE_END(); +}; + +UNIT_TEST_SUITE_REGISTRATION(TCaseStringTest); diff --git a/library/cpp/charset/codepage.cpp b/library/cpp/charset/codepage.cpp index 0431bef31b..43c0650970 100644 --- a/library/cpp/charset/codepage.cpp +++ b/library/cpp/charset/codepage.cpp @@ -1,4 +1,4 @@ -#include "ci_string.h" +#include "ci_string.h" #include "wide.h" #include "recyr.hh" #include "codepage.h" @@ -9,7 +9,7 @@ #include <util/system/hi_lo.h> #include <util/system/yassert.h> #include <util/generic/hash.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/generic/vector.h> #include <util/generic/hash_set.h> #include <util/generic/singleton.h> @@ -128,13 +128,13 @@ const NCodepagePrivate::TCodepagesMap& NCodepagePrivate::TCodepagesMap::Instance class TCodePageHash { private: - using TData = THashMap<TStringBuf, ECharset, ci_hash, ci_equal_to>; + using TData = THashMap<TStringBuf, ECharset, ci_hash, ci_equal_to>; TData Data; TMemoryPool Pool; private: - inline void AddNameWithCheck(const TString& name, ECharset code) { + inline void AddNameWithCheck(const TString& name, ECharset code) { if (Data.find(name.c_str()) == Data.end()) { Data.insert(TData::value_type(Pool.Append(name.data(), name.size() + 1), code)); } else { @@ -142,10 +142,10 @@ private: } } - inline void AddName(const TString& name, ECharset code) { + inline void AddName(const TString& name, ECharset code) { AddNameWithCheck(name, code); - TString temp = name; + TString temp = name; RemoveAll(temp, '-'); RemoveAll(temp, '_'); AddNameWithCheck(temp, code); @@ -163,7 +163,7 @@ public: inline TCodePageHash() : Pool(20 * 1024) /* Currently used: 17KB. */ { - TString xPrefix = "x-"; + TString xPrefix = "x-"; const char* name; for (size_t i = 0; i != CODES_MAX; ++i) { @@ -285,7 +285,7 @@ void DoDecodeUnknownPlane(TxChar* str, TxChar*& ee, const ECharset enc) { TxChar* s = str; TxChar* d = str; - TVector<char> buf; + TVector<char> buf; size_t read = 0; size_t written = 0; @@ -319,19 +319,19 @@ void DecodeUnknownPlane(wchar32* str, wchar32*& ee, const ECharset enc) { } namespace { - class THashSetType: public THashSet<TString> { + class THashSetType: public THashSet<TString> { public: - inline void Add(const TString& s) { + inline void Add(const TString& s) { insert(s); } - inline bool Has(const TString& s) const noexcept { + inline bool Has(const TString& s) const noexcept { return find(s) != end(); } }; } -class TWindowsPrefixesHashSet: public THashSetType { +class TWindowsPrefixesHashSet: public THashSetType { public: inline TWindowsPrefixesHashSet() { Add("win"); @@ -345,7 +345,7 @@ public: } }; -class TCpPrefixesHashSet: public THashSetType { +class TCpPrefixesHashSet: public THashSetType { public: inline TCpPrefixesHashSet() { Add("microsoft"); @@ -354,7 +354,7 @@ public: } }; -class TIsoPrefixesHashSet: public THashSetType { +class TIsoPrefixesHashSet: public THashSetType { public: inline TIsoPrefixesHashSet() { Add("iso"); @@ -363,7 +363,7 @@ public: } }; -class TLatinToIsoHash: public THashMap<const char*, TString, ci_hash, ci_equal_to> { +class TLatinToIsoHash: public THashMap<const char*, TString, ci_hash, ci_equal_to> { public: inline TLatinToIsoHash() { insert(value_type("latin1", "iso-8859-1")); @@ -379,12 +379,12 @@ public: } }; -static inline void NormalizeEncodingPrefixes(TString& enc) { +static inline void NormalizeEncodingPrefixes(TString& enc) { size_t preflen = enc.find_first_of("0123456789"); - if (preflen == TString::npos) + if (preflen == TString::npos) return; - TString prefix = enc.substr(0, preflen); + TString prefix = enc.substr(0, preflen); for (size_t i = 0; i < prefix.length(); ++i) { if (prefix[i] == '-') { prefix.remove(i--); @@ -410,7 +410,7 @@ static inline void NormalizeEncodingPrefixes(TString& enc) { if (Singleton<TIsoPrefixesHashSet>()->Has(prefix)) { if (enc.length() == preflen + 1 || enc.length() == preflen + 2) { - TString enccopy = enc.substr(preflen); + TString enccopy = enc.substr(preflen); enccopy.prepend("latin"); const TLatinToIsoHash* latinhash = Singleton<TLatinToIsoHash>(); TLatinToIsoHash::const_iterator it = latinhash->find(enccopy.data()); @@ -425,7 +425,7 @@ static inline void NormalizeEncodingPrefixes(TString& enc) { } } -class TEncodingNamesHashSet: public THashSetType { +class TEncodingNamesHashSet: public THashSetType { public: TEncodingNamesHashSet() { Add("iso-8859-1"); @@ -492,7 +492,7 @@ ECharset EncodingHintByName(const char* encname) { --lastpos; // Do some normalization - TString enc(encname, lastpos - encname + 1); + TString enc(encname, lastpos - encname + 1); enc.to_lower(); for (char* p = enc.begin(); p != enc.end(); ++p) { if (*p == ' ' || *p == '=' || *p == '_') diff --git a/library/cpp/charset/codepage.h b/library/cpp/charset/codepage.h index 30a02a4610..8af579bdb5 100644 --- a/library/cpp/charset/codepage.h +++ b/library/cpp/charset/codepage.h @@ -5,7 +5,7 @@ #include <util/charset/recode_result.h> #include <util/charset/unidata.h> // all wchar32 functions #include <util/charset/utf8.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/generic/ylimits.h> #include <util/generic/yexception.h> #include <util/system/yassert.h> @@ -303,17 +303,17 @@ inline void ToUpper(char* s, size_t n, const CodePage& cp = csYandex) { *s = cp.ToUpper(*s); } -inline TString ToLower(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { +inline TString ToLower(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { s.Transform([&cp](size_t, char c) { return cp.ToLower(c); }, pos, n); return s; } -inline TString ToUpper(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { +inline TString ToUpper(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { s.Transform([&cp](size_t, char c) { return cp.ToUpper(c); }, pos, n); return s; } -inline TString ToTitle(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { +inline TString ToTitle(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { s.Transform( [pos, &cp](size_t i, char c) { return i == pos ? cp.ToTitle(c) : cp.ToLower(c); diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478..9015535324 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -253,7 +253,7 @@ void TCodepageTest::TestUTFFromUnknownPlane() { // test TChar version // bytebuffer of len writtenbytes contains sampletext of len samplelen chars in utf8 - TUtf16String wtr = CharToWide(TStringBuf(bytebuffer, writtenbytes), CODES_UNKNOWNPLANE); + TUtf16String wtr = CharToWide(TStringBuf(bytebuffer, writtenbytes), CODES_UNKNOWNPLANE); TChar* strend = wtr.begin() + wtr.size(); DecodeUnknownPlane(wtr.begin(), strend, CODES_UTF8); wtr.resize(strend - wtr.data(), 'Q'); diff --git a/library/cpp/charset/iconv_ut.cpp b/library/cpp/charset/iconv_ut.cpp index e8c56f6d49..eb6a0f64e0 100644 --- a/library/cpp/charset/iconv_ut.cpp +++ b/library/cpp/charset/iconv_ut.cpp @@ -4,13 +4,13 @@ #include <library/cpp/testing/unittest/registar.h> -static void TestIconv(const TString& utf8, const TString& other, ECharset enc) { - TUtf16String wide0 = CharToWide(utf8, CODES_UTF8); - TUtf16String wide1 = CharToWide(other, enc); +static void TestIconv(const TString& utf8, const TString& other, ECharset enc) { + TUtf16String wide0 = CharToWide(utf8, CODES_UTF8); + TUtf16String wide1 = CharToWide(other, enc); UNIT_ASSERT(wide0 == wide1); - TString temp = WideToUTF8(wide0); + TString temp = WideToUTF8(wide0); UNIT_ASSERT(temp == utf8); temp = WideToChar(wide0, enc); diff --git a/library/cpp/charset/recyr.hh b/library/cpp/charset/recyr.hh index 5ec8734bcf..e0af996ae9 100644 --- a/library/cpp/charset/recyr.hh +++ b/library/cpp/charset/recyr.hh @@ -110,7 +110,7 @@ inline RECODE_RESULT Recode(ECharset from, ECharset to, const char* in, char* ou * @return false if conversion was not attempted (charsets were the same), * true if successful */ -inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& out) { +inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& out) { if (to == from) return false; @@ -132,23 +132,23 @@ inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& ou } /////////////////////////////////////////////////////////////////////////////////////// -// TString -> TString // +// TString -> TString // /////////////////////////////////////////////////////////////////////////////////////// -inline TString Recode(ECharset from, ECharset to, const TString& in) { - TString out; +inline TString Recode(ECharset from, ECharset to, const TString& in) { + TString out; return to != from && Recode(from, to, in, out) ? out : in; } -inline TString RecodeToYandex(ECharset from, const TString& in) { +inline TString RecodeToYandex(ECharset from, const TString& in) { return Recode(from, CODES_YANDEX, in); } -inline TString RecodeFromYandex(ECharset to, const TString& in) { +inline TString RecodeFromYandex(ECharset to, const TString& in) { return Recode(CODES_YANDEX, to, in); } -inline TString RecodeToHTMLEntities(ECharset from, const TString& in) { +inline TString RecodeToHTMLEntities(ECharset from, const TString& in) { RECODE_RESULT res; size_t outWritten, inRead; - TString out; + TString out; out.resize(in.length() * (4 + 4)); res = NCodepagePrivate::_recodeToHTMLEntities(from, in.c_str(), out.begin(), in.length(), out.length(), inRead, outWritten); if (res == RECODE_EOOUTPUT) { //input contains many 8-byte characters? diff --git a/library/cpp/charset/recyr_int.hh b/library/cpp/charset/recyr_int.hh index 353af53305..61239206bc 100644 --- a/library/cpp/charset/recyr_int.hh +++ b/library/cpp/charset/recyr_int.hh @@ -3,7 +3,7 @@ #include <util/charset/recode_result.h> #include <util/charset/utf8.h> #include <util/generic/ptr.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/system/defaults.h> #include "codepage.h" diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e..46f7b16d76 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -7,7 +7,7 @@ #include <util/charset/unidata.h> #include <util/charset/utf8.h> #include <util/charset/wide.h> -#include <util/generic/string.h> +#include <util/generic/string.h> #include <util/generic/algorithm.h> #include <util/generic/yexception.h> #include <util/memory/tempbuf.h> @@ -123,7 +123,7 @@ namespace NDetail { struct TRecodeTraits<char> { using TCharTo = wchar16; using TStringBufTo = TWtringBuf; - using TStringTo = TUtf16String; + using TStringTo = TUtf16String; enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case // Here an unicode character can be converted up to 4 bytes of UTF8 }; @@ -132,14 +132,14 @@ namespace NDetail { struct TRecodeTraits<wchar16> { using TCharTo = char; using TStringBufTo = TStringBuf; - using TStringTo = TString; + using TStringTo = TString; enum { ReserveSize = 2 }; // possible surrogate pairs ? }; // Operations with destination buffer where recoded string will be written template <typename TResult> struct TRecodeResultOps { - // default implementation will work with TString and TUtf16String - 99% of usage + // default implementation will work with TString and TUtf16String - 99% of usage using TResultChar = typename TResult::char_type; static inline size_t Size(const TResult& dst) { @@ -223,18 +223,18 @@ inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encodi } //! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type -inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { +inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { if (enc == CODES_UTF8) return WideToUTF8(text, len); - TString s = TString::Uninitialized(len); + TString s = TString::Uninitialized(len); s.remove(WideToChar(text, len, s.begin(), enc)); return s; } - TString s = TString::Uninitialized(len * 3); + TString s = TString::Uninitialized(len * 3); size_t read = 0; size_t written = 0; @@ -244,15 +244,15 @@ inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { return s; } -inline TUtf16String CharToWide(const char* text, size_t len, const CodePage& cp) { - TUtf16String w = TUtf16String::Uninitialized(len); +inline TUtf16String CharToWide(const char* text, size_t len, const CodePage& cp) { + TUtf16String w = TUtf16String::Uninitialized(len); CharToWide(text, len, w.begin(), cp); return w; } //! calls either to @c UTF8ToWide or @c CharToWide depending on the encoding type template <bool robust> -inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { +inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { if (enc == CODES_UTF8) return UTF8ToWide<robust>(text, len); @@ -260,7 +260,7 @@ inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { return CharToWide(text, len, *CodePageByCharset(enc)); } - TUtf16String w = TUtf16String::Uninitialized(len * 2); + TUtf16String w = TUtf16String::Uninitialized(len * 2); size_t read = 0; size_t written = 0; @@ -274,8 +274,8 @@ inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { //! @param text text to be converted //! @param len length of the text in characters //! @param cp a codepage that is used in case of failed conversion from UTF8 -inline TUtf16String UTF8ToWide(const char* text, size_t len, const CodePage& cp) { - TUtf16String w = TUtf16String::Uninitialized(len); +inline TUtf16String UTF8ToWide(const char* text, size_t len, const CodePage& cp) { + TUtf16String w = TUtf16String::Uninitialized(len); size_t written = 0; if (UTF8ToWide(text, len, w.begin(), written)) w.remove(written); diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51ba..518b885b6c 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -32,16 +32,16 @@ namespace { "\xd1\x80\xd1\x81\xd1\x82\xd1\x83\xd1\x84\xd1\x85\xd1\x86\xd1\x87" "\xd1\x88\xd1\x89\xd1\x8a\xd1\x8b\xd1\x8c\xd1\x8d\xd1\x8e\xd1\x8f"; - TString CreateYandexText() { + TString CreateYandexText() { const int len = 256; char text[len] = {0}; for (int i = 0; i < len; ++i) { text[i] = static_cast<char>(i); } - return TString(text, len); + return TString(text, len); } - TUtf16String CreateUnicodeText() { + TUtf16String CreateUnicodeText() { const int len = 256; wchar16 text[len] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00 - 0x0F @@ -65,10 +65,10 @@ namespace { text[i] = static_cast<wchar16>(i + 0x0350); // 0x0410 - 0x044F } } - return TUtf16String(text, len); + return TUtf16String(text, len); } - TString CreateUTF8Text() { + TString CreateUTF8Text() { char text[] = { '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', @@ -95,12 +95,12 @@ namespace { '\xd0', '\xbf', '\xd1', '\x80', '\xd1', '\x81', '\xd1', '\x82', '\xd1', '\x83', '\xd1', '\x84', '\xd1', '\x85', '\xd1', '\x86', '\xd1', '\x87', '\xd1', '\x88', '\xd1', '\x89', '\xd1', '\x8a', '\xd1', '\x8b', '\xd1', '\x8c', '\xd1', '\x8d', '\xd1', '\x8e', '\xd1', '\x8f'}; - return TString(text, Y_ARRAY_SIZE(text)); + return TString(text, Y_ARRAY_SIZE(text)); } //! use this function to dump UTF8 text into a file in case of any changes // void DumpUTF8Text() { - // TString s = WideToUTF8(UnicodeText); + // TString s = WideToUTF8(UnicodeText); // std::ofstream f("utf8.txt"); // f << std::hex; // for (int i = 0; i < (int)s.size(); ++i) { @@ -117,9 +117,9 @@ namespace { class TConversionTest: public TTestBase { private: //! @note every of the text can have zeros in the middle - const TString YandexText; - const TUtf16String UnicodeText; - const TString UTF8Text; + const TString YandexText; + const TUtf16String UnicodeText; + const TString UTF8Text; private: UNIT_TEST_SUITE(TConversionTest); @@ -171,7 +171,7 @@ UNIT_TEST_SUITE_REGISTRATION(TConversionTest); } while (false) void TConversionTest::TestCharToWide() { - TUtf16String w = CharToWide(YandexText, CODES_YANDEX); + TUtf16String w = CharToWide(YandexText, CODES_YANDEX); UNIT_ASSERT(w.size() == 256); UNIT_ASSERT(w.size() == UnicodeText.size()); @@ -182,7 +182,7 @@ void TConversionTest::TestCharToWide() { } void TConversionTest::TestWideToChar() { - TString s = WideToChar(UnicodeText, CODES_YANDEX); + TString s = WideToChar(UnicodeText, CODES_YANDEX); UNIT_ASSERT(s.size() == 256); UNIT_ASSERT(s.size() == YandexText.size()); @@ -193,18 +193,18 @@ void TConversionTest::TestWideToChar() { } static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize, ECharset enc) { - TUtf16String w = UTF8ToWide(str); + TUtf16String w = UTF8ToWide(str); UNIT_ASSERT(w.size() == wideSize); UNIT_ASSERT(!memcmp(w.c_str(), wide, wideSize)); - TString s = WideToChar(w, enc); + TString s = WideToChar(w, enc); UNIT_ASSERT(s == str); } void TConversionTest::TestYandexEncoding() { - TUtf16String w = UTF8ToWide(utf8CyrillicAlphabet, strlen(utf8CyrillicAlphabet), csYandex); + TUtf16String w = UTF8ToWide(utf8CyrillicAlphabet, strlen(utf8CyrillicAlphabet), csYandex); UNIT_ASSERT(w == wideCyrillicAlphabet); w = UTF8ToWide(yandexCyrillicAlphabet, strlen(yandexCyrillicAlphabet), csYandex); UNIT_ASSERT(w == wideCyrillicAlphabet); @@ -217,7 +217,7 @@ void TConversionTest::TestYandexEncoding() { const char* yandexNonBMP2 = "ab?n"; UNIT_ASSERT(yandexNonBMP2 == WideToChar(wNonBMPDummy2, Y_ARRAY_SIZE(wNonBMPDummy2), CODES_YANDEX)); - TString temp; + TString temp; temp.resize(Y_ARRAY_SIZE(wNonBMPDummy2)); size_t read = 0; size_t written = 0; @@ -229,7 +229,7 @@ void TConversionTest::TestYandexEncoding() { } void TConversionTest::TestRecodeIntoString() { - TString sYandex(UnicodeText.size() * 4, 'x'); + TString sYandex(UnicodeText.size() * 4, 'x'); const char* sdata = sYandex.data(); TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); UNIT_ASSERT(sYandex == YandexText); // same content @@ -238,7 +238,7 @@ void TConversionTest::TestRecodeIntoString() { UNIT_ASSERT(sYandex.size() == sres.size()); // same size TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); - TUtf16String sUnicode; + TUtf16String sUnicode; sUnicode.reserve(YandexText.size() * 4); const wchar16* wdata = sUnicode.data(); TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); @@ -247,7 +247,7 @@ void TConversionTest::TestRecodeIntoString() { UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer UNIT_ASSERT(sUnicode.size() == wres.size()); // same size - TString sUtf8 = " "; + TString sUtf8 = " "; size_t scap = sUtf8.capacity(); sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); UNIT_ASSERT(sUtf8 == UTF8Text); // same content @@ -258,7 +258,7 @@ void TConversionTest::TestRecodeIntoString() { sUnicode.clear(); wdata = sUnicode.data(); - TUtf16String copy = sUnicode; // increase ref-counter + TUtf16String copy = sUnicode; // increase ref-counter wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); UNIT_ASSERT(sUnicode == UnicodeText); // same content #ifndef TSTRING_IS_STD_STRING @@ -268,8 +268,8 @@ void TConversionTest::TestRecodeIntoString() { UNIT_ASSERT(sUnicode.size() == wres.size()); // same content } -static TString GenerateJunk(size_t seed) { - TString res; +static TString GenerateJunk(size_t seed) { + TString res; size_t hash = NumericHash(seed); size_t size = hash % 1024; res.reserve(size); @@ -280,8 +280,8 @@ static TString GenerateJunk(size_t seed) { void TConversionTest::TestRecodeAppend() { { - TString s1, s2; - NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); + TString s1, s2; + NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); UNIT_ASSERT(s1.empty()); NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); @@ -292,7 +292,7 @@ void TConversionTest::TestRecodeAppend() { s2 += WideToChar(UnicodeText, CODES_YANDEX); UNIT_ASSERT_EQUAL(s1, s2); - NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); + NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); UNIT_ASSERT_EQUAL(s1, s2); NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); @@ -300,7 +300,7 @@ void TConversionTest::TestRecodeAppend() { UNIT_ASSERT_EQUAL(s1, s2); for (size_t i = 0; i < 100; ++i) { - TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX); + TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX); NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); s2 += WideToUTF8(junk); UNIT_ASSERT_EQUAL(s1, s2); @@ -308,8 +308,8 @@ void TConversionTest::TestRecodeAppend() { } { - TUtf16String s1, s2; - NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); + TUtf16String s1, s2; + NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); UNIT_ASSERT(s1.empty()); NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); @@ -320,7 +320,7 @@ void TConversionTest::TestRecodeAppend() { s2 += CharToWide(YandexText, CODES_YANDEX); UNIT_ASSERT_EQUAL(s1, s2); - NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); + NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); UNIT_ASSERT_EQUAL(s1, s2); NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); @@ -328,7 +328,7 @@ void TConversionTest::TestRecodeAppend() { UNIT_ASSERT_EQUAL(s1, s2); for (size_t i = 0; i < 100; ++i) { - TString junk = GenerateJunk(i); + TString junk = GenerateJunk(i); NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); s2 += CharToWide(junk, CODES_YANDEX); UNIT_ASSERT_EQUAL(s1, s2); @@ -347,7 +347,7 @@ void TConversionTest::TestRecode() { if (!SingleByteCodepage(enc)) continue; - using THash = THashSet<char>; + using THash = THashSet<char>; THash hash; for (int i = 0; i != 256; ++i) { diff --git a/library/cpp/charset/ya.make b/library/cpp/charset/ya.make index 7565566bf0..61af7a3243 100644 --- a/library/cpp/charset/ya.make +++ b/library/cpp/charset/ya.make @@ -11,7 +11,7 @@ SRCS( iconv.cpp recyr.hh recyr_int.hh - ci_string.cpp + ci_string.cpp wide.cpp ) |