diff options
author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/string/ascii.h |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/string/ascii.h')
-rw-r--r-- | util/string/ascii.h | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/util/string/ascii.h b/util/string/ascii.h new file mode 100644 index 00000000000..10344384d33 --- /dev/null +++ b/util/string/ascii.h @@ -0,0 +1,247 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/system/compat.h> +#include <util/generic/string.h> + +// ctype.h-like functions, locale-independent: +// IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space} and +// AsciiTo{Upper,Lower} +// +// standard functions from <ctype.h> are locale dependent, +// and cause undefined behavior when called on chars outside [0..127] range + +namespace NPrivate { + enum ECharClass { + CC_SPACE = 1, + CC_UPPER = 2, + CC_LOWER = 4, + CC_DIGIT = 8, + CC_ALPHA = 16, + CC_ALNUM = 32, + CC_ISHEX = 64, + CC_PUNCT = 128, + }; + + extern const unsigned char ASCII_CLASS[256]; + extern const unsigned char ASCII_LOWER[256]; + + template <class T> + struct TDereference { + using type = T; + }; + +#ifndef TSTRING_IS_STD_STRING + template <class String> + struct TDereference<TBasicCharRef<String>> { + using type = typename String::value_type; + }; +#endif + + template <class T> + using TDereferenced = typename TDereference<T>::type; + + template <class T> + bool RangeOk(T c) noexcept { + static_assert(std::is_integral<T>::value, "Integral type character expected"); + + if (sizeof(T) == 1) { + return true; + } + + return c >= static_cast<T>(0) && c <= static_cast<T>(127); + } + +#ifndef TSTRING_IS_STD_STRING + template <class String> + bool RangeOk(const TBasicCharRef<String>& c) { + return RangeOk(static_cast<typename String::value_type>(c)); + } +#endif +} + +constexpr bool IsAscii(const int c) noexcept { + return !(c & ~0x7f); +} + +inline bool IsAsciiSpace(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE; +} + +inline bool IsAsciiUpper(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER; +} + +inline bool IsAsciiLower(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER; +} + +inline bool IsAsciiDigit(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT; +} + +inline bool IsAsciiAlpha(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA; +} + +inline bool IsAsciiAlnum(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM; +} + +inline bool IsAsciiHex(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX; +} + +inline bool IsAsciiPunct(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT; +} + +// some overloads + +template <class T> +inline bool IsAsciiSpace(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiSpace(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiUpper(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiUpper(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiLower(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiLower(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiDigit(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiDigit(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiAlpha(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiAlpha(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiAlnum(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiAlnum(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiHex(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiHex(static_cast<unsigned char>(c)); +} + +template <class T> +inline bool IsAsciiPunct(T c) { + return ::NPrivate::RangeOk(c) && IsAsciiPunct(static_cast<unsigned char>(c)); +} + +// some extra helpers +inline ui8 AsciiToLower(ui8 c) noexcept { + return ::NPrivate::ASCII_LOWER[c]; +} + +inline char AsciiToLower(char c) noexcept { + return (char)AsciiToLower((ui8)c); +} + +template <class T> +inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept { + return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c; +} + +template <class T> +inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept { + return IsAsciiLower(c) ? (c + ('A' - 'a')) : c; +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * BUGS: Currently will NOT work properly with strings that contain + * 0-terminator character inside. See IGNIETFERRO-1641 for details. + * + * @return true iff @c s1 ans @c s2 are case-insensitively equal. + */ +static inline bool AsciiEqualsIgnoreCase(const char* s1, const char* s2) noexcept { + return stricmp(s1, s2) == 0; +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * BUGS: Currently will NOT work properly with strings that contain + * 0-terminator character inside. See IGNIETFERRO-1641 for details. + * + * @return true iff @c s1 ans @c s2 are case-insensitively equal. + */ +static inline bool AsciiEqualsIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept { + return (s1.size() == s2.size()) && strnicmp(s1.data(), s2.data(), s1.size()) == 0; +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * BUGS: Currently will NOT work properly with strings that contain + * 0-terminator character inside. See IGNIETFERRO-1641 for details. + * + * @return 0 if strings are equal, negative if @c s1 < @c s2 + * and positive otherwise. + * (same value as @c stricmp does). + */ +static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcept { + return stricmp(s1, s2); +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * BUGS: Currently will NOT work properly with strings that contain + * 0-terminator character inside. See IGNIETFERRO-1641 for details. + * + * @return + * - zero if strings are equal + * - negative if @c s1 < @c s2 + * - positive otherwise, + * similar to stricmp. + */ +Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept; + +/** + * ASCII case-sensitive string comparison (for proper UTF8 strings + * case-sensitive comparison consider using @c library/cpp/charset). + * + * BUGS: Currently will NOT work properly with strings that contain + * 0-terminator character inside. See IGNIETFERRO-1641 for details. + * + * @return true iff @c s2 are case-sensitively prefix of @c s1. + */ +static inline bool AsciiHasPrefix(const TStringBuf s1, const TStringBuf s2) noexcept { + return (s1.size() >= s2.size()) && memcmp(s1.data(), s2.data(), s2.size()) == 0; +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * @return true iff @c s2 are case-insensitively prefix of @c s1. + */ +static inline bool AsciiHasPrefixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept { + return (s1.size() >= s2.size()) && strnicmp(s1.data(), s2.data(), s2.size()) == 0; +} + +/** + * ASCII case-insensitive string comparison (for proper UTF8 strings + * case-insensitive comparison consider using @c library/cpp/charset). + * + * @return true iff @c s2 are case-insensitively suffix of @c s1. + */ +static inline bool AsciiHasSuffixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept { + return (s1.size() >= s2.size()) && strnicmp((s1.data() + (s1.size() - s2.size())), s2.data(), s2.size()) == 0; +} |