#pragma once
#include <util/system/defaults.h>
#include <util/system/compat.h>
#include <util/generic/string.h>
// ctype.h-like functions, locale-independent:
// IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space} and
// AsciiTo{Upper,Lower}
//
// standard functions from <ctype.h> are locale dependent,
// and cause undefined behavior when called on chars outside [0..127] range
namespace NPrivate {
    // Character class bit flags. Each entry of ASCII_CLASS is a combination of
    // these bits, tested by the IsAscii* helpers with a bitwise AND.
    enum ECharClass {
        CC_SPACE = 1 << 0,
        CC_UPPER = 1 << 1,
        CC_LOWER = 1 << 2,
        CC_DIGIT = 1 << 3,
        CC_ALPHA = 1 << 4,
        CC_ALNUM = 1 << 5,
        CC_ISHEX = 1 << 6,
        CC_PUNCT = 1 << 7,
    };

    // Lookup tables indexed by an unsigned char value; only declared here.
    extern const unsigned char ASCII_CLASS[256];
    extern const unsigned char ASCII_LOWER[256];

    // Maps a character-like type to the value type the case-conversion
    // helpers return. By default this is the type itself.
    template <class T>
    struct TDereference {
        using type = T;
    };

#ifndef TSTRING_IS_STD_STRING
    // A character reference proxy yields the value type of its string.
    template <class String>
    struct TDereference<TBasicCharRef<String>> {
        using type = typename String::value_type;
    };
#endif

    // Shorthand for TDereference<T>::type.
    template <class T>
    using TDereferenced = typename TDereference<T>::type;

    // Tells whether `c` may be narrowed to unsigned char for a table lookup:
    // one-byte types are always accepted, wider types only within [0, 127].
    template <class T>
    bool RangeOk(T c) noexcept {
        static_assert(std::is_integral<T>::value, "Integral type character expected");
        const bool singleByte = (sizeof(T) == 1);
        return singleByte || (c >= static_cast<T>(0) && c <= static_cast<T>(127));
    }

#ifndef TSTRING_IS_STD_STRING
    // Same check for a character reference proxy: convert it to the plain
    // character value first, then apply the integral overload.
    template <class String>
    bool RangeOk(const TBasicCharRef<String>& c) {
        using TChar = typename String::value_type;
        const TChar value = static_cast<TChar>(c);
        return RangeOk(value);
    }
#endif
}
// Returns true iff no bit above the low seven is set in `c`, i.e. `c` is in [0, 127].
constexpr bool IsAscii(const int c) noexcept {
    return (c & ~0x7f) == 0;
}
// True iff the class table marks `c` with the CC_SPACE bit.
inline bool IsAsciiSpace(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE) != 0;
}
// True iff the class table marks `c` with the CC_UPPER bit.
inline bool IsAsciiUpper(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER) != 0;
}
// True iff the class table marks `c` with the CC_LOWER bit.
inline bool IsAsciiLower(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER) != 0;
}
// True iff the class table marks `c` with the CC_DIGIT bit.
inline bool IsAsciiDigit(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT) != 0;
}
// True iff the class table marks `c` with the CC_ALPHA bit.
inline bool IsAsciiAlpha(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA) != 0;
}
// True iff the class table marks `c` with the CC_ALNUM bit.
inline bool IsAsciiAlnum(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM) != 0;
}
// True iff the class table marks `c` with the CC_ISHEX bit.
inline bool IsAsciiHex(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX) != 0;
}
// True iff the class table marks `c` with the CC_PUNCT bit.
inline bool IsAsciiPunct(unsigned char c) {
    return (::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT) != 0;
}
// some overloads
// Generic overload: values rejected by ::NPrivate::RangeOk are never spaces;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiSpace(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiSpace(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never upper-case;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiUpper(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiUpper(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never lower-case;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiLower(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiLower(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never digits;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiDigit(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiDigit(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never letters;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiAlpha(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiAlpha(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never alphanumeric;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiAlnum(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiAlnum(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never hex digits;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiHex(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiHex(static_cast<unsigned char>(c));
}
// Generic overload: values rejected by ::NPrivate::RangeOk are never punctuation;
// the rest are narrowed to unsigned char and looked up.
template <class T>
inline bool IsAsciiPunct(T c) {
    if (!::NPrivate::RangeOk(c)) {
        return false;
    }
    return IsAsciiPunct(static_cast<unsigned char>(c));
}
// some extra helpers
// Table-driven conversion: returns the ASCII_LOWER entry for `c`.
inline ui8 AsciiToLower(ui8 c) noexcept {
    const ui8 lowered = ::NPrivate::ASCII_LOWER[c];
    return lowered;
}
inline char AsciiToLower(char c) noexcept {
return (char)AsciiToLower((ui8)c);
}
// Generic overload: values in [0, 127] are converted through the ui8 overload,
// anything else is returned unchanged.
template <class T>
inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept {
    using TResult = ::NPrivate::TDereferenced<T>;
    if (c >= 0 && c <= 127) {
        return static_cast<TResult>(AsciiToLower(static_cast<ui8>(c)));
    }
    return c;
}
// Converts an ASCII lower-case letter to upper case by applying the constant
// offset ('A' - 'a'); every other value is returned unchanged.
template <class T>
inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept {
    if (IsAsciiLower(c)) {
        return c + ('A' - 'a');
    }
    return c;
}
/**
 * ASCII case-insensitive equality check for 0-terminated strings (for proper
 * UTF8 strings case-insensitive comparison consider using
 * @c library/cpp/charset).
 *
 * BUGS: Currently will NOT work properly with strings that contain
 * 0-terminator character inside. See IGNIETFERRO-1641 for details.
 *
 * @param s1 first 0-terminated string.
 * @param s2 second 0-terminated string.
 * @return true iff @c s1 and @c s2 are case-insensitively equal.
 */
static inline bool AsciiEqualsIgnoreCase(const char* s1, const char* s2) noexcept {
    const int cmp = stricmp(s1, s2);
    return cmp == 0;
}
/**
 * ASCII case-insensitive equality check for string buffers (for proper UTF8
 * strings case-insensitive comparison consider using @c library/cpp/charset).
 *
 * BUGS: Currently will NOT work properly with strings that contain
 * 0-terminator character inside. See IGNIETFERRO-1641 for details.
 *
 * @param s1 first string.
 * @param s2 second string.
 * @return true iff @c s1 and @c s2 are case-insensitively equal.
 */
static inline bool AsciiEqualsIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
    // Strings of different length are never equal; two empty strings always
    // are, and in that case strnicmp is not called at all.
    return s1.size() == s2.size() &&
           (s1.empty() || strnicmp(s1.data(), s2.data(), s1.size()) == 0);
}
/**
 * ASCII case-insensitive three-way comparison of 0-terminated strings (for
 * proper UTF8 strings case-insensitive comparison consider using
 * @c library/cpp/charset).
 *
 * BUGS: Currently will NOT work properly with strings that contain
 * 0-terminator character inside. See IGNIETFERRO-1641 for details.
 *
 * @param s1 first 0-terminated string.
 * @param s2 second 0-terminated string.
 * @return 0 if strings are equal, negative if @c s1 < @c s2
 *         and positive otherwise
 *         (the value is forwarded from @c stricmp unchanged).
 */
static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcept {
    const int order = stricmp(s1, s2);
    return order;
}
/**
 * ASCII case-insensitive three-way string comparison (for proper UTF8 strings
 * case-insensitive comparison consider using @c library/cpp/charset).
 *
 * Only the declaration lives here; the function is defined out of line.
 *
 * BUGS: Currently will NOT work properly with strings that contain
 * 0-terminator character inside. See IGNIETFERRO-1641 for details.
 *
 * @param s1 left-hand string.
 * @param s2 right-hand string.
 * @return
 * - zero if strings are equal
 * - negative if @c s1 < @c s2
 * - positive otherwise,
 * similar to stricmp.
 */
Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept;
/**
* ASCII case-sensitive string comparison (for proper UTF8 strings
* case-sensitive comparison consider using @c library/cpp/charset).
*
* BUGS: Currently will NOT work properly with strings that contain
* 0-terminator character inside. See IGNIETFERRO-1641 for details.
*
* @return true iff @c s2 are case-sensitively prefix of @c s1.
*/
static inline bool AsciiHasPrefix(const TStringBuf s1, const TStringBuf s2) noexcept {
return (s1.size() >= s2.size()) && memcmp(s1.data(), s2.data(), s2.size()) == 0;
}
/**
* ASCII case-insensitive string comparison (for proper UTF8 strings
* case-insensitive comparison consider using @c library/cpp/charset).
*
* @return true iff @c s2 are case-insensitively prefix of @c s1.
*/
static inline bool AsciiHasPrefixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
return (s1.size() >= s2.size()) && strnicmp(s1.data(), s2.data(), s2.size()) == 0;
}
/**
* ASCII case-insensitive string comparison (for proper UTF8 strings
* case-insensitive comparison consider using @c library/cpp/charset).
*
* @return true iff @c s2 are case-insensitively suffix of @c s1.
*/
static inline bool AsciiHasSuffixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
return (s1.size() >= s2.size()) && strnicmp((s1.data() + (s1.size() - s2.size())), s2.data(), s2.size()) == 0;
}