#pragma once

#include "doccodes.h"

#include <util/charset/recode_result.h>
#include <util/charset/unidata.h> // all wchar32 functions
#include <util/charset/utf8.h>
#include <util/generic/string.h>
#include <util/generic/ylimits.h>
#include <util/generic/yexception.h>
#include <util/system/yassert.h>
#include <util/system/defaults.h>

#include <cctype>

struct CodePage;
struct Recoder;
struct Encoder;

/*****************************************************************\
*                    struct CodePage                              *
\*****************************************************************/
struct CodePage {
    ECharset CPEnum;       // int MIBEnum;
    const char* Names[30]; // name[0] -- preferred mime-name
    wchar32 unicode[256];
    const char* DefaultChar; //[CCL_NUM]

    bool IsLower(unsigned char ch) const {
        return ::IsLower(unicode[ch]);
    }
    bool IsUpper(unsigned char ch) const {
        return ::IsUpper(unicode[ch]);
    }
    bool IsAlpha(unsigned char ch) const {
        return ::IsAlpha(unicode[ch]);
    }
    bool IsDigit(unsigned char ch) const {
        return ::IsDigit(unicode[ch]);
    }
    bool IsXdigit(unsigned char ch) const {
        return ::IsXdigit(unicode[ch]);
    }
    bool IsAlnum(unsigned char ch) const {
        return ::IsAlnum(unicode[ch]);
    }
    bool IsSpace(unsigned char ch) const {
        return ::IsSpace(unicode[ch]);
    }
    bool IsPunct(unsigned char ch) const {
        return ::IsPunct(unicode[ch]);
    }
    bool IsCntrl(unsigned char ch) const {
        return ::IsCntrl(unicode[ch]);
    }
    bool IsGraph(unsigned char ch) const {
        return ::IsGraph(unicode[ch]);
    }
    bool IsPrint(unsigned char ch) const {
        return ::IsPrint(unicode[ch]);
    }
    bool IsComposed(unsigned char ch) const {
        return ::IsComposed(unicode[ch]);
    }

    // return pointer to char after the last char
    char* ToLower(const char* begin, const char* end, char* to) const;
    char* ToLower(const char* begin, char* to) const;

    // return pointer to char after the last char
    char* ToUpper(const char* begin, const char* end, char* to) const;
    char* ToUpper(const char* begin, char* to) const;

    int stricmp(const char* s1, const char* s2) const;
    int strnicmp(const char* s1, const char* s2, size_t len) const;

    inline unsigned char ToUpper(unsigned char ch) const;
    inline unsigned char ToLower(unsigned char ch) const;
    inline unsigned char ToTitle(unsigned char ch) const;

    inline int ToDigit(unsigned char ch) const {
        return ::ToDigit(unicode[ch]);
    }

    static void Initialize();

    inline bool SingleByteCodepage() const {
        return DefaultChar != nullptr;
    }
    inline bool NativeCodepage() const {
        return SingleByteCodepage() || CPEnum == CODES_UTF8;
    }
};

class TCodePageHash;

namespace NCodepagePrivate {
    class TCodepagesMap {
    private:
        static const int DataShift = 2;
        static const int DataSize = CODES_MAX + DataShift;
        const CodePage* Data[DataSize];

    private:
        inline const CodePage* GetPrivate(ECharset e) const {
            Y_ASSERT(e + DataShift >= 0 && e + DataShift < DataSize);
            return Data[e + DataShift];
        }

        void SetData(const CodePage* cp);

    public:
        TCodepagesMap();

        inline const CodePage* Get(ECharset e) const {
            const CodePage* res = GetPrivate(e);
            if (!res->SingleByteCodepage()) {
                ythrow yexception() << "CodePage (" << (int)e << ") structure can only be used for single byte encodings";
            }

            return res;
        }

        inline bool SingleByteCodepage(ECharset e) const {
            return GetPrivate(e)->SingleByteCodepage();
        }
        inline bool NativeCodepage(ECharset e) const {
            return GetPrivate(e)->NativeCodepage();
        }
        inline const char* NameByCharset(ECharset e) const {
            return GetPrivate(e)->Names[0];
        }

        static const TCodepagesMap& Instance();

        friend class ::TCodePageHash;
    };

    inline bool NativeCodepage(ECharset e) {
        return ::NCodepagePrivate::TCodepagesMap::Instance().NativeCodepage(e);
    }
}

inline bool SingleByteCodepage(ECharset e) {
    return ::NCodepagePrivate::TCodepagesMap::Instance().SingleByteCodepage(e);
}

inline bool ValidCodepage(ECharset e) {
    return e >= 0 && e < CODES_MAX;
}

inline const CodePage* CodePageByCharset(ECharset e) {
    return ::NCodepagePrivate::TCodepagesMap::Instance().Get(e);
}

ECharset CharsetByName(TStringBuf name);

// Same as CharsetByName, but throws yexception() if name is invalid
ECharset CharsetByNameOrDie(TStringBuf name);

inline ECharset CharsetByCodePage(const CodePage* CP) {
    return CP->CPEnum;
}

inline const char* NameByCharset(ECharset e) {
    return ::NCodepagePrivate::TCodepagesMap::Instance().NameByCharset(e);
}

inline const char* NameByCharsetSafe(ECharset e) {
    if (CODES_UNKNOWN < e && e < CODES_MAX)
        return ::NCodepagePrivate::TCodepagesMap::Instance().NameByCharset(e);
    else
        ythrow yexception() << "unknown encoding: " << (int)e;
}

inline const char* NameByCodePage(const CodePage* CP) {
    return CP->Names[0];
}

inline const CodePage* CodePageByName(const char* name) {
    ECharset code = CharsetByName(name);
    if (code == CODES_UNKNOWN)
        return nullptr;

    return CodePageByCharset(code);
}

ECharset EncodingHintByName(const char* name);

/*****************************************************************\
*                    struct Encoder                               *
\*****************************************************************/
struct Encoder {
    char* Table[256];
    const char* DefaultChar;

    inline char Code(wchar32 ch) const {
        if (ch > 0xFFFF)
            return 0;
        return (unsigned char)Table[(ch >> 8) & 255][ch & 255];
    }

    inline char Tr(wchar32 ch) const {
        char code = Code(ch);
        if (code == 0 && ch != 0)
            code = DefaultChar[NUnicode::CharType(ch)];
        Y_ASSERT(code != 0 || ch == 0);
        return code;
    }

    inline unsigned char operator[](wchar32 ch) const {
        return Tr(ch);
    }

    void Tr(const wchar32* in, char* out, size_t len) const;
    void Tr(const wchar32* in, char* out) const;
    char* DefaultPlane;
};

/*****************************************************************\
*                    struct Recoder                               *
\*****************************************************************/
struct Recoder {
    unsigned char Table[257];

    void Create(const CodePage& source, const CodePage& target);
    void Create(const CodePage& source, const Encoder* wideTarget);

    void Create(const CodePage& page, wchar32 (*mapper)(wchar32));
    void Create(const CodePage& page, const Encoder* widePage, wchar32 (*mapper)(wchar32));

    inline unsigned char Tr(unsigned char c) const {
        return Table[c];
    }
    inline unsigned char operator[](unsigned char c) const {
        return Table[c];
    }
    void Tr(const char* in, char* out, size_t len) const;
    void Tr(const char* in, char* out) const;
    void Tr(char* in_out, size_t len) const;
    void Tr(char* in_out) const;
};

extern const struct Encoder& WideCharToYandex;

const Encoder& EncoderByCharset(ECharset enc);

namespace NCodepagePrivate {
    class TCodePageData {
    private:
        static const CodePage* const AllCodePages[];

        static const Recoder rcdr_to_yandex[];
        static const Recoder rcdr_from_yandex[];
        static const Recoder rcdr_to_lower[];
        static const Recoder rcdr_to_upper[];
        static const Recoder rcdr_to_title[];

        static const Encoder* const EncodeTo[];

        friend struct ::CodePage;
        friend class TCodepagesMap;
        friend RECODE_RESULT _recodeToYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&);
        friend RECODE_RESULT _recodeFromYandex(ECharset, const char*, char*, size_t, size_t, size_t&, size_t&);
        friend const Encoder& ::EncoderByCharset(ECharset enc);
    };
}

inline const Encoder& EncoderByCharset(ECharset enc) {
    if (!SingleByteCodepage(enc)) {
        ythrow yexception() << "Encoder structure can only be used for single byte encodings";
    }

    return *NCodepagePrivate::TCodePageData::EncodeTo[enc];
}

inline unsigned char CodePage::ToUpper(unsigned char ch) const {
    return NCodepagePrivate::TCodePageData::rcdr_to_upper[CPEnum].Table[ch];
}
inline unsigned char CodePage::ToLower(unsigned char ch) const {
    return NCodepagePrivate::TCodePageData::rcdr_to_lower[CPEnum].Table[ch];
}
inline unsigned char CodePage::ToTitle(unsigned char ch) const {
    return NCodepagePrivate::TCodePageData::rcdr_to_title[CPEnum].Table[ch];
}

extern const CodePage& csYandex;

/// these functions change (lowers) [end] position in case of utf-8
/// null character is NOT assumed or written at [*end]
void DecodeUnknownPlane(wchar16* start, wchar16*& end, const ECharset enc4unk);
void DecodeUnknownPlane(wchar32* start, wchar32*& end, const ECharset enc4unk);

inline void ToLower(char* s, size_t n, const CodePage& cp = csYandex) {
    char* const e = s + n;
    for (; s != e; ++s)
        *s = cp.ToLower(*s);
}

inline void ToUpper(char* s, size_t n, const CodePage& cp = csYandex) {
    char* const e = s + n;
    for (; s != e; ++s)
        *s = cp.ToUpper(*s);
}

inline TString ToLower(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) {
    s.Transform([&cp](size_t, char c) { return cp.ToLower(c); }, pos, n);
    return s;
}

inline TString ToUpper(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) {
    s.Transform([&cp](size_t, char c) { return cp.ToUpper(c); }, pos, n);
    return s;
}

inline TString ToTitle(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) {
    s.Transform(
        [pos, &cp](size_t i, char c) {
            return i == pos ? cp.ToTitle(c) : cp.ToLower(c);
        },
        pos,
        n);
    return s;
}