#include <util/generic/hash.h>
#include <util/string/ascii.h>
#include <util/string/cast.h>
#include <util/generic/hash_set.h>
#include <util/generic/yexception.h>

#include "parser.h"

//#define DEBUG_ME 1

// Creates a text fragment with default-constructed Data, positioned at offset 0.
TCppSaxParser::TText::TText()
    : Offset(0)
{
}

// Creates a text fragment with default-constructed Data that starts at the given offset.
TCppSaxParser::TText::TText(ui64 offset)
    : Offset(offset)
{
}

// Creates a text fragment holding a copy of `data`, starting at the given offset.
TCppSaxParser::TText::TText(const TString& data, ui64 offset)
    : Data(data)
    , Offset(offset)
{
}

// Compiler-generated destruction, defined out of line in this translation unit.
TCppSaxParser::TText::~TText() = default;

// Drops the accumulated characters and moves Offset past them, so the
// fragment is ready to collect whatever immediately follows.
void TCppSaxParser::TText::Reset() noexcept {
    const auto consumed = Data.length();

    Data.clear();
    Offset += consumed;
}

// Special members of the callback interface: both are compiler-generated and
// defined out of line in this translation unit.
TCppSaxParser::TWorker::TWorker() noexcept = default;

TCppSaxParser::TWorker::~TWorker() = default;

// Character-by-character state machine behind TCppSaxParser.
//
// Input is split into code, string literals, character literals, one-line
// comments, multi-line comments and preprocessor directives; each finished
// fragment is handed to the matching TWorker callback together with its offset.
//
// Input may arrive in several Write() calls. Every piece of scanner state,
// including the context to return to after a nested string/comment
// (SavedState_), is kept in members so that a fragment may straddle chunks.
class TCppSaxParser::TImpl {
    // Lexical context the scanner is currently in.
    enum EState {
        Code,
        CommentBegin,        // a '/' was seen; the next character decides whether a comment starts
        String,
        Character,
        OneLineComment,
        MultiLineComment,
        MultiLineCommentEnd, // a '*' was seen inside a multi-line comment
        Preprocessor
    };

public:
    typedef TCppSaxParser::TText TText;
    typedef TCppSaxParser::TWorker TWorker;

    // Stores the worker (not owned here) and immediately notifies it via DoStart().
    inline TImpl(TWorker* worker)
        : State_(Code)
        , SavedState_(Code)
        , Worker_(worker)
        , SkipNext_(false)
        , Line_(0)
        , Column_(0)
    {
        Worker_->DoStart();
    }

    inline ~TImpl() = default;

    // Feeds the next `len` bytes of input, starting at `data`, to the scanner.
    inline void Write(const void* data, size_t len) {
        ProcessInput(static_cast<const char*>(data), len);
    }

    // Flushes the pending fragment and notifies the worker via DoEnd().
    // Throws yexception (see ThrowError) when input ends with pending text
    // inside a string, a character literal or a multi-line comment.
    inline void Finish() {
        if (State_ == CommentBegin) {
            // The input ended right after a '/' that never became a comment.
            // It is part of the enclosing code/preprocessor text; previously it
            // was either silently dropped or reported as a parse error.
            Text_.Data += '/';
            State_ = SavedState_;
        }

        if (!Text_.Data.empty()) {
            switch (State_) {
                case Code:
                    Worker_->DoCode(Text_);

                    break;

                case Preprocessor:
                    Worker_->DoPreprocessor(Text_);

                    break;

                case OneLineComment:
                    Worker_->DoOneLineComment(Text_);

                    break;

                default:
                    ThrowError();
            }
        }

        Worker_->DoEnd();
    }

private:
    // Runs the state machine over one chunk of input.
    //
    // SavedState_ remembers whether a nested string/character literal/comment
    // was entered from Code or from Preprocessor. It used to be a local that
    // was reset to Code on every call, which misclassified fragments whenever
    // a chunk boundary fell inside a construct nested in a preprocessor
    // directive.
    inline void ProcessInput(const char* data, size_t len) {
        while (len) {
            const char ch = *data;

            // Position bookkeeping, used only for error messages.
            if (ch == '\n') {
                ++Line_;
                Column_ = 0;
            } else {
                ++Column_;
            }

#if DEBUG_ME
            Cerr << "char: " << ch << Endl;
            Cerr << "state before: " << (unsigned int)State_ << Endl;
#endif

        // Re-dispatches the same character after a state change that did not consume it.
        retry:
            switch (State_) {
                case Code: {
                    SavedState_ = Code;
                    switch (ch) {
                        case '/':
                            State_ = CommentBegin;

                            break;

                        case '"':
                            // Flush the code collected so far, then start the literal with its quote.
                            Action(ch);
                            State_ = String;

                            break;

                        case '\'':
                            if (QuoteCharIsADigitSeparator()) {
                                Text_.Data += ch;
                                break;
                            }
                            Action(ch);
                            State_ = Character;

                            break;

                        case '#':
                            Action(ch);
                            State_ = Preprocessor;

                            break;

                        default:
                            Text_.Data += ch;

                            break;
                    }

                    break;
                }

                case CommentBegin: {
                    switch (ch) {
                        case '/':
                            // Flush the enclosing text under its own state; a one-line
                            // comment always returns to Code afterwards.
                            State_ = SavedState_;
                            SavedState_ = Code;
                            Action("//");
                            State_ = OneLineComment;

                            break;

                        case '*':
                            State_ = SavedState_;
                            Action("/*");
                            State_ = MultiLineComment;

                            break;

                        default:
                            // Not a comment: the '/' belongs to the enclosing text.
                            Text_.Data += '/';
                            State_ = SavedState_;

                            goto retry;
                    }

                    break;
                }

                case OneLineComment: {
                    switch (ch) {
                        case '\n':
                            // The newline is not part of the comment; it starts the next code fragment.
                            Action(ch);
                            State_ = Code;

                            break;

                        default:
                            Text_.Data += ch;

                            break;
                    }

                    break;
                }

                case MultiLineComment: {
                    switch (ch) {
                        case '*':
                            Text_.Data += ch;
                            State_ = MultiLineCommentEnd;

                            break;

                        case '\n':
                            Text_.Data += ch;
                            // A line break inside the comment ends any enclosing preprocessor directive.
                            SavedState_ = Code;

                            break;
                        default:
                            Text_.Data += ch;

                            break;
                    }

                    break;
                }

                case MultiLineCommentEnd: {
                    switch (ch) {
                        case '/':
                            Text_.Data += ch;
                            Action();
                            State_ = SavedState_;

                            break;

                        default:
                            State_ = MultiLineComment;

                            goto retry;
                    }

                    break;
                }

                case String: {
                    switch (ch) {
                        case '"':
                            Text_.Data += ch;

                            if (SkipNext_) {
                                // Escaped quote: stay inside the literal.
                                SkipNext_ = false;
                            } else {
                                // Literals inside a preprocessor directive stay part of the directive text.
                                if (SavedState_ == Code) {
                                    Action();
                                }
                                State_ = SavedState_;
                            }

                            break;

                        case '\\':
                            Text_.Data += ch;
                            // Toggle so that an escaped backslash does not escape the next character.
                            SkipNext_ = !SkipNext_;

                            break;

                        default:
                            Text_.Data += ch;
                            SkipNext_ = false;

                            break;
                    }

                    break;
                }

                case Character: {
                    switch (ch) {
                        case '\'':
                            Text_.Data += ch;

                            if (SkipNext_) {
                                SkipNext_ = false;
                            } else {
                                if (SavedState_ == Code) {
                                    Action();
                                }
                                State_ = SavedState_;
                            }

                            break;

                        case '\\':
                            Text_.Data += ch;
                            SkipNext_ = !SkipNext_;

                            break;

                        default:
                            Text_.Data += ch;
                            SkipNext_ = false;

                            break;
                    }

                    break;
                }

                case Preprocessor: {
                    SavedState_ = Preprocessor;
                    switch (ch) {
                        case '/':
                            State_ = CommentBegin;

                            break;

                        case '\'':
                            Text_.Data += ch;
                            State_ = Character;

                            break;

                        case '"':
                            Text_.Data += ch;
                            State_ = String;

                            break;
                        case '\n':
                            Text_.Data += ch;

                            if (SkipNext_) {
                                // Backslash-newline: the directive continues on the next line.
                                SkipNext_ = false;
                            } else {
                                Action();
                                SavedState_ = Code;
                                State_ = Code;
                            }

                            break;

                        case '\\':
                            Text_.Data += ch;
                            SkipNext_ = true;

                            break;

                        default:
                            Text_.Data += ch;
                            SkipNext_ = false;

                            break;
                    }

                    break;
                }

                default:
                    ThrowError();
            }

#if DEBUG_ME
            Cerr << "state after: " << (unsigned int)State_ << Endl;
#endif

            ++data;
            --len;
        }
    }

    // digit separator in integral literal (ex. 73'709'550'592)
    // Decides, from the code collected so far, whether a just-seen apostrophe
    // continues a numeric literal rather than opening a character literal.
    bool QuoteCharIsADigitSeparator() const {
        const TStringBuf data = Text_.Data;
        if (data.empty()) {
            return false;
        }
        if (!IsAsciiHex(data.back())) {
            return false;
        }
        // check for char literal prefix (ex. `u8'$'`)
        static constexpr TStringBuf literalPrefixes[] {
            "u8",
            "u",
            "U",
            "L",
        };
        for (const TStringBuf& literalPrefix : literalPrefixes) {
            if (TStringBuf prev; data.BeforeSuffix(literalPrefix, prev)) {
                if (!prev.empty() && (IsAsciiAlnum(prev.back()) || prev.back() == '_' || prev.back() == '$')) {
                    // some macro name ends with an `u8` sequence
                    continue;
                }
                // it is a prefixed character literal
                return false;
            }
        }
        return true;
    }

    // Flushes the pending fragment, then starts the next one with `ch`.
    inline void Action(char ch) {
        Action();
        Text_.Data += ch;
    }

    // Flushes the pending fragment, then starts the next one with `st`.
    inline void Action(const char* st) {
        Action();
        Text_.Data += st;
    }

    // Hands the pending fragment to the worker callback that corresponds to the
    // current state and resets the fragment (advancing its offset).
    // Throws for states that have no callback of their own.
    inline void Action() {
        switch (State_) {
            case Code:
                Worker_->DoCode(Text_);

                break;

            case OneLineComment:
                Worker_->DoOneLineComment(Text_);

                break;

            case MultiLineCommentEnd:
                Worker_->DoMultiLineComment(Text_);

                break;

            case Preprocessor:
                Worker_->DoPreprocessor(Text_);

                break;

            case String:
                Worker_->DoString(Text_);

                break;

            case Character:
                Worker_->DoCharacter(Text_);

                break;

            default:
                ThrowError();
        }

        Text_.Reset();
    }

    // Throws yexception carrying the current line/column counters.
    inline void ThrowError() const {
        ythrow yexception() << "can not parse source(line = " << (unsigned)Line_ + 1 << ", column = " << (unsigned)Column_ + 1 << ")";
    }

private:
    EState State_;      // current lexical context
    EState SavedState_; // Code or Preprocessor: context to return to after a nested literal/comment
    TWorker* Worker_;   // receiver of the callbacks
    TText Text_;        // fragment being accumulated
    bool SkipNext_;     // the previous character was an unescaped backslash
    ui64 Line_;         // number of '\n' characters seen so far
    ui64 Column_;       // number of characters seen since the last '\n'
};

// Creates the scanner implementation for the given worker. Constructing TImpl
// calls worker->DoStart(), so the worker is notified before any input arrives.
TCppSaxParser::TCppSaxParser(TWorker* worker)
    : Impl_(new TImpl(worker))
{
}

// Defined here, after TImpl, so that the destructor sees the complete TImpl type.
TCppSaxParser::~TCppSaxParser() = default;

// Forwards the next `len` bytes of source text at `data` to the scanner.
void TCppSaxParser::DoWrite(const void* data, size_t len) {
    Impl_->Write(data, len);
}

// Signals end of input: the scanner flushes its pending fragment and calls
// the worker's DoEnd(). TImpl::Finish may throw on unparsable trailing input.
void TCppSaxParser::DoFinish() {
    Impl_->Finish();
}

// Special members of TCppSimpleSax: nothing to initialise or release here.
TCppSimpleSax::TCppSimpleSax() noexcept {
}

TCppSimpleSax::~TCppSimpleSax() = default;

void TCppSimpleSax::DoCode(const TText& text) {
    static const char char_types[] = {
        2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1,
        2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};

    static const char CWHITESPACE = 0;
    static const char CIDENTIFIER = 1;
    static const char CSYNTAX = 2;

    enum EState {
        WhiteSpace = CWHITESPACE,
        Identifier = CIDENTIFIER,
        Syntax = CSYNTAX
    };

    EState state = Identifier;
    TText cur(text.Offset);

    for (const auto& it : text.Data) {
        const unsigned char ch = *(const unsigned char*)(&it);
        const char type = char_types[ch];

        switch (state) {
            case Identifier: {
                switch (type) {
                    case CIDENTIFIER:
                        cur.Data += ch;

                        break;

                    default:
                        if (!cur.Data.empty()) {
                            DoIdentifier(cur);
                        }

                        cur.Reset();
                        cur.Data += ch;
                        state = (EState)type;

                        break;
                }

                break;
            }

            case WhiteSpace: {
                switch (type) {
                    case CWHITESPACE:
                        cur.Data += ch;

                        break;

                    default:
                        DoWhiteSpace(cur);
                        cur.Reset();
                        cur.Data += ch;
                        state = (EState)type;

                        break;
                }

                break;
            }

            case Syntax: {
                switch (type) {
                    case CSYNTAX:
                        cur.Data += ch;

                        break;

                    default:
                        DoSyntax(cur);
                        cur.Reset();
                        cur.Data += ch;
                        state = (EState)type;

                        break;
                }

                break;
            }
        }
    }

    if (!cur.Data.empty()) {
        switch (state) {
            case Identifier:
                DoIdentifier(cur);

                break;

            case WhiteSpace:
                DoWhiteSpace(cur);

                break;

            case Syntax:
                DoSyntax(cur);

                break;
        }
    }
}

// Classification helpers for TCppFullSax: a keyword set plus number matchers.
class TCppFullSax::TImpl {
    using TKeyWords = THashSet<TString>;

    // Stand-in matcher: the pattern is accepted but ignored, and Match()
    // reports "no match" for every input.
    class TRegExp {
    public:
        inline TRegExp(const char* /*pattern*/) {
        }

        inline bool Match(const TString& /*s*/) const noexcept {
            return false;
        }
    };

public:
    // Sets up the number matchers and registers the built-in keyword list.
    inline TImpl()
        : OctNumber_("^[+-]?0[0-7]+$")
        , HexNumber_("^[+-]?0x[0-9A-Fa-f]+$")
        , DecNumber_("^[+-]?[0-9]+$")
        , FltNumber_("^[+-]?[0-9]*\\.[0-9]*$")
    {
        static const char* const builtinKeywords[] = {
            "extern",
            "static",
            "inline",
            "volatile",
            "asm",
            "const",
            "mutable",
            "char",
            "signed",
            "unsigned",
            "int",
            "short",
            "long",
            "double",
            "float",
            "bool",
            "class",
            "struct",
            "union",
            "void",
            "auto",
            "throw",
            "try",
            "catch",
            "for",
            "do",
            "if",
            "else",
            "while",
            "switch",
            "case",
            "default",
            "goto",
            "break",
            "continue",
            "virtual",
            "template",
            "typename",
            "enum",
            "public",
            "private",
            "protected",
            "using",
            "namespace",
            "typedef",
            "true",
            "false",
            "return",
            "new",
            "delete",
            "operator",
            "friend",
            "this",
        };

        for (const char* keyword : builtinKeywords) {
            AddKeyword(keyword);
        }
    }

    inline ~TImpl() = default;

    // Adds `keyword` to the set; adding an already known keyword changes nothing.
    inline void AddKeyword(const TString& keyword) {
        KeyWords_.insert(keyword);
    }

    // True when `s` is one of the registered keywords.
    inline bool IsKeyword(const TString& s) {
        const auto found = KeyWords_.find(s);

        return found != KeyWords_.end();
    }

    // The four checks below delegate to the corresponding matcher.
    inline bool IsOctNumber(const TString& s) {
        return OctNumber_.Match(s);
    }

    inline bool IsHexNumber(const TString& s) {
        return HexNumber_.Match(s);
    }

    inline bool IsDecNumber(const TString& s) {
        return DecNumber_.Match(s);
    }

    inline bool IsFloatNumber(const TString& s) {
        return FltNumber_.Match(s);
    }

private:
    const TRegExp OctNumber_;
    const TRegExp HexNumber_;
    const TRegExp DecNumber_;
    const TRegExp FltNumber_;
    TKeyWords KeyWords_;
};

// Creates the classification helper; its constructor registers the built-in keywords.
TCppFullSax::TCppFullSax()
    : Impl_(new TImpl())
{
}

// Defined here, after TImpl, so that the destructor sees the complete TImpl type.
TCppFullSax::~TCppFullSax() = default;

// Registers an additional keyword; identifiers equal to it are afterwards
// reported through DoKeyword() by DoIdentifier().
void TCppFullSax::AddKeyword(const TString& keyword) {
    Impl_->AddKeyword(keyword);
}

// Classifies an identifier-like token and forwards it to exactly one handler.
// Checks run in a fixed order: keyword, octal, hexadecimal, decimal, float;
// anything that matches none of them is reported as a plain name.
void TCppFullSax::DoIdentifier(const TText& text) {
    const auto& token = text.Data;

    if (Impl_->IsKeyword(token)) {
        DoKeyword(text);
        return;
    }

    if (Impl_->IsOctNumber(token)) {
        DoOctNumber(text);
        return;
    }

    if (Impl_->IsHexNumber(token)) {
        DoHexNumber(text);
        return;
    }

    if (Impl_->IsDecNumber(token)) {
        DoDecNumber(text);
        return;
    }

    if (Impl_->IsFloatNumber(token)) {
        DoFloatNumber(text);
        return;
    }

    DoName(text);
}

// Default event handlers of TCppFullSax. Each one deliberately does nothing.
// NOTE(review): presumably these are virtual and meant to be overridden
// selectively by subclasses - confirm against the declarations in parser.h.

// End of input.
void TCppFullSax::DoEnd() {
}

// Start of input.
void TCppFullSax::DoStart() {
}

// String literal.
void TCppFullSax::DoString(const TText&) {
}

// Character literal.
void TCppFullSax::DoCharacter(const TText&) {
}

// Run of whitespace inside code.
void TCppFullSax::DoWhiteSpace(const TText&) {
}

// Identifier found in the keyword set (dispatched by DoIdentifier).
void TCppFullSax::DoKeyword(const TText&) {
}

// Identifier that is neither a keyword nor a recognised number (dispatched by DoIdentifier).
void TCppFullSax::DoName(const TText&) {
}

// Octal number (dispatched by DoIdentifier).
void TCppFullSax::DoOctNumber(const TText&) {
}

// Hexadecimal number (dispatched by DoIdentifier).
void TCppFullSax::DoHexNumber(const TText&) {
}

// Decimal number (dispatched by DoIdentifier).
void TCppFullSax::DoDecNumber(const TText&) {
}

// Floating-point number (dispatched by DoIdentifier).
void TCppFullSax::DoFloatNumber(const TText&) {
}

// Run of non-identifier, non-whitespace characters inside code.
void TCppFullSax::DoSyntax(const TText&) {
}

// One-line comment.
void TCppFullSax::DoOneLineComment(const TText&) {
}

// Multi-line comment.
void TCppFullSax::DoMultiLineComment(const TText&) {
}

// Preprocessor directive.
void TCppFullSax::DoPreprocessor(const TText&) {
}