#pragma once

#include "public.h"
#include "zigzag.h"

#include <util/generic/vector.h>
#include <util/generic/maybe.h>
#include <util/generic/buffer.h>
#include <util/string/escape.h>
#include <util/string/cast.h>
#include <util/stream/input.h>

namespace NYson {
    namespace NDetail {
        ////////////////////////////////////////////////////////////////////////////////

        //! Indicates the beginning of a list.
        const char BeginListSymbol = '[';
        //! Indicates the end of a list.
        const char EndListSymbol = ']';

        //! Indicates the beginning of a map.
        const char BeginMapSymbol = '{';
        //! Indicates the end of a map.
        const char EndMapSymbol = '}';

        //! Indicates the beginning of an attribute map.
        const char BeginAttributesSymbol = '<';
        //! Indicates the end of an attribute map.
        const char EndAttributesSymbol = '>';

        //! Separates items in lists.
        const char ListItemSeparatorSymbol = ';';
        //! Separates items in maps, attributes.
        const char KeyedItemSeparatorSymbol = ';';
        //! Separates keys from values in maps.
        const char KeyValueSeparatorSymbol = '=';

        //! Indicates an entity.
        const char EntitySymbol = '#';

        //! Indicates end of stream.
        const char EndSymbol = '\0';

        //! Marks the beginning of a binary string literal.
        const char StringMarker = '\x01';
        //! Marks the beginning of a binary i64 literal.
        const char Int64Marker = '\x02';
        //! Marks the beginning of a binary double literal.
        const char DoubleMarker = '\x03';
        //! Marks true and false values of boolean.
        const char FalseMarker = '\x04';
        const char TrueMarker = '\x05';
        //! Marks the beginning of a binary ui64 literal.
        const char Uint64Marker = '\x06';

        ////////////////////////////////////////////////////////////////////////////////

        template <bool EnableLinePositionInfo>
        class TPositionInfo;

        template <>
        class TPositionInfo<true> {
        private:
            int Offset;
            int Line;
            int Column;

        public:
            TPositionInfo()
                : Offset(0)
                , Line(1)
                , Column(1)
            {
            }

            void OnRangeConsumed(const char* begin, const char* end) {
                Offset += end - begin;
                for (auto current = begin; current != end; ++current) {
                    ++Column;
                    if (*current == '\n') { //TODO: memchr
                        ++Line;
                        Column = 1;
                    }
                }
            }
        };

        template <>
        class TPositionInfo<false> {
        private:
            int Offset;

        public:
            TPositionInfo()
                : Offset(0)
            {
            }

            void OnRangeConsumed(const char* begin, const char* end) {
                Offset += end - begin;
            }
        };

        template <class TBlockStream, class TPositionBase>
        class TCharStream
           : public TBlockStream,
              public TPositionBase {
        public:
            TCharStream(const TBlockStream& blockStream)
                : TBlockStream(blockStream)
            {
            }

            bool IsEmpty() const {
                return TBlockStream::Begin() == TBlockStream::End();
            }

            template <bool AllowFinish>
            void Refresh() {
                while (IsEmpty() && !TBlockStream::IsFinished()) {
                    TBlockStream::RefreshBlock();
                }
                if (IsEmpty() && TBlockStream::IsFinished() && !AllowFinish) {
                    ythrow TYsonException() << "Premature end of yson stream";
                }
            }

            void Refresh() {
                return Refresh<false>();
            }

            template <bool AllowFinish>
            char GetChar() {
                Refresh<AllowFinish>();
                return !IsEmpty() ? *TBlockStream::Begin() : '\0';
            }

            char GetChar() {
                return GetChar<false>();
            }

            void Advance(size_t bytes) {
                TPositionBase::OnRangeConsumed(TBlockStream::Begin(), TBlockStream::Begin() + bytes);
                TBlockStream::Advance(bytes);
            }

            size_t Length() const {
                return TBlockStream::End() - TBlockStream::Begin();
            }
        };

        template <class TBaseStream>
        class TCodedStream
           : public TBaseStream {
        private:
            static const int MaxVarintBytes = 10;
            static const int MaxVarint32Bytes = 5;

            const ui8* BeginByte() const {
                return reinterpret_cast<const ui8*>(TBaseStream::Begin());
            }

            const ui8* EndByte() const {
                return reinterpret_cast<const ui8*>(TBaseStream::End());
            }

            // Following functions is an adaptation Protobuf code from coded_stream.cc
            bool ReadVarint32FromArray(ui32* value) {
                // Fast path:  We have enough bytes left in the buffer to guarantee that
                // this read won't cross the end, so we can skip the checks.
                const ui8* ptr = BeginByte();
                ui32 b;
                ui32 result;

                b = *(ptr++);
                result = (b & 0x7F);
                if (!(b & 0x80))
                    goto done;
                b = *(ptr++);
                result |= (b & 0x7F) << 7;
                if (!(b & 0x80))
                    goto done;
                b = *(ptr++);
                result |= (b & 0x7F) << 14;
                if (!(b & 0x80))
                    goto done;
                b = *(ptr++);
                result |= (b & 0x7F) << 21;
                if (!(b & 0x80))
                    goto done;
                b = *(ptr++);
                result |= b << 28;
                if (!(b & 0x80))
                    goto done;

                // If the input is larger than 32 bits, we still need to read it all
                // and discard the high-order bits.

                for (int i = 0; i < MaxVarintBytes - MaxVarint32Bytes; i++) {
                    b = *(ptr++);
                    if (!(b & 0x80))
                        goto done;
                }

                // We have overrun the maximum size of a Varint (10 bytes).  Assume
                // the data is corrupt.
                return false;

            done:
                TBaseStream::Advance(ptr - BeginByte());
                *value = result;
                return true;
            }

            bool ReadVarint32Fallback(ui32* value) {
                if (BeginByte() + MaxVarint32Bytes <= EndByte() ||
                    // Optimization:  If the Varint ends at exactly the end of the buffer,
                    // we can detect that and still use the fast path.
                    (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80)))
                {
                    return ReadVarint32FromArray(value);
                } else {
                    // Really slow case: we will incur the cost of an extra function call here,
                    // but moving this out of line reduces the size of this function, which
                    // improves the common case. In micro benchmarks, this is worth about 10-15%
                    return ReadVarint32Slow(value);
                }
            }

            bool ReadVarint32Slow(ui32* value) {
                ui64 result;
                // Directly invoke ReadVarint64Fallback, since we already tried to optimize
                // for one-byte Varints.
                if (ReadVarint64Fallback(&result)) {
                    *value = static_cast<ui32>(result);
                    return true;
                } else {
                    return false;
                }
            }

            bool ReadVarint64Slow(ui64* value) {
                // Slow path:  This read might cross the end of the buffer, so we
                // need to check and refresh the buffer if and when it does.

                ui64 result = 0;
                int count = 0;
                ui32 b;

                do {
                    if (count == MaxVarintBytes) {
                        return false;
                    }
                    while (BeginByte() == EndByte()) {
                        TBaseStream::Refresh();
                    }
                    b = *BeginByte();
                    result |= static_cast<ui64>(b & 0x7F) << (7 * count);
                    TBaseStream::Advance(1);
                    ++count;
                } while (b & 0x80);

                *value = result;
                return true;
            }

            bool ReadVarint64Fallback(ui64* value) {
                if (BeginByte() + MaxVarintBytes <= EndByte() ||
                    // Optimization:  If the Varint ends at exactly the end of the buffer,
                    // we can detect that and still use the fast path.
                    (BeginByte() < EndByte() && !(EndByte()[-1] & 0x80)))
                {
                    // Fast path:  We have enough bytes left in the buffer to guarantee that
                    // this read won't cross the end, so we can skip the checks.

                    const ui8* ptr = BeginByte();
                    ui32 b;

                    // Splitting into 32-bit pieces gives better performance on 32-bit
                    // processors.
                    ui32 part0 = 0, part1 = 0, part2 = 0;

                    b = *(ptr++);
                    part0 = (b & 0x7F);
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part0 |= (b & 0x7F) << 7;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part0 |= (b & 0x7F) << 14;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part0 |= (b & 0x7F) << 21;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part1 = (b & 0x7F);
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part1 |= (b & 0x7F) << 7;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part1 |= (b & 0x7F) << 14;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part1 |= (b & 0x7F) << 21;
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part2 = (b & 0x7F);
                    if (!(b & 0x80))
                        goto done;
                    b = *(ptr++);
                    part2 |= (b & 0x7F) << 7;
                    if (!(b & 0x80))
                        goto done;

                    // We have overrun the maximum size of a Varint (10 bytes).  The data
                    // must be corrupt.
                    return false;

                done:
                    TBaseStream::Advance(ptr - BeginByte());
                    *value = (static_cast<ui64>(part0)) |
                             (static_cast<ui64>(part1) << 28) |
                             (static_cast<ui64>(part2) << 56);
                    return true;
                } else {
                    return ReadVarint64Slow(value);
                }
            }

        public:
            TCodedStream(const TBaseStream& baseStream)
                : TBaseStream(baseStream)
            {
            }

            bool ReadVarint64(ui64* value) {
                if (BeginByte() < EndByte() && *BeginByte() < 0x80) {
                    *value = *BeginByte();
                    TBaseStream::Advance(1);
                    return true;
                } else {
                    return ReadVarint64Fallback(value);
                }
            }

            bool ReadVarint32(ui32* value) {
                if (BeginByte() < EndByte() && *BeginByte() < 0x80) {
                    *value = *BeginByte();
                    TBaseStream::Advance(1);
                    return true;
                } else {
                    return ReadVarint32Fallback(value);
                }
            }
        };

        enum ENumericResult {
            Int64 = 0,
            Uint64 = 1,
            Double = 2
        };

        template <class TBlockStream, bool EnableLinePositionInfo>
        class TLexerBase
           : public TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>> {
        private:
            using TBaseStream = TCodedStream<TCharStream<TBlockStream, TPositionInfo<EnableLinePositionInfo>>>;
            TVector<char> Buffer_;
            TMaybe<ui64> MemoryLimit_;

            void CheckMemoryLimit() {
                if (MemoryLimit_ && Buffer_.capacity() > *MemoryLimit_) {
                    ythrow TYsonException()
                        << "Memory limit exceeded while parsing YSON stream: allocated "
                        << Buffer_.capacity() << ", limit " << (*MemoryLimit_);
                }
            }

        public:
            TLexerBase(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit)
                : TBaseStream(blockStream)
                , MemoryLimit_(memoryLimit)
            {
            }

        protected:
            /// Lexer routines

            template <bool AllowFinish>
            ENumericResult ReadNumeric(TStringBuf* value) {
                Buffer_.clear();
                ENumericResult result = ENumericResult::Int64;
                while (true) {
                    char ch = TBaseStream::template GetChar<AllowFinish>();
                    if (isdigit(ch) || ch == '+' || ch == '-') { // Seems like it can't be '+' or '-'
                        Buffer_.push_back(ch);
                    } else if (ch == '.' || ch == 'e' || ch == 'E') {
                        Buffer_.push_back(ch);
                        result = ENumericResult::Double;
                    } else if (ch == 'u') {
                        Buffer_.push_back(ch);
                        result = ENumericResult::Uint64;
                    } else if (isalpha(ch)) {
                        ythrow TYsonException() << "Unexpected '" << ch << "' in numeric literal";
                    } else {
                        break;
                    }
                    CheckMemoryLimit();
                    TBaseStream::Advance(1);
                }

                *value = TStringBuf(Buffer_.data(), Buffer_.size());
                return result;
            }

            template <bool AllowFinish>
            double ReadNanOrInf() {
                static const TStringBuf nanString = "nan";
                static const TStringBuf infString = "inf";
                static const TStringBuf plusInfString = "+inf";
                static const TStringBuf minusInfString = "-inf";

                TStringBuf expectedString;
                double expectedValue;
                char ch = TBaseStream::template GetChar<AllowFinish>();
                switch (ch) {
                    case '+':
                        expectedString = plusInfString;
                        expectedValue = std::numeric_limits<double>::infinity();
                        break;
                    case '-':
                        expectedString = minusInfString;
                        expectedValue = -std::numeric_limits<double>::infinity();
                        break;
                    case 'i':
                        expectedString = infString;
                        expectedValue = std::numeric_limits<double>::infinity();
                        break;
                    case 'n':
                        expectedString = nanString;
                        expectedValue = std::numeric_limits<double>::quiet_NaN();
                        break;
                    default:
                        ythrow TYsonException() << "Incorrect %-literal prefix: '" << ch << "'";
                }

                for (size_t i = 0; i < expectedString.size(); ++i) {
                    if (expectedString[i] != ch) {
                        ythrow TYsonException()
                            << "Incorrect %-literal prefix "
                            << "'" << expectedString.SubStr(0, i) << ch << "',"
                            << "expected " << expectedString;
                    }
                    TBaseStream::Advance(1);
                    ch = TBaseStream::template GetChar<AllowFinish>();
                }

                return expectedValue;
            }

            void ReadQuotedString(TStringBuf* value) {
                Buffer_.clear();
                while (true) {
                    if (TBaseStream::IsEmpty()) {
                        TBaseStream::Refresh();
                    }
                    char ch = *TBaseStream::Begin();
                    TBaseStream::Advance(1);
                    if (ch != '"') {
                        Buffer_.push_back(ch);
                    } else {
                        // We must count the number of '\' at the end of StringValue
                        // to check if it's not \"
                        int slashCount = 0;
                        int length = Buffer_.size();
                        while (slashCount < length && Buffer_[length - 1 - slashCount] == '\\') {
                            ++slashCount;
                        }
                        if (slashCount % 2 == 0) {
                            break;
                        } else {
                            Buffer_.push_back(ch);
                        }
                    }
                    CheckMemoryLimit();
                }

                auto unquotedValue = UnescapeC(Buffer_.data(), Buffer_.size());
                Buffer_.clear();
                Buffer_.insert(Buffer_.end(), unquotedValue.data(), unquotedValue.data() + unquotedValue.size());
                CheckMemoryLimit();
                *value = TStringBuf(Buffer_.data(), Buffer_.size());
            }

            template <bool AllowFinish>
            void ReadUnquotedString(TStringBuf* value) {
                Buffer_.clear();
                while (true) {
                    char ch = TBaseStream::template GetChar<AllowFinish>();
                    if (isalpha(ch) || isdigit(ch) ||
                        ch == '_' || ch == '-' || ch == '%' || ch == '.') {
                        Buffer_.push_back(ch);
                    } else {
                        break;
                    }
                    CheckMemoryLimit();
                    TBaseStream::Advance(1);
                }
                *value = TStringBuf(Buffer_.data(), Buffer_.size());
            }

            void ReadUnquotedString(TStringBuf* value) {
                return ReadUnquotedString<false>(value);
            }

            void ReadBinaryString(TStringBuf* value) {
                ui32 ulength = 0;
                if (!TBaseStream::ReadVarint32(&ulength)) {
                    ythrow TYsonException() << "Error parsing varint value";
                }

                i32 length = ZigZagDecode32(ulength);
                if (length < 0) {
                    ythrow TYsonException() << "Negative binary string literal length " << length;
                }

                if (TBaseStream::Begin() + length <= TBaseStream::End()) {
                    *value = TStringBuf(TBaseStream::Begin(), length);
                    TBaseStream::Advance(length);
                } else { // reading in Buffer
                    size_t needToRead = length;
                    Buffer_.clear();
                    while (needToRead) {
                        if (TBaseStream::IsEmpty()) {
                            TBaseStream::Refresh();
                            continue;
                        }
                        size_t readingBytes = Min(needToRead, TBaseStream::Length());

                        Buffer_.insert(Buffer_.end(), TBaseStream::Begin(), TBaseStream::Begin() + readingBytes);
                        CheckMemoryLimit();
                        needToRead -= readingBytes;
                        TBaseStream::Advance(readingBytes);
                    }
                    *value = TStringBuf(Buffer_.data(), Buffer_.size());
                }
            }

            template <bool AllowFinish>
            bool ReadBoolean() {
                Buffer_.clear();

                static TStringBuf trueString = "true";
                static TStringBuf falseString = "false";

                auto throwIncorrectBoolean = [&]() {
                    ythrow TYsonException() << "Incorrect boolean string " << TString(Buffer_.data(), Buffer_.size());
                };

                Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
                TBaseStream::Advance(1);
                if (Buffer_[0] == trueString[0]) {
                    for (size_t i = 1; i < trueString.size(); ++i) {
                        Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
                        TBaseStream::Advance(1);
                        if (Buffer_.back() != trueString[i]) {
                            throwIncorrectBoolean();
                        }
                    }
                    return true;
                } else if (Buffer_[0] == falseString[0]) {
                    for (size_t i = 1; i < falseString.size(); ++i) {
                        Buffer_.push_back(TBaseStream::template GetChar<AllowFinish>());
                        TBaseStream::Advance(1);
                        if (Buffer_.back() != falseString[i]) {
                            throwIncorrectBoolean();
                        }
                    }
                    return false;
                } else {
                    throwIncorrectBoolean();
                }

                Y_FAIL("unreachable");
                ;
            }

            void ReadBinaryInt64(i64* result) {
                ui64 uvalue;
                if (!TBaseStream::ReadVarint64(&uvalue)) {
                    ythrow TYsonException() << "Error parsing varint value";
                }
                *result = ZigZagDecode64(uvalue);
            }

            void ReadBinaryUint64(ui64* result) {
                ui64 uvalue;
                if (!TBaseStream::ReadVarint64(&uvalue)) {
                    ythrow TYsonException() << "Error parsing varint value";
                }
                *result = uvalue;
            }

            void ReadBinaryDouble(double* value) {
                size_t needToRead = sizeof(double);

                while (needToRead != 0) {
                    if (TBaseStream::IsEmpty()) {
                        TBaseStream::Refresh();
                        continue;
                    }

                    size_t chunkSize = Min(needToRead, TBaseStream::Length());
                    if (chunkSize == 0) {
                        ythrow TYsonException() << "Error parsing binary double literal";
                    }
                    std::copy(
                        TBaseStream::Begin(),
                        TBaseStream::Begin() + chunkSize,
                        reinterpret_cast<char*>(value) + (sizeof(double) - needToRead));
                    needToRead -= chunkSize;
                    TBaseStream::Advance(chunkSize);
                }
            }

            /// Helpers
            void SkipCharToken(char symbol) {
                char ch = SkipSpaceAndGetChar();
                if (ch != symbol) {
                    ythrow TYsonException() << "Expected '" << symbol << "' but found '" << ch << "'";
                }

                TBaseStream::Advance(1);
            }

            static bool IsSpaceFast(char ch) {
                static const ui8 lookupTable[] =
                    {
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
                return lookupTable[static_cast<ui8>(ch)];
            }

            template <bool AllowFinish>
            char SkipSpaceAndGetChar() {
                if (!TBaseStream::IsEmpty()) {
                    char ch = *TBaseStream::Begin();
                    if (!IsSpaceFast(ch)) {
                        return ch;
                    }
                }
                return SkipSpaceAndGetCharFallback<AllowFinish>();
            }

            char SkipSpaceAndGetChar() {
                return SkipSpaceAndGetChar<false>();
            }

            template <bool AllowFinish>
            char SkipSpaceAndGetCharFallback() {
                while (true) {
                    if (TBaseStream::IsEmpty()) {
                        if (TBaseStream::IsFinished()) {
                            return '\0';
                        }
                        TBaseStream::template Refresh<AllowFinish>();
                        continue;
                    }
                    if (!IsSpaceFast(*TBaseStream::Begin())) {
                        break;
                    }
                    TBaseStream::Advance(1);
                }
                return TBaseStream::template GetChar<AllowFinish>();
            }
        };

        ////////////////////////////////////////////////////////////////////////////////

    }

    ////////////////////////////////////////////////////////////////////////////////

    class TStringReader {
    private:
        const char* BeginPtr;
        const char* EndPtr;

    public:
        TStringReader()
            : BeginPtr(nullptr)
            , EndPtr(nullptr)
        {
        }

        TStringReader(const char* begin, const char* end)
            : BeginPtr(begin)
            , EndPtr(end)
        {
        }

        const char* Begin() const {
            return BeginPtr;
        }

        const char* End() const {
            return EndPtr;
        }

        void RefreshBlock() {
            Y_FAIL("unreachable");
        }

        void Advance(size_t bytes) {
            BeginPtr += bytes;
        }

        bool IsFinished() const {
            return true;
        }

        void SetBuffer(const char* begin, const char* end) {
            BeginPtr = begin;
            EndPtr = end;
        }
    };

    ////////////////////////////////////////////////////////////////////////////////

    class TStreamReader {
    public:
        TStreamReader(
            IInputStream* stream,
            char* buffer,
            size_t bufferSize)
            : Stream(stream)
            , Buffer(buffer)
            , BufferSize(bufferSize)
        {
            BeginPtr = EndPtr = Buffer;
            FinishFlag = false;
        }

        const char* Begin() const {
            return BeginPtr;
        }

        const char* End() const {
            return EndPtr;
        }

        void RefreshBlock() {
            size_t bytes = Stream->Read(Buffer, BufferSize);
            BeginPtr = Buffer;
            EndPtr = Buffer + bytes;
            FinishFlag = (bytes == 0);
        }

        void Advance(size_t bytes) {
            BeginPtr += bytes;
        }

        bool IsFinished() const {
            return FinishFlag;
        }

    private:
        IInputStream* Stream;
        char* Buffer;
        size_t BufferSize;

        const char* BeginPtr;
        const char* EndPtr;
        bool FinishFlag;
    };

    ////////////////////////////////////////////////////////////////////////////////

} // namespace NYson