diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/yson/lexer_detail.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/yson/lexer_detail.h')
-rw-r--r-- | library/cpp/yson/lexer_detail.h | 492 |
1 files changed, 246 insertions, 246 deletions
diff --git a/library/cpp/yson/lexer_detail.h b/library/cpp/yson/lexer_detail.h index 0bba30acdd..be61289cb1 100644 --- a/library/cpp/yson/lexer_detail.h +++ b/library/cpp/yson/lexer_detail.h @@ -4,58 +4,58 @@ #include "token.h" namespace NYson { - //////////////////////////////////////////////////////////////////////////////// - - namespace NDetail { - /*! \internal */ - //////////////////////////////////////////////////////////////////////////////// - - // EReadStartCase tree representation: - // Root = xb - // BinaryStringOrOtherSpecialToken = x0b - // BinaryString = 00b - // OtherSpecialToken = 10b - // Other = x1b - // BinaryScalar = xx01b - // BinaryInt64 = 0001b - // BinaryDouble = 0101b - // BinaryFalse = 1001b - // BinaryTrue = 1101b - // Other = xxx11b - // Quote = 00011b - // DigitOrMinus = 00111b - // String = 01011b - // Space = 01111b - // Plus = 10011b - // None = 10111b - // Percent = 11011b + //////////////////////////////////////////////////////////////////////////////// + + namespace NDetail { + /*! \internal */ + //////////////////////////////////////////////////////////////////////////////// + + // EReadStartCase tree representation: + // Root = xb + // BinaryStringOrOtherSpecialToken = x0b + // BinaryString = 00b + // OtherSpecialToken = 10b + // Other = x1b + // BinaryScalar = xx01b + // BinaryInt64 = 0001b + // BinaryDouble = 0101b + // BinaryFalse = 1001b + // BinaryTrue = 1101b + // Other = xxx11b + // Quote = 00011b + // DigitOrMinus = 00111b + // String = 01011b + // Space = 01111b + // Plus = 10011b + // None = 10111b + // Percent = 11011b enum EReadStartCase : unsigned { - BinaryString = 0, // = 00b - OtherSpecialToken = 2, // = 10b - - BinaryInt64 = 1, // = 001b - BinaryDouble = 5, // = 101b - BinaryFalse = 9, // = 1001b - BinaryTrue = 13, // = 1101b - BinaryUint64 = 17, // = 10001b - - Quote = 3, // = 00011b - DigitOrMinus = 7, // = 00111b - String = 11, // = 01011b - Space = 15, // = 01111b - Plus = 19, // = 10011b - None = 23, // = 10111b - Percent = 27 // = 11011b - }; - - template <class TBlockStream, bool EnableLinePositionInfo> - class TLexer - : public TLexerBase<TBlockStream, EnableLinePositionInfo> { - private: - using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; - - static EReadStartCase GetStartState(char ch) { + BinaryString = 0, // = 00b + OtherSpecialToken = 2, // = 10b + + BinaryInt64 = 1, // = 001b + BinaryDouble = 5, // = 101b + BinaryFalse = 9, // = 1001b + BinaryTrue = 13, // = 1101b + BinaryUint64 = 17, // = 10001b + + Quote = 3, // = 00011b + DigitOrMinus = 7, // = 00111b + String = 11, // = 01011b + Space = 15, // = 01111b + Plus = 19, // = 10011b + None = 23, // = 10111b + Percent = 27 // = 11011b + }; + + template <class TBlockStream, bool EnableLinePositionInfo> + class TLexer + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + + static EReadStartCase GetStartState(char ch) { #define NN EReadStartCase::None #define BS EReadStartCase::BinaryString #define BI EReadStartCase::BinaryInt64 @@ -71,68 +71,68 @@ namespace NYson { #define PC EReadStartCase::Percent #define TT(name) (EReadStartCase(static_cast<ui8>(ETokenType::name) << 2) | EReadStartCase::OtherSpecialToken) - static const ui8 lookupTable[] = - { - NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - // 32 - SP, // ' ' - NN, // '!' - QU, // '"' - TT(Hash), // '#' - NN, // '$' - PC, // '%' - NN, // '&' - NN, // "'" - TT(LeftParenthesis), // '(' - TT(RightParenthesis), // ')' - NN, // '*' - PL, // '+' - TT(Comma), // ',' - DM, // '-' - NN, // '.' - NN, // '/' - - // 48 - DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9' - TT(Colon), // ':' - TT(Semicolon), // ';' - TT(LeftAngle), // '<' - TT(Equals), // '=' - TT(RightAngle), // '>' - NN, // '?' - - // 64 - NN, // '@' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' - TT(LeftBracket), // '[' - NN, // '\' - TT(RightBracket), // ']' - NN, // '^' - ST, // '_' - - // 96 - NN, // '`' - - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' - ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' - TT(LeftBrace), // '{' - NN, // '|' - TT(RightBrace), // '}' - NN, // '~' - NN, // '^?' non-printable - // 128 - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, - NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; + static const ui8 lookupTable[] = + { + NN, BS, BI, BD, BF, BT, BU, NN, NN, SP, SP, SP, SP, SP, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + // 32 + SP, // ' ' + NN, // '!' + QU, // '"' + TT(Hash), // '#' + NN, // '$' + PC, // '%' + NN, // '&' + NN, // "'" + TT(LeftParenthesis), // '(' + TT(RightParenthesis), // ')' + NN, // '*' + PL, // '+' + TT(Comma), // ',' + DM, // '-' + NN, // '.' + NN, // '/' + + // 48 + DM, DM, DM, DM, DM, DM, DM, DM, DM, DM, // '0' - '9' + TT(Colon), // ':' + TT(Semicolon), // ';' + TT(LeftAngle), // '<' + TT(Equals), // '=' + TT(RightAngle), // '>' + NN, // '?' + + // 64 + NN, // '@' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'A' - 'M' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'N' - 'Z' + TT(LeftBracket), // '[' + NN, // '\' + TT(RightBracket), // ']' + NN, // '^' + ST, // '_' + + // 96 + NN, // '`' + + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'a' - 'm' + ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, // 'n' - 'z' + TT(LeftBrace), // '{' + NN, // '|' + TT(RightBrace), // '}' + NN, // '~' + NN, // '^?' non-printable + // 128 + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, + NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN, NN}; #undef NN #undef BS @@ -144,153 +144,153 @@ namespace NYson { #undef PL #undef QU #undef TT - return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); - } - - public: - TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) - : TBase(blockStream, memoryLimit) - { - } - - void GetToken(TToken* token) { - char ch1 = TBase::SkipSpaceAndGetChar(); - auto state = GetStartState(ch1); - auto stateBits = static_cast<unsigned>(state); - - if (ch1 == '\0') { - *token = TToken::EndOfStream; - return; - } - - if (stateBits & 1) { // Other = x1b - if (stateBits & 1 << 1) { // Other = xxx11b - if (state == EReadStartCase::Quote) { - TStringBuf value; - TBase::Advance(1); - TBase::ReadQuotedString(&value); - *token = TToken(value); - } else if (state == EReadStartCase::DigitOrMinus) { - ReadNumeric<true>(token); - } else if (state == EReadStartCase::Plus) { - TBase::Advance(1); - - char ch2 = TBase::template GetChar<true>(); - - if (!isdigit(ch2)) { - *token = TToken(ETokenType::Plus); - } else { - ReadNumeric<true>(token); - } - } else if (state == EReadStartCase::String) { - TStringBuf value; - TBase::template ReadUnquotedString<true>(&value); - *token = TToken(value); - } else if (state == EReadStartCase::Percent) { - TBase::Advance(1); - char ch3 = TBase::template GetChar<true>(); - if (ch3 == 't' || ch3 == 'f') { - *token = TToken(TBase::template ReadBoolean<true>()); - } else { - *token = TToken(TBase::template ReadNanOrInf<true>()); - } - } else { // None - Y_ASSERT(state == EReadStartCase::None); + return static_cast<EReadStartCase>(lookupTable[static_cast<ui8>(ch)]); + } + + public: + TLexer(const TBlockStream& blockStream, TMaybe<ui64> memoryLimit) + : TBase(blockStream, memoryLimit) + { + } + + void GetToken(TToken* token) { + char ch1 = TBase::SkipSpaceAndGetChar(); + auto state = GetStartState(ch1); + auto stateBits = static_cast<unsigned>(state); + + if (ch1 == '\0') { + *token = TToken::EndOfStream; + return; + } + + if (stateBits & 1) { // Other = x1b + if (stateBits & 1 << 1) { // Other = xxx11b + if (state == EReadStartCase::Quote) { + TStringBuf value; + TBase::Advance(1); + TBase::ReadQuotedString(&value); + *token = TToken(value); + } else if (state == EReadStartCase::DigitOrMinus) { + ReadNumeric<true>(token); + } else if (state == EReadStartCase::Plus) { + TBase::Advance(1); + + char ch2 = TBase::template GetChar<true>(); + + if (!isdigit(ch2)) { + *token = TToken(ETokenType::Plus); + } else { + ReadNumeric<true>(token); + } + } else if (state == EReadStartCase::String) { + TStringBuf value; + TBase::template ReadUnquotedString<true>(&value); + *token = TToken(value); + } else if (state == EReadStartCase::Percent) { + TBase::Advance(1); + char ch3 = TBase::template GetChar<true>(); + if (ch3 == 't' || ch3 == 'f') { + *token = TToken(TBase::template ReadBoolean<true>()); + } else { + *token = TToken(TBase::template ReadNanOrInf<true>()); + } + } else { // None + Y_ASSERT(state == EReadStartCase::None); ythrow TYsonException() << "Unexpected " << ch1; - } - } else { // BinaryScalar = x01b - TBase::Advance(1); - if (state == EReadStartCase::BinaryDouble) { - double value; - TBase::ReadBinaryDouble(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryInt64) { - i64 value; - TBase::ReadBinaryInt64(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryUint64) { - ui64 value; - TBase::ReadBinaryUint64(&value); - *token = TToken(value); - } else if (state == EReadStartCase::BinaryFalse) { - *token = TToken(false); - } else if (state == EReadStartCase::BinaryTrue) { - *token = TToken(true); - } else { - Y_FAIL("unreachable"); - } + } + } else { // BinaryScalar = x01b + TBase::Advance(1); + if (state == EReadStartCase::BinaryDouble) { + double value; + TBase::ReadBinaryDouble(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryInt64) { + i64 value; + TBase::ReadBinaryInt64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryUint64) { + ui64 value; + TBase::ReadBinaryUint64(&value); + *token = TToken(value); + } else if (state == EReadStartCase::BinaryFalse) { + *token = TToken(false); + } else if (state == EReadStartCase::BinaryTrue) { + *token = TToken(true); + } else { + Y_FAIL("unreachable"); + } } - } else { // BinaryStringOrOtherSpecialToken = x0b + } else { // BinaryStringOrOtherSpecialToken = x0b TBase::Advance(1); - if (stateBits & 1 << 1) { // OtherSpecialToken = 10b - Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken)); - *token = TToken(ETokenType(stateBits >> 2)); - } else { // BinaryString = 00b - Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString)); - TStringBuf value; - TBase::ReadBinaryString(&value); - *token = TToken(value); + if (stateBits & 1 << 1) { // OtherSpecialToken = 10b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::OtherSpecialToken)); + *token = TToken(ETokenType(stateBits >> 2)); + } else { // BinaryString = 00b + Y_ASSERT((stateBits & 3) == static_cast<unsigned>(EReadStartCase::BinaryString)); + TStringBuf value; + TBase::ReadBinaryString(&value); + *token = TToken(value); } } } - template <bool AllowFinish> - void ReadNumeric(TToken* token) { - TStringBuf valueBuffer; - ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + template <bool AllowFinish> + void ReadNumeric(TToken* token) { + TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); - if (numericResult == ENumericResult::Double) { - try { - *token = TToken(FromString<double>(valueBuffer)); - } catch (yexception&) { + if (numericResult == ENumericResult::Double) { + try { + *token = TToken(FromString<double>(valueBuffer)); + } catch (yexception&) { ythrow TYsonException() << "Error parsing double literal " << valueBuffer; - } - } else if (numericResult == ENumericResult::Int64) { - try { - *token = TToken(FromString<i64>(valueBuffer)); - } catch (yexception&) { + } + } else if (numericResult == ENumericResult::Int64) { + try { + *token = TToken(FromString<i64>(valueBuffer)); + } catch (yexception&) { ythrow TYsonException() << "Error parsing int64 literal " << valueBuffer; - } - } else if (numericResult == ENumericResult::Uint64) { - try { - *token = TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); - } catch (yexception&) { + } + } else if (numericResult == ENumericResult::Uint64) { + try { + *token = TToken(FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1))); + } catch (yexception&) { ythrow TYsonException() << "Error parsing uint64 literal " << valueBuffer; - } - } + } + } } - }; - //////////////////////////////////////////////////////////////////////////////// - /*! \endinternal */ + }; + //////////////////////////////////////////////////////////////////////////////// + /*! \endinternal */ } - class TStatelessYsonLexerImplBase { - public: - virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0; - - virtual ~TStatelessYsonLexerImplBase() { - } - }; - - template <bool EnableLinePositionInfo> - class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { - private: - using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>; - TLexer Lexer; - - public: - TStatelesYsonLexerImpl() - : Lexer(TStringReader(), Nothing()) - { - } - - size_t GetToken(const TStringBuf& data, TToken* token) override { - Lexer.SetBuffer(data.begin(), data.end()); - Lexer.GetToken(token); - return Lexer.Begin() - data.begin(); - } - }; - - //////////////////////////////////////////////////////////////////////////////// + class TStatelessYsonLexerImplBase { + public: + virtual size_t GetToken(const TStringBuf& data, TToken* token) = 0; + + virtual ~TStatelessYsonLexerImplBase() { + } + }; + + template <bool EnableLinePositionInfo> + class TStatelesYsonLexerImpl: public TStatelessYsonLexerImplBase { + private: + using TLexer = NDetail::TLexer<TStringReader, EnableLinePositionInfo>; + TLexer Lexer; + + public: + TStatelesYsonLexerImpl() + : Lexer(TStringReader(), Nothing()) + { + } + + size_t GetToken(const TStringBuf& data, TToken* token) override { + Lexer.SetBuffer(data.begin(), data.end()); + Lexer.GetToken(token); + return Lexer.Begin() - data.begin(); + } + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson |