diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/yson/parser_detail.h | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/yson/parser_detail.h')
-rw-r--r-- | library/cpp/yson/parser_detail.h | 646 |
1 files changed, 323 insertions, 323 deletions
diff --git a/library/cpp/yson/parser_detail.h b/library/cpp/yson/parser_detail.h index 44223caf12..a4cfdc71da 100644 --- a/library/cpp/yson/parser_detail.h +++ b/library/cpp/yson/parser_detail.h @@ -3,379 +3,379 @@ #include "detail.h" namespace NYson { - namespace NDetail { - //////////////////////////////////////////////////////////////////////////////// - - template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> - class TParser - : public TLexerBase<TBlockStream, EnableLinePositionInfo> { - private: - using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; - TConsumer* Consumer; - - public: - TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) - : TBase(blockStream, memoryLimit) - , Consumer(consumer) - { - } - - void DoParse(EYsonType ysonType) { - switch (ysonType) { + namespace NDetail { + //////////////////////////////////////////////////////////////////////////////// + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TParser + : public TLexerBase<TBlockStream, EnableLinePositionInfo> { + private: + using TBase = TLexerBase<TBlockStream, EnableLinePositionInfo>; + TConsumer* Consumer; + + public: + TParser(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) + : TBase(blockStream, memoryLimit) + , Consumer(consumer) + { + } + + void DoParse(EYsonType ysonType) { + switch (ysonType) { case ::NYson::EYsonType::Node: - ParseNode<true>(); - break; + ParseNode<true>(); + break; case ::NYson::EYsonType::ListFragment: - ParseListFragment<true>(EndSymbol); - break; + ParseListFragment<true>(EndSymbol); + break; case ::NYson::EYsonType::MapFragment: - ParseMapFragment<true>(EndSymbol); - break; + ParseMapFragment<true>(EndSymbol); + break; - default: - Y_FAIL("unreachable"); - } + default: + Y_FAIL("unreachable"); + } - while (!(TBase::IsFinished() && TBase::IsEmpty())) { - if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { + while (!(TBase::IsFinished() && TBase::IsEmpty())) { + if (TBase::template SkipSpaceAndGetChar<true>() != EndSymbol) { ythrow TYsonException() << "Stray '" << (*TBase::Begin()) << "' found"; - } else if (!TBase::IsEmpty()) { - TBase::Advance(1); - } - } - } - - bool DoParseListFragment(bool first) { - bool ret = first ? first : ParseListSeparator<true>(EndSymbol); - return ret && ParseListItem<true>(EndSymbol); - } - - void ParseAttributes() { - Consumer->OnBeginAttributes(); - ParseMapFragment(EndAttributesSymbol); - TBase::SkipCharToken(EndAttributesSymbol); - Consumer->OnEndAttributes(); - } - - void ParseMap() { - Consumer->OnBeginMap(); - ParseMapFragment(EndMapSymbol); - TBase::SkipCharToken(EndMapSymbol); - Consumer->OnEndMap(); - } - - void ParseList() { - Consumer->OnBeginList(); - ParseListFragment(EndListSymbol); - TBase::SkipCharToken(EndListSymbol); - Consumer->OnEndList(); + } else if (!TBase::IsEmpty()) { + TBase::Advance(1); + } + } + } + + bool DoParseListFragment(bool first) { + bool ret = first ? first : ParseListSeparator<true>(EndSymbol); + return ret && ParseListItem<true>(EndSymbol); } - template <bool AllowFinish> - void ParseNode() { - return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar()); - } - - template <bool AllowFinish> - void ParseNode(char ch) { - if (ch == BeginAttributesSymbol) { - TBase::Advance(1); - ParseAttributes(); - ch = TBase::SkipSpaceAndGetChar(); - } - - switch (ch) { - case BeginMapSymbol: - TBase::Advance(1); - ParseMap(); - break; - - case BeginListSymbol: - TBase::Advance(1); - ParseList(); - break; - - case '"': { - TBase::Advance(1); - TStringBuf value; - TBase::ReadQuotedString(&value); - Consumer->OnStringScalar(value); - break; - } - case StringMarker: { - TBase::Advance(1); - TStringBuf value; - TBase::ReadBinaryString(&value); - Consumer->OnStringScalar(value); - break; - } - case Int64Marker: { - TBase::Advance(1); - i64 value; - TBase::ReadBinaryInt64(&value); - Consumer->OnInt64Scalar(value); - break; - } - case Uint64Marker: { - TBase::Advance(1); - ui64 value; - TBase::ReadBinaryUint64(&value); - Consumer->OnUint64Scalar(value); - break; - } - case DoubleMarker: { - TBase::Advance(1); - double value; - TBase::ReadBinaryDouble(&value); - Consumer->OnDoubleScalar(value); - break; - } - case FalseMarker: { - TBase::Advance(1); - Consumer->OnBooleanScalar(false); - break; - } - case TrueMarker: { - TBase::Advance(1); - Consumer->OnBooleanScalar(true); - break; - } - case EntitySymbol: - TBase::Advance(1); - Consumer->OnEntity(); - break; - - default: { - if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state - ReadNumeric<AllowFinish>(); - } else if (isalpha((unsigned char)ch) || ch == '_') { - TStringBuf value; - TBase::template ReadUnquotedString<AllowFinish>(&value); - Consumer->OnStringScalar(value); - } else if (ch == '%') { - TBase::Advance(1); - ch = TBase::template GetChar<AllowFinish>(); - if (ch == 't' || ch == 'f') { - Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>()); - } else { - Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); - } - } else { + void ParseAttributes() { + Consumer->OnBeginAttributes(); + ParseMapFragment(EndAttributesSymbol); + TBase::SkipCharToken(EndAttributesSymbol); + Consumer->OnEndAttributes(); + } + + void ParseMap() { + Consumer->OnBeginMap(); + ParseMapFragment(EndMapSymbol); + TBase::SkipCharToken(EndMapSymbol); + Consumer->OnEndMap(); + } + + void ParseList() { + Consumer->OnBeginList(); + ParseListFragment(EndListSymbol); + TBase::SkipCharToken(EndListSymbol); + Consumer->OnEndList(); + } + + template <bool AllowFinish> + void ParseNode() { + return ParseNode<AllowFinish>(TBase::SkipSpaceAndGetChar()); + } + + template <bool AllowFinish> + void ParseNode(char ch) { + if (ch == BeginAttributesSymbol) { + TBase::Advance(1); + ParseAttributes(); + ch = TBase::SkipSpaceAndGetChar(); + } + + switch (ch) { + case BeginMapSymbol: + TBase::Advance(1); + ParseMap(); + break; + + case BeginListSymbol: + TBase::Advance(1); + ParseList(); + break; + + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnStringScalar(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + TBase::ReadBinaryString(&value); + Consumer->OnStringScalar(value); + break; + } + case Int64Marker: { + TBase::Advance(1); + i64 value; + TBase::ReadBinaryInt64(&value); + Consumer->OnInt64Scalar(value); + break; + } + case Uint64Marker: { + TBase::Advance(1); + ui64 value; + TBase::ReadBinaryUint64(&value); + Consumer->OnUint64Scalar(value); + break; + } + case DoubleMarker: { + TBase::Advance(1); + double value; + TBase::ReadBinaryDouble(&value); + Consumer->OnDoubleScalar(value); + break; + } + case FalseMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(false); + break; + } + case TrueMarker: { + TBase::Advance(1); + Consumer->OnBooleanScalar(true); + break; + } + case EntitySymbol: + TBase::Advance(1); + Consumer->OnEntity(); + break; + + default: { + if (isdigit((unsigned char)ch) || ch == '-' || ch == '+') { // case of '+' is handled in AfterPlus state + ReadNumeric<AllowFinish>(); + } else if (isalpha((unsigned char)ch) || ch == '_') { + TStringBuf value; + TBase::template ReadUnquotedString<AllowFinish>(&value); + Consumer->OnStringScalar(value); + } else if (ch == '%') { + TBase::Advance(1); + ch = TBase::template GetChar<AllowFinish>(); + if (ch == 't' || ch == 'f') { + Consumer->OnBooleanScalar(TBase::template ReadBoolean<AllowFinish>()); + } else { + Consumer->OnDoubleScalar(TBase::template ReadNanOrInf<AllowFinish>()); + } + } else { ythrow TYsonException() << "Unexpected '" << ch << "' while parsing node"; - } - } - } + } + } + } } - - void ParseKey() { - return ParseKey(TBase::SkipSpaceAndGetChar()); + + void ParseKey() { + return ParseKey(TBase::SkipSpaceAndGetChar()); } - - void ParseKey(char ch) { - switch (ch) { - case '"': { - TBase::Advance(1); - TStringBuf value; - TBase::ReadQuotedString(&value); - Consumer->OnKeyedItem(value); - break; - } - case StringMarker: { - TBase::Advance(1); - TStringBuf value; - TBase::ReadBinaryString(&value); - Consumer->OnKeyedItem(value); - break; - } - default: { - if (isalpha(ch) || ch == '_') { - TStringBuf value; - TBase::ReadUnquotedString(&value); - Consumer->OnKeyedItem(value); - } else { + + void ParseKey(char ch) { + switch (ch) { + case '"': { + TBase::Advance(1); + TStringBuf value; + TBase::ReadQuotedString(&value); + Consumer->OnKeyedItem(value); + break; + } + case StringMarker: { + TBase::Advance(1); + TStringBuf value; + TBase::ReadBinaryString(&value); + Consumer->OnKeyedItem(value); + break; + } + default: { + if (isalpha(ch) || ch == '_') { + TStringBuf value; + TBase::ReadUnquotedString(&value); + Consumer->OnKeyedItem(value); + } else { ythrow TYsonException() << "Unexpected '" << ch << "' while parsing key"; - } - } - } + } + } + } } - template <bool AllowFinish> - void ParseMapFragment(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - while (ch != endSymbol) { - ParseKey(ch); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == KeyValueSeparatorSymbol) { - TBase::Advance(1); + template <bool AllowFinish> + void ParseMapFragment(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + while (ch != endSymbol) { + ParseKey(ch); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyValueSeparatorSymbol) { + TBase::Advance(1); } else { ythrow TYsonException() << "Expected '" << KeyValueSeparatorSymbol << "' but '" << ch << "' found"; } - ParseNode<AllowFinish>(); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == KeyedItemSeparatorSymbol) { - TBase::Advance(1); - ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - } else if (ch != endSymbol) { + ParseNode<AllowFinish>(); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == KeyedItemSeparatorSymbol) { + TBase::Advance(1); + ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + } else if (ch != endSymbol) { ythrow TYsonException() << "Expected '" << KeyedItemSeparatorSymbol << "' or '" << endSymbol << "' but '" << ch << "' found"; - } + } } } - void ParseMapFragment(char endSymbol) { - ParseMapFragment<false>(endSymbol); + void ParseMapFragment(char endSymbol) { + ParseMapFragment<false>(endSymbol); + } + + template <bool AllowFinish> + bool ParseListItem(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch != endSymbol) { + Consumer->OnListItem(); + ParseNode<AllowFinish>(ch); + return true; + } + return false; } - - template <bool AllowFinish> - bool ParseListItem(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch != endSymbol) { - Consumer->OnListItem(); - ParseNode<AllowFinish>(ch); - return true; - } - return false; - } - - template <bool AllowFinish> - bool ParseListSeparator(char endSymbol) { - char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); - if (ch == ListItemSeparatorSymbol) { - TBase::Advance(1); - return true; - } else if (ch != endSymbol) { + + template <bool AllowFinish> + bool ParseListSeparator(char endSymbol) { + char ch = TBase::template SkipSpaceAndGetChar<AllowFinish>(); + if (ch == ListItemSeparatorSymbol) { + TBase::Advance(1); + return true; + } else if (ch != endSymbol) { ythrow TYsonException() << "Expected '" << ListItemSeparatorSymbol << "' or '" << endSymbol << "' but '" << ch << "' found"; - } - return false; + } + return false; } - - template <bool AllowFinish> - void ParseListFragment(char endSymbol) { - while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) { + + template <bool AllowFinish> + void ParseListFragment(char endSymbol) { + while (ParseListItem<AllowFinish>(endSymbol) && ParseListSeparator<AllowFinish>(endSymbol)) { } } - void ParseListFragment(char endSymbol) { - ParseListFragment<false>(endSymbol); + void ParseListFragment(char endSymbol) { + ParseListFragment<false>(endSymbol); } - - template <bool AllowFinish> - void ReadNumeric() { - TStringBuf valueBuffer; - ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); - - if (numericResult == ENumericResult::Double) { - double value; - try { - value = FromString<double>(valueBuffer); - } catch (yexception& e) { - // This exception is wrapped in parser. + + template <bool AllowFinish> + void ReadNumeric() { + TStringBuf valueBuffer; + ENumericResult numericResult = TBase::template ReadNumeric<AllowFinish>(&valueBuffer); + + if (numericResult == ENumericResult::Double) { + double value; + try { + value = FromString<double>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse double literal '" << valueBuffer << "'" << e; - } - Consumer->OnDoubleScalar(value); - } else if (numericResult == ENumericResult::Int64) { - i64 value; - try { - value = FromString<i64>(valueBuffer); - } catch (yexception& e) { - // This exception is wrapped in parser. + } + Consumer->OnDoubleScalar(value); + } else if (numericResult == ENumericResult::Int64) { + i64 value; + try { + value = FromString<i64>(valueBuffer); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse int64 literal '" << valueBuffer << "'" << e; - } - Consumer->OnInt64Scalar(value); - } else if (numericResult == ENumericResult::Uint64) { - ui64 value; - try { - value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); - } catch (yexception& e) { - // This exception is wrapped in parser. + } + Consumer->OnInt64Scalar(value); + } else if (numericResult == ENumericResult::Uint64) { + ui64 value; + try { + value = FromString<ui64>(valueBuffer.SubStr(0, valueBuffer.size() - 1)); + } catch (yexception& e) { + // This exception is wrapped in parser. ythrow TYsonException() << "Failed to parse uint64 literal '" << valueBuffer << "'" << e; - } - Consumer->OnUint64Scalar(value); - } + } + Consumer->OnUint64Scalar(value); + } } - }; + }; - //////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// } - template <class TConsumer, class TBlockStream> - void ParseYsonStreamImpl( - const TBlockStream& blockStream, + template <class TConsumer, class TBlockStream> + void ParseYsonStreamImpl( + const TBlockStream& blockStream, NYT::NYson::IYsonConsumer* consumer, - EYsonType parsingMode, - bool enableLinePositionInfo, - TMaybe<ui64> memoryLimit) { - if (enableLinePositionInfo) { - using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>; - TImpl impl(blockStream, consumer, memoryLimit); - impl.DoParse(parsingMode); - } else { - using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>; - TImpl impl(blockStream, consumer, memoryLimit); - impl.DoParse(parsingMode); + EYsonType parsingMode, + bool enableLinePositionInfo, + TMaybe<ui64> memoryLimit) { + if (enableLinePositionInfo) { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, true>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); + } else { + using TImpl = NDetail::TParser<TConsumer, TBlockStream, false>; + TImpl impl(blockStream, consumer, memoryLimit); + impl.DoParse(parsingMode); } } - class TStatelessYsonParserImplBase { - public: + class TStatelessYsonParserImplBase { + public: virtual void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) = 0; - virtual ~TStatelessYsonParserImplBase() { - } - }; - - template <class TConsumer, bool EnableLinePositionInfo> - class TStatelessYsonParserImpl - : public TStatelessYsonParserImplBase { - private: - using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>; - TParser Parser; - - public: - TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit) - : Parser(TStringReader(), consumer, memoryLimit) - { + virtual ~TStatelessYsonParserImplBase() { } + }; - void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override { - Parser.SetBuffer(data.begin(), data.end()); - Parser.DoParse(type); - } - }; - - class TYsonListParserImplBase { - public: - virtual bool Parse() = 0; + template <class TConsumer, bool EnableLinePositionInfo> + class TStatelessYsonParserImpl + : public TStatelessYsonParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TStringReader, EnableLinePositionInfo>; + TParser Parser; - virtual ~TYsonListParserImplBase() { - } - }; - - template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> - class TYsonListParserImpl - : public TYsonListParserImplBase { - private: - using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>; - TParser Parser; - bool First = true; - - public: - TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) - : Parser(blockStream, consumer, memoryLimit) - { - } + public: + TStatelessYsonParserImpl(TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(TStringReader(), consumer, memoryLimit) + { + } - bool Parse() override { - bool ret = Parser.DoParseListFragment(First); - First = false; - return ret; + void Parse(const TStringBuf& data, EYsonType type = ::NYson::EYsonType::Node) override { + Parser.SetBuffer(data.begin(), data.end()); + Parser.DoParse(type); } - }; - - //////////////////////////////////////////////////////////////////////////////// + }; + + class TYsonListParserImplBase { + public: + virtual bool Parse() = 0; + + virtual ~TYsonListParserImplBase() { + } + }; + + template <class TConsumer, class TBlockStream, bool EnableLinePositionInfo> + class TYsonListParserImpl + : public TYsonListParserImplBase { + private: + using TParser = NDetail::TParser<TConsumer, TBlockStream, EnableLinePositionInfo>; + TParser Parser; + bool First = true; + + public: + TYsonListParserImpl(const TBlockStream& blockStream, TConsumer* consumer, TMaybe<ui64> memoryLimit) + : Parser(blockStream, consumer, memoryLimit) + { + } + + bool Parse() override { + bool ret = Parser.DoParseListFragment(First); + First = false; + return ret; + } + }; + + //////////////////////////////////////////////////////////////////////////////// } // namespace NYson |