diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/yaml-cpp/src/scanner.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/yaml-cpp/src/scanner.cpp')
-rw-r--r-- | contrib/libs/yaml-cpp/src/scanner.cpp | 386 |
1 files changed, 386 insertions, 0 deletions
diff --git a/contrib/libs/yaml-cpp/src/scanner.cpp b/contrib/libs/yaml-cpp/src/scanner.cpp new file mode 100644 index 0000000000..b5cfcc12b2 --- /dev/null +++ b/contrib/libs/yaml-cpp/src/scanner.cpp @@ -0,0 +1,386 @@ +#include <cassert> +#include <memory> + +#include "exp.h" +#include "scanner.h" +#include "token.h" +#include "yaml-cpp/exceptions.h" // IWYU pragma: keep + +namespace YAML { +Scanner::Scanner(std::istream& in) + : INPUT(in), + m_startedStream(false), + m_endedStream(false), + m_simpleKeyAllowed(false), + m_canBeJSONFlow(false) {} + +Scanner::~Scanner() {} + +bool Scanner::empty() { + EnsureTokensInQueue(); + return m_tokens.empty(); +} + +void Scanner::pop() { + EnsureTokensInQueue(); + if (!m_tokens.empty()) + m_tokens.pop(); +} + +Token& Scanner::peek() { + EnsureTokensInQueue(); + assert(!m_tokens.empty()); // should we be asserting here? I mean, we really + // just be checking + // if it's empty before peeking. + +#if 0 + static Token *pLast = 0; + if(pLast != &m_tokens.front()) + std::cerr << "peek: " << m_tokens.front() << "\n"; + pLast = &m_tokens.front(); +#endif + + return m_tokens.front(); +} + +Mark Scanner::mark() const { return INPUT.mark(); } + +void Scanner::EnsureTokensInQueue() { + while (1) { + if (!m_tokens.empty()) { + Token& token = m_tokens.front(); + + // if this guy's valid, then we're done + if (token.status == Token::VALID) { + return; + } + + // here's where we clean up the impossible tokens + if (token.status == Token::INVALID) { + m_tokens.pop(); + continue; + } + + // note: what's left are the unverified tokens + } + + // no token? maybe we've actually finished + if (m_endedStream) { + return; + } + + // no? then scan... + ScanNextToken(); + } +} + +void Scanner::ScanNextToken() { + if (m_endedStream) { + return; + } + + if (!m_startedStream) { + return StartStream(); + } + + // get rid of whitespace, etc. (in between tokens it should be irrelevent) + ScanToNextToken(); + + // maybe need to end some blocks + PopIndentToHere(); + + // ***** + // And now branch based on the next few characters! + // ***** + + // end of stream + if (!INPUT) { + return EndStream(); + } + + if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) { + return ScanDirective(); + } + + // document token + if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) { + return ScanDocStart(); + } + + if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) { + return ScanDocEnd(); + } + + // flow start/end/entry + if (INPUT.peek() == Keys::FlowSeqStart || + INPUT.peek() == Keys::FlowMapStart) { + return ScanFlowStart(); + } + + if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) { + return ScanFlowEnd(); + } + + if (INPUT.peek() == Keys::FlowEntry) { + return ScanFlowEntry(); + } + + // block/map stuff + if (Exp::BlockEntry().Matches(INPUT)) { + return ScanBlockEntry(); + } + + if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) { + return ScanKey(); + } + + if (GetValueRegex().Matches(INPUT)) { + return ScanValue(); + } + + // alias/anchor + if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) { + return ScanAnchorOrAlias(); + } + + // tag + if (INPUT.peek() == Keys::Tag) { + return ScanTag(); + } + + // special scalars + if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || + INPUT.peek() == Keys::FoldedScalar)) { + return ScanBlockScalar(); + } + + if (INPUT.peek() == '\'' || INPUT.peek() == '\"') { + return ScanQuotedScalar(); + } + + // plain scalars + if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()) + .Matches(INPUT)) { + return ScanPlainScalar(); + } + + // don't know what it is! + throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN); +} + +void Scanner::ScanToNextToken() { + while (1) { + // first eat whitespace + while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { + if (InBlockContext() && Exp::Tab().Matches(INPUT)) { + m_simpleKeyAllowed = false; + } + INPUT.eat(1); + } + + // then eat a comment + if (Exp::Comment().Matches(INPUT)) { + // eat until line break + while (INPUT && !Exp::Break().Matches(INPUT)) { + INPUT.eat(1); + } + } + + // if it's NOT a line break, then we're done! + if (!Exp::Break().Matches(INPUT)) { + break; + } + + // otherwise, let's eat the line break and keep going + int n = Exp::Break().Match(INPUT); + INPUT.eat(n); + + // oh yeah, and let's get rid of that simple key + InvalidateSimpleKey(); + + // new line - we may be able to accept a simple key now + if (InBlockContext()) { + m_simpleKeyAllowed = true; + } + } +} + +/////////////////////////////////////////////////////////////////////// +// Misc. helpers + +// IsWhitespaceToBeEaten +// . We can eat whitespace if it's a space or tab +// . Note: originally tabs in block context couldn't be eaten +// "where a simple key could be allowed +// (i.e., not at the beginning of a line, or following '-', '?', or +// ':')" +// I think this is wrong, since tabs can be non-content whitespace; it's just +// that they can't contribute to indentation, so once you've seen a tab in a +// line, you can't start a simple key +bool Scanner::IsWhitespaceToBeEaten(char ch) { + if (ch == ' ') { + return true; + } + + if (ch == '\t') { + return true; + } + + return false; +} + +const RegEx& Scanner::GetValueRegex() const { + if (InBlockContext()) { + return Exp::Value(); + } + + return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); +} + +void Scanner::StartStream() { + m_startedStream = true; + m_simpleKeyAllowed = true; + std::unique_ptr<IndentMarker> pIndent( + new IndentMarker(-1, IndentMarker::NONE)); + m_indentRefs.push_back(std::move(pIndent)); + m_indents.push(&m_indentRefs.back()); +} + +void Scanner::EndStream() { + // force newline + if (INPUT.column() > 0) { + INPUT.ResetColumn(); + } + + PopAllIndents(); + PopAllSimpleKeys(); + + m_simpleKeyAllowed = false; + m_endedStream = true; +} + +Token* Scanner::PushToken(Token::TYPE type) { + m_tokens.push(Token(type, INPUT.mark())); + return &m_tokens.back(); +} + +Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const { + switch (type) { + case IndentMarker::SEQ: + return Token::BLOCK_SEQ_START; + case IndentMarker::MAP: + return Token::BLOCK_MAP_START; + case IndentMarker::NONE: + assert(false); + break; + } + assert(false); + throw std::runtime_error("yaml-cpp: internal error, invalid indent type"); +} + +Scanner::IndentMarker* Scanner::PushIndentTo(int column, + IndentMarker::INDENT_TYPE type) { + // are we in flow? + if (InFlowContext()) { + return 0; + } + + std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type)); + IndentMarker& indent = *pIndent; + const IndentMarker& lastIndent = *m_indents.top(); + + // is this actually an indentation? + if (indent.column < lastIndent.column) { + return 0; + } + if (indent.column == lastIndent.column && + !(indent.type == IndentMarker::SEQ && + lastIndent.type == IndentMarker::MAP)) { + return 0; + } + + // push a start token + indent.pStartToken = PushToken(GetStartTokenFor(type)); + + // and then the indent + m_indents.push(&indent); + m_indentRefs.push_back(std::move(pIndent)); + return &m_indentRefs.back(); +} + +void Scanner::PopIndentToHere() { + // are we in flow? + if (InFlowContext()) { + return; + } + + // now pop away + while (!m_indents.empty()) { + const IndentMarker& indent = *m_indents.top(); + if (indent.column < INPUT.column()) { + break; + } + if (indent.column == INPUT.column() && + !(indent.type == IndentMarker::SEQ && + !Exp::BlockEntry().Matches(INPUT))) { + break; + } + + PopIndent(); + } + + while (!m_indents.empty() && + m_indents.top()->status == IndentMarker::INVALID) { + PopIndent(); + } +} + +void Scanner::PopAllIndents() { + // are we in flow? + if (InFlowContext()) { + return; + } + + // now pop away + while (!m_indents.empty()) { + const IndentMarker& indent = *m_indents.top(); + if (indent.type == IndentMarker::NONE) { + break; + } + + PopIndent(); + } +} + +void Scanner::PopIndent() { + const IndentMarker& indent = *m_indents.top(); + m_indents.pop(); + + if (indent.status != IndentMarker::VALID) { + InvalidateSimpleKey(); + return; + } + + if (indent.type == IndentMarker::SEQ) { + m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark())); + } else if (indent.type == IndentMarker::MAP) { + m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark())); + } +} + +int Scanner::GetTopIndent() const { + if (m_indents.empty()) { + return 0; + } + return m_indents.top()->column; +} + +void Scanner::ThrowParserException(const std::string& msg) const { + Mark mark = Mark::null_mark(); + if (!m_tokens.empty()) { + const Token& token = m_tokens.front(); + mark = token.mark; + } + throw ParserException(mark, msg); +} +} // namespace YAML |