author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 13:26:22 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-30 15:44:45 +0300 |
commit | 0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch) | |
tree | 291d72dbd7e9865399f668c84d11ed86fb190bbf /contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp | |
parent | cb2c8d75065e5b3c47094067cb4aa407d4813298 (diff) | |
download | ydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz | |
YQ Connector: use docker-compose in integration tests
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp')
-rw-r--r-- | contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp | 414 |
1 file changed, 414 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp
new file mode 100644
index 0000000000..4eaff2c852
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/BufferedTokenStream.cpp
@@ -0,0 +1,414 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "WritableToken.h"
+#include "Lexer.h"
+#include "RuleContext.h"
+#include "misc/Interval.h"
+#include "Exceptions.h"
+#include "support/CPPUtils.h"
+
+#include "BufferedTokenStream.h"
+
+using namespace antlr4;
+using namespace antlrcpp;
+
+BufferedTokenStream::BufferedTokenStream(TokenSource *tokenSource) : _tokenSource(tokenSource){
+  InitializeInstanceFields();
+}
+
+TokenSource* BufferedTokenStream::getTokenSource() const {
+  return _tokenSource;
+}
+
+size_t BufferedTokenStream::index() {
+  return _p;
+}
+
+ssize_t BufferedTokenStream::mark() {
+  return 0;
+}
+
+void BufferedTokenStream::release(ssize_t /*marker*/) {
+  // no resources to release
+}
+
+void BufferedTokenStream::reset() {
+  seek(0);
+}
+
+void BufferedTokenStream::seek(size_t index) {
+  lazyInit();
+  _p = adjustSeekIndex(index);
+}
+
+size_t BufferedTokenStream::size() {
+  return _tokens.size();
+}
+
+void BufferedTokenStream::consume() {
+  bool skipEofCheck = false;
+  if (!_needSetup) {
+    if (_fetchedEOF) {
+      // the last token in tokens is EOF. skip check if p indexes any
+      // fetched token except the last.
+      skipEofCheck = _p < _tokens.size() - 1;
+    } else {
+      // no EOF token in tokens. skip check if p indexes a fetched token.
+      skipEofCheck = _p < _tokens.size();
+    }
+  } else {
+    // not yet initialized
+    skipEofCheck = false;
+  }
+
+  if (!skipEofCheck && LA(1) == Token::EOF) {
+    throw IllegalStateException("cannot consume EOF");
+  }
+
+  if (sync(_p + 1)) {
+    _p = adjustSeekIndex(_p + 1);
+  }
+}
+
+bool BufferedTokenStream::sync(size_t i) {
+  if (i + 1 < _tokens.size())
+    return true;
+  size_t n = i - _tokens.size() + 1; // how many more elements we need?
+
+  if (n > 0) {
+    size_t fetched = fetch(n);
+    return fetched >= n;
+  }
+
+  return true;
+}
+
+size_t BufferedTokenStream::fetch(size_t n) {
+  if (_fetchedEOF) {
+    return 0;
+  }
+
+  size_t i = 0;
+  while (i < n) {
+    std::unique_ptr<Token> t(_tokenSource->nextToken());
+
+    if (is<WritableToken *>(t.get())) {
+      (static_cast<WritableToken *>(t.get()))->setTokenIndex(_tokens.size());
+    }
+
+    _tokens.push_back(std::move(t));
+    ++i;
+
+    if (_tokens.back()->getType() == Token::EOF) {
+      _fetchedEOF = true;
+      break;
+    }
+  }
+
+  return i;
+}
+
+Token* BufferedTokenStream::get(size_t i) const {
+  if (i >= _tokens.size()) {
+    throw IndexOutOfBoundsException(std::string("token index ") +
+                                    std::to_string(i) +
+                                    std::string(" out of range 0..") +
+                                    std::to_string(_tokens.size() - 1));
+  }
+  return _tokens[i].get();
+}
+
+std::vector<Token *> BufferedTokenStream::get(size_t start, size_t stop) {
+  std::vector<Token *> subset;
+
+  lazyInit();
+
+  if (_tokens.empty()) {
+    return subset;
+  }
+
+  if (stop >= _tokens.size()) {
+    stop = _tokens.size() - 1;
+  }
+  for (size_t i = start; i <= stop; i++) {
+    Token *t = _tokens[i].get();
+    if (t->getType() == Token::EOF) {
+      break;
+    }
+    subset.push_back(t);
+  }
+  return subset;
+}
+
+size_t BufferedTokenStream::LA(ssize_t i) {
+  return LT(i)->getType();
+}
+
+Token* BufferedTokenStream::LB(size_t k) {
+  if (k > _p) {
+    return nullptr;
+  }
+  return _tokens[_p - k].get();
+}
+
+Token* BufferedTokenStream::LT(ssize_t k) {
+  lazyInit();
+  if (k == 0) {
+    return nullptr;
+  }
+  if (k < 0) {
+    return LB(-k);
+  }
+
+  size_t i = _p + k - 1;
+  sync(i);
+  if (i >= _tokens.size()) { // return EOF token
+    // EOF must be last token
+    return _tokens.back().get();
+  }
+
+  return _tokens[i].get();
+}
+
+ssize_t BufferedTokenStream::adjustSeekIndex(size_t i) {
+  return i;
+}
+
+void BufferedTokenStream::lazyInit() {
+  if (_needSetup) {
+    setup();
+  }
+}
+
+void BufferedTokenStream::setup() {
+  _needSetup = false;
+  sync(0);
+  _p = adjustSeekIndex(0);
+}
+
+void BufferedTokenStream::setTokenSource(TokenSource *tokenSource) {
+  _tokenSource = tokenSource;
+  _tokens.clear();
+  _fetchedEOF = false;
+  _needSetup = true;
+}
+
+std::vector<Token *> BufferedTokenStream::getTokens() {
+  std::vector<Token *> result;
+  for (auto &t : _tokens)
+    result.push_back(t.get());
+  return result;
+}
+
+std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop) {
+  return getTokens(start, stop, std::vector<size_t>());
+}
+
+std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, const std::vector<size_t> &types) {
+  lazyInit();
+  if (stop >= _tokens.size() || start >= _tokens.size()) {
+    throw IndexOutOfBoundsException(std::string("start ") +
+                                    std::to_string(start) +
+                                    std::string(" or stop ") +
+                                    std::to_string(stop) +
+                                    std::string(" not in 0..") +
+                                    std::to_string(_tokens.size() - 1));
+  }
+
+  std::vector<Token *> filteredTokens;
+
+  if (start > stop) {
+    return filteredTokens;
+  }
+
+  for (size_t i = start; i <= stop; i++) {
+    Token *tok = _tokens[i].get();
+
+    if (types.empty() || std::find(types.begin(), types.end(), tok->getType()) != types.end()) {
+      filteredTokens.push_back(tok);
+    }
+  }
+  return filteredTokens;
+}
+
+std::vector<Token *> BufferedTokenStream::getTokens(size_t start, size_t stop, size_t ttype) {
+  std::vector<size_t> s;
+  s.push_back(ttype);
+  return getTokens(start, stop, s);
+}
+
+ssize_t BufferedTokenStream::nextTokenOnChannel(size_t i, size_t channel) {
+  sync(i);
+  if (i >= size()) {
+    return size() - 1;
+  }
+
+  Token *token = _tokens[i].get();
+  while (token->getChannel() != channel) {
+    if (token->getType() == Token::EOF) {
+      return i;
+    }
+    i++;
+    sync(i);
+    token = _tokens[i].get();
+  }
+  return i;
+}
+
+ssize_t BufferedTokenStream::previousTokenOnChannel(size_t i, size_t channel) {
+  sync(i);
+  if (i >= size()) {
+    // the EOF token is on every channel
+    return size() - 1;
+  }
+
+  while (true) {
+    Token *token = _tokens[i].get();
+    if (token->getType() == Token::EOF || token->getChannel() == channel) {
+      return i;
+    }
+
+    if (i == 0)
+      return -1;
+    i--;
+  }
+  return i;
+}
+
+std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex, ssize_t channel) {
+  lazyInit();
+  if (tokenIndex >= _tokens.size()) {
+    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
+  }
+
+  ssize_t nextOnChannel = nextTokenOnChannel(tokenIndex + 1, Lexer::DEFAULT_TOKEN_CHANNEL);
+  size_t to;
+  size_t from = tokenIndex + 1;
+  // if none onchannel to right, nextOnChannel=-1 so set to = last token
+  if (nextOnChannel == -1) {
+    to = static_cast<ssize_t>(size() - 1);
+  } else {
+    to = nextOnChannel;
+  }
+
+  return filterForChannel(from, to, channel);
+}
+
+std::vector<Token *> BufferedTokenStream::getHiddenTokensToRight(size_t tokenIndex) {
+  return getHiddenTokensToRight(tokenIndex, -1);
+}
+
+std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex, ssize_t channel) {
+  lazyInit();
+  if (tokenIndex >= _tokens.size()) {
+    throw IndexOutOfBoundsException(std::to_string(tokenIndex) + " not in 0.." + std::to_string(_tokens.size() - 1));
+  }
+
+  if (tokenIndex == 0) {
+    // Obviously no tokens can appear before the first token.
+    return { };
+  }
+
+  ssize_t prevOnChannel = previousTokenOnChannel(tokenIndex - 1, Lexer::DEFAULT_TOKEN_CHANNEL);
+  if (prevOnChannel == static_cast<ssize_t>(tokenIndex - 1)) {
+    return { };
+  }
+  // if none onchannel to left, prevOnChannel=-1 then from=0
+  size_t from = static_cast<size_t>(prevOnChannel + 1);
+  size_t to = tokenIndex - 1;
+
+  return filterForChannel(from, to, channel);
+}
+
+std::vector<Token *> BufferedTokenStream::getHiddenTokensToLeft(size_t tokenIndex) {
+  return getHiddenTokensToLeft(tokenIndex, -1);
+}
+
+std::vector<Token *> BufferedTokenStream::filterForChannel(size_t from, size_t to, ssize_t channel) {
+  std::vector<Token *> hidden;
+  for (size_t i = from; i <= to; i++) {
+    Token *t = _tokens[i].get();
+    if (channel == -1) {
+      if (t->getChannel() != Lexer::DEFAULT_TOKEN_CHANNEL) {
+        hidden.push_back(t);
+      }
+    } else {
+      if (t->getChannel() == static_cast<size_t>(channel)) {
+        hidden.push_back(t);
+      }
+    }
+  }
+
+  return hidden;
+}
+
+bool BufferedTokenStream::isInitialized() const {
+  return !_needSetup;
+}
+
+/**
+ * Get the text of all tokens in this buffer.
+ */
+std::string BufferedTokenStream::getSourceName() const
+{
+  return _tokenSource->getSourceName();
+}
+
+std::string BufferedTokenStream::getText() {
+  fill();
+  return getText(misc::Interval(0U, size() - 1));
+}
+
+std::string BufferedTokenStream::getText(const misc::Interval &interval) {
+  lazyInit();
+  size_t start = interval.a;
+  size_t stop = interval.b;
+  if (start == INVALID_INDEX || stop == INVALID_INDEX) {
+    return "";
+  }
+  sync(stop);
+  if (stop >= _tokens.size()) {
+    stop = _tokens.size() - 1;
+  }
+
+  std::stringstream ss;
+  for (size_t i = start; i <= stop; i++) {
+    Token *t = _tokens[i].get();
+    if (t->getType() == Token::EOF) {
+      break;
+    }
+    ss << t->getText();
+  }
+  return ss.str();
+}
+
+std::string BufferedTokenStream::getText(RuleContext *ctx) {
+  return getText(ctx->getSourceInterval());
+}
+
+std::string BufferedTokenStream::getText(Token *start, Token *stop) {
+  if (start != nullptr && stop != nullptr) {
+    return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
+  }
+
+  return "";
+}
+
+void BufferedTokenStream::fill() {
+  lazyInit();
+  const size_t blockSize = 1000;
+  while (true) {
+    size_t fetched = fetch(blockSize);
+    if (fetched < blockSize) {
+      return;
+    }
+  }
+}
+
+void BufferedTokenStream::InitializeInstanceFields() {
+  _needSetup = true;
+  _fetchedEOF = false;
+}
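For context: the class added above is the lazy token buffer behind ANTLR's `CommonTokenStream`, which subclasses it. The following is a rough usage sketch, not part of this commit; `MyLexer` is a placeholder for any grammar-generated lexer and the input string is arbitrary, while the stream calls are the runtime API implemented in this file.

```cpp
// Usage sketch only: "MyLexer" stands in for an ANTLR-generated lexer;
// the stream classes below are the API vendored in this commit.
#include "antlr4-runtime.h"
#include "MyLexer.h"  // hypothetical generated lexer

#include <iostream>

int main() {
  antlr4::ANTLRInputStream input("a + b // trailing comment");
  MyLexer lexer(&input);

  // CommonTokenStream derives from BufferedTokenStream; tokens are pulled
  // from the lexer lazily, in blocks, via sync()/fetch().
  antlr4::CommonTokenStream tokens(&lexer);

  tokens.fill();  // buffer everything up to and including EOF
  std::cout << tokens.size() << " tokens buffered\n";

  // LT(1) is the current lookahead token; LA(1) would be just its type.
  antlr4::Token *first = tokens.LT(1);
  std::cout << "lookahead: " << first->getText() << '\n';

  // Off-channel tokens (e.g. comments routed to the hidden channel by the
  // grammar) stay in the buffer and can be recovered around any index.
  for (antlr4::Token *t : tokens.getHiddenTokensToRight(first->getTokenIndex())) {
    std::cout << "hidden: " << t->getText() << '\n';
  }

  // getText() concatenates the text of all buffered tokens.
  std::cout << tokens.getText() << '\n';
  return 0;
}
```

Note how `fill()` loops over `fetch()` in blocks of 1000 tokens, and how whitespace and comments survive in the buffer even though `LT`/`LB` skip past them, which is what makes the hidden-token queries possible.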