summaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2023-12-02 01:45:21 +0300
committerrobot-piglet <[email protected]>2023-12-02 02:42:50 +0300
commit9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp
parent32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp')
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp294
1 files changed, 294 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp
new file mode 100644
index 00000000000..b0385c56baf
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.cpp
@@ -0,0 +1,294 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#include "atn/LexerATNSimulator.h"
+#include "Exceptions.h"
+#include "misc/Interval.h"
+#include "CommonTokenFactory.h"
+#include "LexerNoViableAltException.h"
+#include "ANTLRErrorListener.h"
+#include "support/CPPUtils.h"
+#include "CommonToken.h"
+
+#include "Lexer.h"
+
+// Set to 1 to make pushMode()/popMode() trace mode changes to std::cout.
+#define DEBUG_LEXER 0
+
+using namespace antlrcpp;
+using namespace antlr4;
+
+// Creates a lexer with no input stream attached; all other state is set up
+// by InitializeInstanceFields().
+Lexer::Lexer()
+  : Recognizer()
+  , _input(nullptr)
+{
+  InitializeInstanceFields();
+}
+
+// Creates a lexer that reads from `input`. The pointer is stored as given;
+// all other state is set up by InitializeInstanceFields().
+Lexer::Lexer(CharStream *input)
+  : Recognizer()
+  , _input(input)
+{
+  InitializeInstanceFields();
+}
+
+// Returns the lexer to its initial state.
+//
+// Rewinds the attached input stream (if there is one), clears the syntax
+// error counter, drops any pending token, resets the token-start bookkeeping
+// and the text override, returns to the default mode with an empty mode
+// stack, and finally resets the ATN simulator.
+void Lexer::reset() {
+  // A default-constructed lexer has no input stream, and setInputStream()
+  // calls reset() before it installs the new stream, so the stream pointer
+  // may legitimately be null here and must not be dereferenced blindly.
+  if (_input != nullptr) {
+    _input->seek(0); // rewind the input
+  }
+
+  // wack Lexer state variables
+  _syntaxErrors = 0;
+  token.reset();
+  type = Token::INVALID_TYPE;
+  channel = Token::DEFAULT_CHANNEL;
+  tokenStartCharIndex = INVALID_INDEX;
+  tokenStartCharPositionInLine = 0;
+  tokenStartLine = 0;
+  _text = "";
+
+  hitEOF = false;
+  mode = Lexer::DEFAULT_MODE;
+  modeStack.clear();
+
+  getInterpreter<atn::LexerATNSimulator>()->reset();
+}
+
+// Produces the next token from the input stream and hands ownership of it to
+// the caller.
+//
+// Matching starts over with a fresh token whenever the resulting type is
+// SKIP (including after error recovery), and keeps matching into the same
+// token while the resulting type is MORE. Once the end of input has been
+// observed, the next iteration emits an EOF token instead of matching.
+std::unique_ptr<Token> Lexer::nextToken() {
+  // Mark the start location in the char stream so unbuffered streams are
+  // guaranteed to hold at least the text of the current token.
+  const ssize_t startMarker = _input->mark();
+
+  // Release the marker when this function exits; otherwise an unbuffered
+  // char stream would keep buffering.
+  auto releaseMarker = finally([this, startMarker] {
+    _input->release(startMarker);
+  });
+
+  for (;;) {
+    if (hitEOF) {
+      emitEOF();
+      return std::move(token);
+    }
+
+    // Begin a new token: forget the previous one and record where this one starts.
+    token.reset();
+    channel = Token::DEFAULT_CHANNEL;
+    tokenStartCharIndex = _input->index();
+    tokenStartCharPositionInLine = getInterpreter<atn::LexerATNSimulator>()->getCharPositionInLine();
+    tokenStartLine = getInterpreter<atn::LexerATNSimulator>()->getLine();
+    _text = "";
+
+    bool skipped = false;
+    do {
+      type = Token::INVALID_TYPE;
+      size_t matchedType;
+      try {
+        matchedType = getInterpreter<atn::LexerATNSimulator>()->match(_input, mode);
+      } catch (const LexerNoViableAltException &e) {
+        notifyListeners(e); // report error
+        recover(e);
+        matchedType = SKIP;
+      }
+
+      if (_input->LA(1) == EOF) {
+        hitEOF = true;
+      }
+
+      // `type` may have been changed while matching (see skip(), more() and
+      // setType()); only fall back to the matched type if it was left alone.
+      if (type == Token::INVALID_TYPE) {
+        type = matchedType;
+      }
+
+      if (type == SKIP) {
+        skipped = true;
+        break;
+      }
+    } while (type == MORE);
+
+    if (skipped) {
+      continue; // discard what was matched and start a new token
+    }
+
+    // Nothing was emitted explicitly during matching, so emit the default token.
+    if (token == nullptr) {
+      emit();
+    }
+    return std::move(token);
+  }
+}
+
+// Sets the current token type to SKIP; nextToken() then discards what was
+// matched and starts over with a new token.
+void Lexer::skip() {
+  this->type = SKIP;
+}
+
+// Sets the current token type to MORE; nextToken() then keeps matching
+// without starting a new token.
+void Lexer::more() {
+  this->type = MORE;
+}
+
+// Switches the lexer to mode `m` without touching the mode stack.
+void Lexer::setMode(size_t m) {
+  this->mode = m;
+}
+
+// Saves the current mode on the mode stack, then switches to mode `m`.
+void Lexer::pushMode(size_t m) {
+#if DEBUG_LEXER == 1
+  std::cout << "pushMode " << m << std::endl;
+#endif
+
+  const auto previousMode = mode;
+  modeStack.push_back(previousMode);
+  setMode(m);
+}
+
+// Restores the most recently pushed mode and removes it from the mode stack.
+//
+// Returns the mode that is active afterwards.
+// Throws EmptyStackException if no mode has been pushed.
+size_t Lexer::popMode() {
+  if (modeStack.empty()) {
+    throw EmptyStackException();
+  }
+
+  const auto restoredMode = modeStack.back();
+#if DEBUG_LEXER == 1
+  std::cout << std::string("popMode back to ") << restoredMode << std::endl;
+#endif
+
+  // Switch first, then drop the stack entry (same order as before).
+  setMode(restoredMode);
+  modeStack.pop_back();
+  return mode;
+}
+
+
+// Returns the token factory used by emit() and emitEOF() (non-owning pointer).
+TokenFactory<CommonToken>* Lexer::getTokenFactory() {
+  return this->_factory;
+}
+
+// Resets the lexer and then attaches `input` as the new character source.
+//
+// Note that reset() runs while the previous stream is still attached. If
+// `input` is not a CharStream, the dynamic_cast yields nullptr and the lexer
+// ends up without an input stream.
+void Lexer::setInputStream(IntStream *input) {
+  reset();
+  CharStream *const charStream = dynamic_cast<CharStream*>(input);
+  _input = charStream;
+}
+
+// Returns the source name reported by the attached input stream.
+std::string Lexer::getSourceName() {
+  std::string sourceName = _input->getSourceName();
+  return sourceName;
+}
+
+// Returns the attached character stream (non-owning; may be null).
+CharStream* Lexer::getInputStream() {
+  return this->_input;
+}
+
+// Takes ownership of `newToken` and makes it the pending token, replacing
+// any token that was pending before.
+void Lexer::emit(std::unique_ptr<Token> newToken) {
+  this->token = std::move(newToken);
+}
+
+// Builds a token from the current lexer state through the token factory and
+// makes it the pending token.
+//
+// The token spans from the recorded token start index up to the character
+// just before the current input index. Returns a non-owning pointer to the
+// pending token.
+Token* Lexer::emit() {
+  const size_t stopIndex = getCharIndex() - 1;
+  auto created = _factory->create({ this, _input }, type, _text, channel,
+                                  tokenStartCharIndex, stopIndex, tokenStartLine, tokenStartCharPositionInLine);
+  emit(std::move(created));
+  return token.get();
+}
+
+// Builds an EOF token on the default channel at the current input position
+// and makes it the pending token.
+//
+// The token has empty text; its start index is the current input index and
+// its stop index is one less than that. Returns a non-owning pointer to the
+// pending token.
+Token* Lexer::emitEOF() {
+  const size_t line = getLine();
+  const size_t cpos = getCharPositionInLine();
+  const auto eofIndex = _input->index();
+  emit(_factory->create({ this, _input }, EOF, "", Token::DEFAULT_CHANNEL, eofIndex, eofIndex - 1, line, cpos));
+  return token.get();
+}
+
+// Returns the current line as tracked by the lexer's ATN simulator.
+size_t Lexer::getLine() const {
+  auto simulator = getInterpreter<atn::LexerATNSimulator>();
+  return simulator->getLine();
+}
+
+// Returns the current position within the line as tracked by the lexer's
+// ATN simulator.
+size_t Lexer::getCharPositionInLine() {
+  auto simulator = getInterpreter<atn::LexerATNSimulator>();
+  return simulator->getCharPositionInLine();
+}
+
+// Forwards `line` to the lexer's ATN simulator as the new current line.
+void Lexer::setLine(size_t line) {
+  auto simulator = getInterpreter<atn::LexerATNSimulator>();
+  simulator->setLine(line);
+}
+
+// Forwards `charPositionInLine` to the lexer's ATN simulator as the new
+// position within the current line.
+void Lexer::setCharPositionInLine(size_t charPositionInLine) {
+  auto simulator = getInterpreter<atn::LexerATNSimulator>();
+  simulator->setCharPositionInLine(charPositionInLine);
+}
+
+// Returns the current index of the attached input stream.
+size_t Lexer::getCharIndex() {
+  const size_t currentIndex = _input->index();
+  return currentIndex;
+}
+
+// Returns the text of the token being matched.
+//
+// A non-empty override installed via setText() takes precedence; otherwise
+// the text is obtained from the ATN simulator for the attached input.
+std::string Lexer::getText() {
+  if (_text.empty()) {
+    return getInterpreter<atn::LexerATNSimulator>()->getText(_input);
+  }
+  return _text;
+}
+
+// Overrides the text of the token being matched. An empty string means no
+// override (see getText()).
+void Lexer::setText(const std::string &text) {
+  this->_text = text;
+}
+
+// Hands the pending token over to the caller. The lexer no longer holds a
+// pending token afterwards.
+std::unique_ptr<Token> Lexer::getToken() {
+  std::unique_ptr<Token> released = std::move(token);
+  return released;
+}
+
+// Takes ownership of `newToken` and makes it the pending token, replacing
+// any token that was pending before.
+void Lexer::setToken(std::unique_ptr<Token> newToken) {
+  this->token = std::move(newToken);
+}
+
+// Sets the type of the token being matched.
+void Lexer::setType(size_t ttype) {
+  this->type = ttype;
+}
+
+// Returns the type of the token being matched.
+size_t Lexer::getType() {
+  return this->type;
+}
+
+// Sets the channel of the token being matched.
+void Lexer::setChannel(size_t newChannel) {
+  this->channel = newChannel;
+}
+
+// Returns the channel of the token being matched.
+size_t Lexer::getChannel() {
+  return this->channel;
+}
+
+// Collects tokens from nextToken() until an EOF token is produced.
+//
+// The EOF token itself is not part of the result.
+std::vector<std::unique_ptr<Token>> Lexer::getAllTokens() {
+  std::vector<std::unique_ptr<Token>> collected;
+  for (auto next = nextToken(); next->getType() != EOF; next = nextToken()) {
+    collected.push_back(std::move(next));
+  }
+  return collected;
+}
+
+// Recovers from a failed match by consuming one character through the ATN
+// simulator, unless the input is already at EOF.
+void Lexer::recover(const LexerNoViableAltException &/*e*/) {
+  if (_input->LA(1) == EOF) {
+    return; // nothing left to skip
+  }
+
+  // skip a char and try again
+  getInterpreter<atn::LexerATNSimulator>()->consume(_input);
+}
+
+void Lexer::notifyListeners(const LexerNoViableAltException & /*e*/) {
+ ++_syntaxErrors;
+ std::string text = _input->getText(misc::Interval(tokenStartCharIndex, _input->index()));
+ std::string msg = std::string("token recognition error at: '") + getErrorDisplay(text) + std::string("'");
+
+ ProxyErrorListener &listener = getErrorListenerDispatch();
+ listener.syntaxError(this, nullptr, tokenStartLine, tokenStartCharPositionInLine, msg, std::current_exception());
+}
+
+// Returns a copy of `s` in which newline, tab and carriage-return characters
+// are replaced by the two-character sequences \n, \t and \r. All other
+// characters are copied unchanged.
+std::string Lexer::getErrorDisplay(const std::string &s) {
+  std::string display;
+  display.reserve(s.size());
+
+  for (const char ch : s) {
+    if (ch == '\n') {
+      display += "\\n";
+    } else if (ch == '\t') {
+      display += "\\t";
+    } else if (ch == '\r') {
+      display += "\\r";
+    } else {
+      display += ch;
+    }
+  }
+  return display;
+}
+
+void Lexer::recover(RecognitionException * /*re*/) {
+ // TODO: Do we lose character or line position information?
+ _input->consume();
+}
+
+// Returns how many syntax errors notifyListeners() has counted since
+// construction or the last reset().
+size_t Lexer::getNumberOfSyntaxErrors() {
+  return this->_syntaxErrors;
+}
+
+// Gives every lexer state field its initial value. Called by both
+// constructors; does not touch the input stream pointer.
+void Lexer::InitializeInstanceFields() {
+  // Token production.
+  _factory = CommonTokenFactory::DEFAULT.get();
+  token.reset();
+  type = 0;
+  channel = 0;
+
+  // Start position of the token being matched.
+  tokenStartCharIndex = INVALID_INDEX;
+  tokenStartLine = 0;
+  tokenStartCharPositionInLine = 0;
+
+  // Overall lexer status.
+  _syntaxErrors = 0;
+  hitEOF = false;
+  mode = Lexer::DEFAULT_MODE;
+}