aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/Lexer.h
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2023-12-02 01:45:21 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2023-12-02 02:42:50 +0300
commit9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree9f88a486917d371d099cd712efd91b4c122d209d /contrib/libs/antlr4_cpp_runtime/src/Lexer.h
parent32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
downloadydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz
Intermediate changes
Diffstat (limited to 'contrib/libs/antlr4_cpp_runtime/src/Lexer.h')
-rw-r--r--contrib/libs/antlr4_cpp_runtime/src/Lexer.h196
1 files changed, 196 insertions, 0 deletions
diff --git a/contrib/libs/antlr4_cpp_runtime/src/Lexer.h b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h
new file mode 100644
index 0000000000..77033ad9e6
--- /dev/null
+++ b/contrib/libs/antlr4_cpp_runtime/src/Lexer.h
@@ -0,0 +1,196 @@
+/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+#pragma once
+
+#include "Recognizer.h"
+#include "TokenSource.h"
+#include "CharStream.h"
+#include "Token.h"
+
+namespace antlr4 {
+
+ /// A lexer is a recognizer that draws input symbols from a character stream.
+ /// Lexer grammars result in a subclass of this class. A Lexer object
+ /// uses simplified match() and error recovery mechanisms in the interest
+ /// of speed.
+ class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
+ public:
+ static constexpr size_t DEFAULT_MODE = 0;
+ static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
+ static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;
+
+ static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
+ static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
+ static constexpr size_t MIN_CHAR_VALUE = 0;
+ static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
+
+ CharStream *_input; // Pure (non-owning) reference, usually to a statically allocated instance.
+
+ protected:
+ /// Factory used to create token objects.
+ TokenFactory<CommonToken> *_factory;
+
+ public:
+ /// The goal of all lexer rules/methods is to create a token object.
+ /// This is an instance variable as multiple rules may collaborate to
+ /// create a single token. nextToken() will return this object after
+ /// matching lexer rule(s). If you subclass to allow multiple token
+ /// emissions, then set this to the last token to be matched or
+ /// something non-null so that the auto token emit mechanism will not
+ /// emit another token.
+
+ // The life cycle of a token is:
+ // It is created by emit() (via the token factory) or by action code; the lexer holds ownership of it.
+ // Ownership is handed over to the caller (the token stream) when nextToken() is called.
+ std::unique_ptr<Token> token;
+
+ /// <summary>
+ /// The character index in the stream at which the current token starts.
+ /// Needed, for example, to get the text for the current token. Set at
+ /// the start of nextToken().
+ /// </summary>
+ size_t tokenStartCharIndex;
+
+ /// <summary>
+ /// The line on which the first character of the token resides. </summary>
+ size_t tokenStartLine;
+
+ /// The character position of the token's first character within its line.
+ size_t tokenStartCharPositionInLine;
+
+ /// Once we see EOF on the char stream, the next token will be EOF.
+ /// If you have the rule DONE : EOF ; then you see DONE followed by EOF.
+ bool hitEOF;
+
+ /// The channel number for the current token.
+ size_t channel;
+
+ /// The token type for the current token.
+ size_t type;
+
+ // Use the vector as a stack.
+ std::vector<size_t> modeStack;
+ size_t mode;
+
+ Lexer();
+ Lexer(CharStream *input); // NOTE(review): not marked explicit, so a CharStream* converts implicitly to Lexer.
+ virtual ~Lexer() {}
+
+ virtual void reset();
+
+ /// Return a token from this source; i.e., match a token on the char stream.
+ virtual std::unique_ptr<Token> nextToken() override;
+
+ /// Instruct the lexer to skip creating a token for the current lexer rule
+ /// and look for another token. nextToken() knows to keep looking when
+ /// a lexer rule finishes with token set to SKIP_TOKEN (NOTE(review): no SKIP_TOKEN
+ /// is declared in this header; presumably the SKIP type is meant -- confirm). Recall
+ /// that if token == null at the end of any token rule, it creates one for you and emits it.
+ virtual void skip();
+ virtual void more();
+ virtual void setMode(size_t m);
+ virtual void pushMode(size_t m);
+ virtual size_t popMode();
+
+ template<typename T1>
+ void setTokenFactory(TokenFactory<T1> *factory) {
+ this->_factory = factory; // Compiles only if TokenFactory<T1>* converts to TokenFactory<CommonToken>*.
+ }
+
+ virtual TokenFactory<CommonToken>* getTokenFactory() override;
+
+ /// Set the char stream and reset the lexer.
+ virtual void setInputStream(IntStream *input) override;
+
+ virtual std::string getSourceName() override;
+
+ virtual CharStream* getInputStream() override;
+
+ /// By default, multiple emits per nextToken() invocation are not supported
+ /// for efficiency reasons. Subclasses can override this method, nextToken(),
+ /// and getToken() (to push tokens into a list and pull from that list
+ /// rather than a single variable as this implementation does).
+ virtual void emit(std::unique_ptr<Token> newToken);
+
+ /// The standard method called to automatically emit a token at the
+ /// outermost lexical rule. The token object should point into the
+ /// char buffer start..stop. If there is a text override in '_text',
+ /// use that to set the token's text. Override this method to emit
+ /// custom Token objects or provide a new factory.
+ virtual Token* emit();
+
+ virtual Token* emitEOF();
+
+ virtual size_t getLine() const override;
+
+ virtual size_t getCharPositionInLine() override;
+
+ virtual void setLine(size_t line);
+
+ virtual void setCharPositionInLine(size_t charPositionInLine);
+
+ /// What is the index of the current character of lookahead?
+ virtual size_t getCharIndex();
+
+ /// Return the text matched so far for the current token or any
+ /// text override.
+ virtual std::string getText();
+
+ /// Set the complete text of this token; it wipes any previous
+ /// changes to the text.
+ virtual void setText(const std::string &text);
+
+ /// Override if emitting multiple tokens.
+ virtual std::unique_ptr<Token> getToken();
+
+ virtual void setToken(std::unique_ptr<Token> newToken);
+
+ virtual void setType(size_t ttype);
+
+ virtual size_t getType();
+
+ virtual void setChannel(size_t newChannel);
+
+ virtual size_t getChannel();
+
+ virtual const std::vector<std::string>& getChannelNames() const = 0;
+
+ virtual const std::vector<std::string>& getModeNames() const = 0;
+
+ /// Return a list of all Token objects in the input char stream.
+ /// Forces load of all tokens. Does not include the EOF token.
+ virtual std::vector<std::unique_ptr<Token>> getAllTokens();
+
+ virtual void recover(const LexerNoViableAltException &e);
+
+ virtual void notifyListeners(const LexerNoViableAltException &e);
+
+ virtual std::string getErrorDisplay(const std::string &s);
+
+ /// Lexers can normally match any char in its vocabulary after matching
+ /// a token, so do the easy thing and just kill a character and hope
+ /// it all works out. You can instead use the rule invocation stack
+ /// to do sophisticated error recovery if you are in a fragment rule.
+ virtual void recover(RecognitionException *re);
+
+ /// <summary>
+ /// Gets the number of syntax errors reported during parsing. This value is
+ /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
+ /// </summary>
+ /// <seealso cref="#notifyListeners"/>
+ virtual size_t getNumberOfSyntaxErrors();
+
+ protected:
+ /// You can set the text for the current token to override what is in
+ /// the input char buffer (via setText()).
+ std::string _text;
+
+ private:
+ size_t _syntaxErrors; // Presumably the count returned by getNumberOfSyntaxErrors() -- confirm in the implementation.
+ void InitializeInstanceFields(); // Defined out of line; presumably sets member defaults -- confirm.
+ };
+
+} // namespace antlr4