aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/antlr4_cpp_runtime/src/atn/LexerActionExecutor.h
blob: 28bb1e28ecf1fe372e41ccd4639d4bb53dc904f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "CharStream.h"
#include "atn/LexerAction.h"

namespace antlr4 {
namespace atn {

  /// Represents an executor for a sequence of lexer actions which are traversed during
  /// the matching operation of a lexer rule (token).
  ///
  /// <para>The executor tracks position information for position-dependent lexer actions
  /// efficiently, ensuring that actions appearing only at the end of the rule do
  /// not cause bloating of the <seealso cref="DFA"/> created for the lexer.</para>
  class ANTLR4CPP_PUBLIC LexerActionExecutor final : public std::enable_shared_from_this<LexerActionExecutor> {
  public:
    /// <summary>
    /// Constructs an executor for a sequence of <seealso cref="LexerAction"/> actions. </summary>
    /// <param name="lexerActions"> The lexer actions to execute. </param>
    explicit LexerActionExecutor(std::vector<Ref<const LexerAction>> lexerActions);

    /// <summary>
    /// Creates a <seealso cref="LexerActionExecutor"/> which executes the actions for
    /// the input {@code lexerActionExecutor} followed by a specified
    /// {@code lexerAction}.
    /// </summary>
    /// <param name="lexerActionExecutor"> The executor for actions already traversed by
    /// the lexer while matching a token within a particular
    /// <seealso cref="LexerATNConfig"/>. If this is {@code null}, the method behaves as
    /// though it were an empty executor. </param>
    /// <param name="lexerAction"> The lexer action to execute after the actions
    /// specified in {@code lexerActionExecutor}.
    /// </param>
    /// <returns> A <seealso cref="LexerActionExecutor"/> for executing the combined actions
    /// of {@code lexerActionExecutor} and {@code lexerAction}. </returns>
    static Ref<const LexerActionExecutor> append(const Ref<const LexerActionExecutor> &lexerActionExecutor,
                                                 Ref<const LexerAction> lexerAction);

    /// <summary>
    /// Creates a <seealso cref="LexerActionExecutor"/> which encodes the current offset
    /// for position-dependent lexer actions.
    ///
    /// <para>Normally, when the executor encounters lexer actions where
    /// <seealso cref="LexerAction#isPositionDependent"/> returns {@code true}, it calls
    /// <seealso cref="IntStream#seek"/> on the input <seealso cref="CharStream"/> to set the input
    /// position to the <em>end</em> of the current token. This behavior provides
    /// for efficient DFA representation of lexer actions which appear at the end
    /// of a lexer rule, even when the lexer rule matches a variable number of
    /// characters.</para>
    ///
    /// <para>Prior to traversing a match transition in the ATN, the current offset
    /// from the token start index is assigned to all position-dependent lexer
    /// actions which have not already been assigned a fixed offset. By storing
    /// the offsets relative to the token start index, the DFA representation of
    /// lexer actions which appear in the middle of tokens remains efficient due
    /// to sharing among tokens of the same length, regardless of their absolute
    /// position in the input stream.</para>
    ///
    /// <para>If the current executor already has offsets assigned to all
    /// position-dependent lexer actions, the method returns {@code this}.</para>
    /// </summary>
    /// <param name="offset"> The current offset to assign to all position-dependent
    /// lexer actions which do not already have offsets assigned.
    /// </param>
    /// <returns> A <seealso cref="LexerActionExecutor"/> which stores input stream offsets
    /// for all position-dependent lexer actions. </returns>
    Ref<const LexerActionExecutor> fixOffsetBeforeMatch(int offset) const;

    /// <summary>
    /// Gets the lexer actions to be executed by this executor. </summary>
    /// <returns> The lexer actions to be executed by this executor. </returns>
    const std::vector<Ref<const LexerAction>>& getLexerActions() const;

    /// <summary>
    /// Execute the actions encapsulated by this executor within the context of a
    /// particular <seealso cref="Lexer"/>.
    ///
    /// <para>This method calls <seealso cref="IntStream#seek"/> to set the position of the
    /// {@code input} <seealso cref="CharStream"/> prior to calling
    /// <seealso cref="LexerAction#execute"/> on a position-dependent action. Before the
    /// method returns, the input position will be restored to the same position
    /// it was in when the method was invoked.</para>
    /// </summary>
    /// <param name="lexer"> The lexer instance. </param>
    /// <param name="input"> The input stream which is the source for the current token.
    /// When this method is called, the current <seealso cref="IntStream#index"/> for
    /// {@code input} should be the start of the following token, i.e. 1
    /// character past the end of the current token. </param>
    /// <param name="startIndex"> The token start index. This value may be passed to
    /// <seealso cref="IntStream#seek"/> to set the {@code input} position to the beginning
    /// of the token. </param>
    void execute(Lexer *lexer, CharStream *input, size_t startIndex) const;

    /// <summary>
    /// Gets a hash code for this executor; this is the value returned by the
    /// {@code std::hash} specialization declared at the end of this header. </summary>
    /// <returns> The hash code. NOTE(review): the definition is not in this header;
    /// presumably the value is derived from the lexer actions and cached in
    /// {@code _hashCode} - confirm against the implementation file. </returns>
    size_t hashCode() const;

    /// <summary>
    /// Compares this executor with {@code other}. The free {@code operator==} declared
    /// below this class forwards to this method. </summary>
    /// <param name="other"> The executor to compare against. </param>
    /// <returns> {@code true} if the executors are considered equal. The exact criteria
    /// are defined out of line and are not visible in this header. </returns>
    bool equals(const LexerActionExecutor &other) const;

  private:
    // The lexer actions held by this executor; const, so fixed after construction.
    const std::vector<Ref<const LexerAction>> _lexerActions;
    // Mutable and atomic so that const member functions may update it.
    // NOTE(review): presumably a lazily computed hash cache - confirm against the implementation file.
    mutable std::atomic<size_t> _hashCode;
  };

  /// Equality operator for executors; the comparison itself is delegated to
  /// LexerActionExecutor::equals.
  inline bool operator==(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) {
    const bool areEqual = lhs.equals(rhs);
    return areEqual;
  }

  /// Inequality operator for executors; defined as the logical negation of operator==.
  inline bool operator!=(const LexerActionExecutor &lhs, const LexerActionExecutor &rhs) {
    return !(lhs == rhs);
  }

}  // namespace atn
}  // namespace antlr4

namespace std {

  /// std::hash specialization for LexerActionExecutor; the hash value is whatever
  /// LexerActionExecutor::hashCode() returns.
  template <>
  struct hash<::antlr4::atn::LexerActionExecutor> {
    size_t operator()(const ::antlr4::atn::LexerActionExecutor &lexerActionExecutor) const {
      const size_t digest = lexerActionExecutor.hashCode();
      return digest;
    }
  };

}  // namespace std