// path: root/contrib/libs/antlr4_cpp_runtime/src/Vocabulary.h
// blob: af5b24388041cc3be8775d45c78fb912bf6efdd6
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

#pragma once

#include "antlr4-common.h"

namespace antlr4 {
namespace dfa {

  /// <summary>
  /// Immutable table of token names, queried by numeric token type.
  ///
  /// <para>An instance holds three lists of strings -- literal names, symbolic
  /// names and display names -- plus the highest token type it covers. Every
  /// data member is <c>const</c>, so an instance can be constructed and copied
  /// but cannot be assigned to or modified afterwards. The class is
  /// <c>final</c> and declares no virtual functions.</para>
  ///
  /// <para>Only the default constructor and <see cref="getMaxTokenType"/> are
  /// defined in this header; all other members are defined out of line.</para>
  /// </summary>
  class ANTLR4CPP_PUBLIC Vocabulary final {
  public:
    /// <summary>
    /// A shared empty <see cref="Vocabulary"/> instance. Deprecated: use a
    /// default-constructed <see cref="Vocabulary"/> instead.
    ///
    /// <para>
    /// No literal or symbolic names are assigned to token types, so
    /// <see cref="getDisplayName"/> returns the numeric value for all tokens
    /// except the EOF token type (<c>Token::EOF</c>).</para>
    ///
    /// <para>Only declared here; the definition lives outside this header.</para>
    /// </summary>
    [[deprecated("Use the default constructor of Vocabulary instead.")]] static const Vocabulary EMPTY_VOCABULARY;

    /// <summary>
    /// Creates a vocabulary without any names: the three name lists are
    /// default-constructed (empty) and <see cref="getMaxTokenType"/> reports 0,
    /// the in-class default of the underlying member.
    /// </summary>
    Vocabulary() {}

    /// <summary>
    /// Copy constructor; copies every member. No move constructor is declared,
    /// so moving a <see cref="Vocabulary"/> also copies it.
    /// </summary>
    Vocabulary(const Vocabulary&) = default;

    /// <summary>
    /// Constructs a new instance of <see cref="Vocabulary"/> from the specified
    /// literal and symbolic token names.
    /// </summary>
    /// <param name="literalNames"> The literal names assigned to tokens. The
    /// vector is taken by value and therefore cannot be null; presumably an
    /// empty vector means that no literal names are assigned -- confirm against
    /// the out-of-line definition. </param>
    /// <param name="symbolicNames"> The symbolic names assigned to tokens. The
    /// vector is taken by value and therefore cannot be null; presumably an
    /// empty vector means that no symbolic names are assigned -- confirm against
    /// the out-of-line definition. </param>
    /// <remarks> NOTE(review): the lists are presumably indexed by token type;
    /// the definition is not part of this header. </remarks>
    /// <seealso cref="getLiteralName"/>
    /// <seealso cref="getSymbolicName"/>
    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames);

    /// <summary>
    /// Constructs a new instance of <see cref="Vocabulary"/> from the specified
    /// literal, symbolic, and display token names.
    /// </summary>
    /// <param name="literalNames"> The literal names assigned to tokens. Taken
    /// by value, so it cannot be null; presumably empty when no literal names
    /// are assigned -- confirm against the out-of-line definition. </param>
    /// <param name="symbolicNames"> The symbolic names assigned to tokens. Taken
    /// by value, so it cannot be null; presumably empty when no symbolic names
    /// are assigned -- confirm against the out-of-line definition. </param>
    /// <param name="displayNames"> The display names assigned to tokens. Taken
    /// by value, so it cannot be null; presumably left empty to use the values
    /// in <c>literalNames</c> and <c>symbolicNames</c> as the source of display
    /// names, as described in <see cref="getDisplayName"/> -- confirm against
    /// the out-of-line definition. </param>
    /// <seealso cref="getLiteralName"/>
    /// <seealso cref="getSymbolicName"/>
    /// <seealso cref="getDisplayName"/>
    Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames,
               std::vector<std::string> displayNames);

    /// <summary>
    /// Returns the highest token type value. It can be used to iterate from
    /// zero to that number, inclusively, thus querying all stored entries.
    /// For a default-constructed instance the value is 0. </summary>
    /// <returns> the stored highest token type value </returns>
    constexpr size_t getMaxTokenType() const { return _maxTokenType; }

    /// <summary>
    /// Gets the string literal associated with a token type. The string returned
    /// by this method, when a literal is associated with the type, can be used
    /// unaltered in a parser grammar to represent this token type.
    ///
    /// <para>The following table shows examples of lexer rules and the literal
    /// names assigned to the corresponding token types.</para>
    ///
    /// <table>
    ///  <tr>
    ///   <th>Rule</th>
    ///   <th>Literal Name</th>
    ///   <th>Same Name as a String Literal</th>
    ///  </tr>
    ///  <tr>
    ///   <td><c>THIS : 'this';</c></td>
    ///   <td><c>'this'</c></td>
    ///   <td><c>"'this'"</c></td>
    ///  </tr>
    ///  <tr>
    ///   <td><c>SQUOTE : '\'';</c></td>
    ///   <td><c>'\''</c></td>
    ///   <td><c>"'\\''"</c></td>
    ///  </tr>
    ///  <tr>
    ///   <td><c>ID : [A-Z]+;</c></td>
    ///   <td>n/a</td>
    ///   <td>no literal name</td>
    ///  </tr>
    /// </table>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The string literal associated with the specified token type.
    /// The result is a non-owning <c>std::string_view</c>; when no string
    /// literal is associated with the type it is presumably empty (a view has
    /// no null state to report) -- confirm against the out-of-line definition.
    /// NOTE(review): the view presumably refers to storage owned by this
    /// object, so it should not be used after the object is destroyed.
    /// </returns>
    std::string_view getLiteralName(size_t tokenType) const;

    /// <summary>
    /// Gets the symbolic name associated with a token type. The string returned
    /// by this method, when a symbolic name is associated with the type, can be
    /// used unaltered in a parser grammar to represent this token type.
    ///
    /// <para>This method supports token types defined by any of the following
    /// methods:</para>
    ///
    /// <ul>
    ///  <li>Tokens created by lexer rules.</li>
    ///  <li>Tokens defined in a <code>tokens{}</code> block in a lexer or parser
    ///  grammar.</li>
    ///  <li>The implicitly defined <c>EOF</c> token, which has the token type
    ///  <c>Token::EOF</c>.</li>
    /// </ul>
    ///
    /// <para>The following table shows examples of lexer rules and the symbolic
    /// names assigned to the corresponding token types.</para>
    ///
    /// <table>
    ///  <tr>
    ///   <th>Rule</th>
    ///   <th>Symbolic Name</th>
    ///  </tr>
    ///  <tr>
    ///   <td><c>THIS : 'this';</c></td>
    ///   <td><c>THIS</c></td>
    ///  </tr>
    ///  <tr>
    ///   <td><c>SQUOTE : '\'';</c></td>
    ///   <td><c>SQUOTE</c></td>
    ///  </tr>
    ///  <tr>
    ///   <td><c>ID : [A-Z]+;</c></td>
    ///   <td><c>ID</c></td>
    ///  </tr>
    /// </table>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The symbolic name associated with the specified token type.
    /// The result is a non-owning <c>std::string_view</c>; when no symbolic
    /// name is associated with the type it is presumably empty (a view has no
    /// null state to report) -- confirm against the out-of-line definition.
    /// NOTE(review): the view presumably refers to storage owned by this
    /// object, so it should not be used after the object is destroyed.
    /// </returns>
    std::string_view getSymbolicName(size_t tokenType) const;

    /// <summary>
    /// Gets the display name of a token type.
    ///
    /// <para>This class is <c>final</c> and the method is not virtual, so the
    /// behavior cannot be overridden by subclassing; applications customize
    /// display names by passing <c>displayNames</c> to the three-argument
    /// constructor. When no display name is supplied for a token type, the
    /// documented fallback is the first of the following that produces a
    /// result (the definition is out of line -- confirm there):</para>
    ///
    /// <ol>
    ///  <li>The result of <see cref="getLiteralName"/></li>
    ///  <li>The result of <see cref="getSymbolicName"/></li>
    ///  <li>The numeric value of <c>tokenType</c> rendered as text</li>
    /// </ol>
    /// </summary>
    /// <param name="tokenType"> The token type.
    /// </param>
    /// <returns> The display name of the token type, for use in error reporting or
    /// other user-visible messages which reference specific token types. Returned
    /// by value as an owning <c>std::string</c>. </returns>
    std::string getDisplayName(size_t tokenType) const;

  private:
    // The members below are all const: they can only be set from a constructor's
    // member initializer list, and their presence makes the class non-assignable.
    // Declaration order is also the order in which they are initialized.

    // Literal names given to the constructor (empty after default construction).
    std::vector<std::string> const _literalNames;
    // Symbolic names given to the constructor (empty after default construction).
    std::vector<std::string> const _symbolicNames;
    // Display names given to the three-argument constructor (empty after default
    // construction).
    std::vector<std::string> const _displayNames;
    // Value returned by getMaxTokenType(). Defaults to 0; NOTE(review): presumably
    // derived from the sizes of the name lists by the out-of-line constructors.
    const size_t _maxTokenType = 0;
  };

} // namespace dfa
} // namespace antlr4