1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
//
// CodeCompletionCore.hpp
//
// C++ port of antlr4-c3 (TypeScript) by Mike Lischke
// Licensed under the MIT License.
//
#pragma once
#include <Parser.h>
#include <ParserRuleContext.h>
#include <Token.h>
#include <Vocabulary.h>
#include <atn/ATNState.h>
#include <atn/PredicateTransition.h>
#include <atn/RuleStartState.h>
#include <atn/Transition.h>
#include <misc/IntervalSet.h>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <map>
#include <optional>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace c3 {
using TokenList = std::vector<size_t>;
using RuleList = std::vector<size_t>;
struct CandidateRule {
size_t startTokenIndex;
RuleList ruleList;
friend bool operator==(const CandidateRule& lhs, const CandidateRule& rhs) = default;
};
/**
* All the candidates which have been found. Tokens and rules are separated.
* – Token entries include a list of tokens that directly follow them (see also
* the "following" member in the FollowSetWithPath class). – Rule entries
* include the index of the starting token within the evaluated rule, along with
* a call stack of rules found during evaluation. – cancelled will be true if
* the collectCandidates() was cancelled or timed out.
*/
struct CandidatesCollection {
std::map<size_t, TokenList> tokens;
std::map<size_t, CandidateRule> rules;
bool isCancelled;
friend bool operator==(const CandidatesCollection& lhs, const CandidatesCollection& rhs) =
default;
};
/**
* Optional parameters for `CodeCompletionCore`.
*/
struct Parameters {
/** An option parser rule context to limit the search space. */
const antlr4::ParserRuleContext* context = nullptr;
/** If non-zero, the number of milliseconds until collecting times out. */
std::optional<std::chrono::milliseconds> timeout = std::nullopt;
/**
* If set to a non-`NULL` atomic boolean, and that boolean value is set to
* true while the function is executing, then collecting candidates will abort
* as soon as possible.
*/
std::atomic<bool>* isCancelled = nullptr;
};
struct DebugOptions {
/**
* Not dependent on showDebugOutput.
* Prints the collected rules + tokens to terminal.
*/
bool showResult = false;
/**
* Enables printing ATN state info to terminal.
*/
bool showDebugOutput = false;
/**
* Only relevant when showDebugOutput is true.
* Enables transition printing for a state.
*/
bool showTransitions = false;
/**
* Only relevant when showDebugOutput is true.
* Enables call stack printing for each rule recursion.
*/
bool showRuleStack = false;
};
class CodeCompletionCore {
private:
struct PipelineEntry {
antlr4::atn::ATNState* state;
size_t tokenListIndex;
};
struct RuleWithStartToken {
size_t startTokenIndex;
size_t ruleIndex;
};
using RuleWithStartTokenList = std::vector<RuleWithStartToken>;
/**
* A record for a follow set along with the path at which this set was found.
* If there is only a single symbol in the interval set then we also collect and
* store tokens which follow this symbol directly in its rule (i.e. there is no
* intermediate rule transition). Only single label transitions are considered.
* This is useful if you have a chain of tokens which can be suggested as a
* whole, because there is a fixed sequence in the grammar.
*/
struct FollowSetWithPath {
antlr4::misc::IntervalSet intervals;
RuleList path;
TokenList following;
};
/**
* A list of follow sets (for a given state number) + all of them combined for
* quick hit tests + whether they are exhaustive (false if subsequent
* yet-unprocessed rules could add further tokens to the follow set, true
* otherwise). This data is static in nature (because the used ATN states are
* part of a static struct: the ATN). Hence it can be shared between all C3
* instances, however it depends on the actual parser class (type).
*/
struct FollowSetsHolder {
std::vector<FollowSetWithPath> sets;
antlr4::misc::IntervalSet combined;
bool isExhaustive;
};
using FollowSetsPerState = std::unordered_map<size_t, FollowSetsHolder>;
/** Token stream position info after a rule was processed. */
using RuleEndStatus = std::unordered_set<size_t>;
public:
explicit CodeCompletionCore(antlr4::Parser* parser);
/**
* Tailoring of the result:
* Tokens which should not appear in the candidates set.
*/
std::unordered_set<size_t> ignoredTokens; // NOLINT: public field
/**
* Rules which replace any candidate token they contain.
* This allows to return descriptive rules (e.g. className, instead of
* ID/identifier).
*/
std::unordered_set<size_t> preferredRules; // NOLINT: public field
/**
* Specify if preferred rules should translated top-down (higher index rule
* returns first) or bottom-up (lower index rule returns first).
*/
bool translateRulesTopDown = false; // NOLINT: public field
/**
* Print human readable ATN state and other info.
*/
DebugOptions debugOptions; // NOLINT: public field
/**
* This is the main entry point. The caret token index specifies the token
* stream index for the token which currently covers the caret (or any other
* position you want to get code completion candidates for). Optionally you
* can pass in a parser rule context which limits the ATN walk to only that or
* called rules. This can significantly speed up the retrieval process but
* might miss some candidates (if they are outside of the given context).
*
* @param caretTokenIndex The index of the token at the caret position.
* @param parameters Optional parameters.
* @returns The collection of completion candidates. If cancelled or timed
* out, the returned collection will have its 'cancelled' value set to true
* and the collected candidates may be incomplete.
*/
CandidatesCollection collectCandidates(size_t caretTokenIndex, Parameters parameters = {});
private:
static thread_local std::unordered_map<std::type_index, FollowSetsPerState> followSetsByATN;
static std::vector<std::string> atnStateTypeMap;
antlr4::Parser* parser;
const antlr4::atn::ATN* atn;
const antlr4::dfa::Vocabulary* vocabulary;
const std::vector<std::string>* ruleNames;
std::vector<const antlr4::Token*> tokens;
std::vector<int> precedenceStack;
size_t tokenStartIndex = 0;
size_t statesProcessed = 0;
/**
* A mapping of rule index + token stream position to end token positions.
* A rule which has been visited before with the same input position will
* always produce the same output positions.
*/
std::unordered_map<size_t, std::unordered_map<size_t, RuleEndStatus>> shortcutMap;
/** The collected candidates (rules and tokens). */
c3::CandidatesCollection candidates;
std::optional<std::chrono::milliseconds> timeout;
std::atomic<bool>* cancel;
std::chrono::steady_clock::time_point timeoutStart;
bool checkPredicate(const antlr4::atn::PredicateTransition* transition);
bool translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList);
bool translateToRuleIndex(size_t index, RuleWithStartTokenList const& ruleWithStartTokenList);
static std::vector<size_t> getFollowingTokens(const antlr4::atn::Transition* transition);
FollowSetsHolder determineFollowSets(antlr4::atn::ATNState* start, antlr4::atn::ATNState* stop);
bool collectFollowSets(
antlr4::atn::ATNState* state,
antlr4::atn::ATNState* stopState,
std::vector<FollowSetWithPath>& followSets,
std::vector<antlr4::atn::ATNState*>& stateStack,
std::vector<size_t>& ruleStack
);
RuleEndStatus processRule(
antlr4::atn::RuleStartState* startState,
size_t tokenListIndex,
RuleWithStartTokenList& callStack,
int precedence,
size_t indentation,
bool& timedOut
);
antlr4::misc::IntervalSet allUserTokens() const;
std::string generateBaseDescription(antlr4::atn::ATNState* state);
void printDescription(
size_t indentation,
antlr4::atn::ATNState* state,
std::string const& baseDescription,
size_t tokenIndex
);
void printRuleState(RuleWithStartTokenList const& stack);
void printOverallResults();
};
} // namespace c3
|