contrib/libs/clang16/lib/Format/NamespaceEndCommentsFixer.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376

//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
/// fixes namespace end comments.
///
//===----------------------------------------------------------------------===//

#include "NamespaceEndCommentsFixer.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Regex.h"

#define DEBUG_TYPE "namespace-end-comments-fixer"

namespace clang {
namespace format {

namespace {
// Iterates all tokens starting from StartTok to EndTok and apply Fn to all
// tokens between them including StartTok and EndTok. Returns the token after
// EndTok.
const FormatToken *
processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
              tok::TokenKind EndTok,
              llvm::function_ref<void(const FormatToken *)> Fn) {
  if (!Tok || Tok->isNot(StartTok))
    return Tok;
  int NestLevel = 0;
  do {
    if (Tok->is(StartTok))
      ++NestLevel;
    else if (Tok->is(EndTok))
      --NestLevel;
    if (Fn)
      Fn(Tok);
    Tok = Tok->getNextNonComment();
  } while (Tok && NestLevel > 0);
  return Tok;
}

const FormatToken *skipAttribute(const FormatToken *Tok) {
  if (!Tok)
    return nullptr;
  if (Tok->is(tok::kw___attribute)) {
    Tok = Tok->getNextNonComment();
    Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
  } else if (Tok->is(tok::l_square)) {
    Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
  }
  return Tok;
}

// Computes the name of a namespace given the namespace token.
// Returns "" for anonymous namespace.
std::string computeName(const FormatToken *NamespaceTok) {
  assert(NamespaceTok &&
         NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "expecting a namespace token");
  std::string name;
  const FormatToken *Tok = NamespaceTok->getNextNonComment();
  if (NamespaceTok->is(TT_NamespaceMacro)) {
    // Collects all the non-comment tokens between opening parenthesis
    // and closing parenthesis or comma.
    assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
    Tok = Tok->getNextNonComment();
    while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
      name += Tok->TokenText;
      Tok = Tok->getNextNonComment();
    }
    return name;
  }
  Tok = skipAttribute(Tok);

  std::string FirstNSName;
  // For `namespace [[foo]] A::B::inline C {` or
  // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  // one token before that up until the '{'. A '(' might be a macro with
  // arguments.
  const FormatToken *FirstNSTok = nullptr;
  while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
    if (FirstNSTok)
      FirstNSName += FirstNSTok->TokenText;
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
  }

  if (FirstNSTok)
    Tok = FirstNSTok;
  Tok = skipAttribute(Tok);

  FirstNSTok = nullptr;
  // Add everything from '(' to ')'.
  auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
  bool IsPrevColoncolon = false;
  bool HasColoncolon = false;
  bool IsPrevInline = false;
  bool NameFinished = false;
  // If we found '::' in name, then it's the name. Otherwise, we can't tell
  // which one is name. For example, `namespace A B {`.
  while (Tok && Tok->isNot(tok::l_brace)) {
    if (FirstNSTok) {
      if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
        if (FirstNSTok->is(tok::l_paren)) {
          FirstNSTok = Tok =
              processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
          continue;
        }
        if (FirstNSTok->isNot(tok::coloncolon)) {
          NameFinished = true;
          break;
        }
      }
      name += FirstNSTok->TokenText;
      IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
      HasColoncolon = HasColoncolon || IsPrevColoncolon;
      if (FirstNSTok->is(tok::kw_inline)) {
        name += " ";
        IsPrevInline = true;
      }
    }
    FirstNSTok = Tok;
    Tok = Tok->getNextNonComment();
    const FormatToken *TokAfterAttr = skipAttribute(Tok);
    if (TokAfterAttr != Tok)
      FirstNSTok = Tok = TokAfterAttr;
  }
  if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
    name += FirstNSTok->TokenText;
  if (FirstNSName.empty() || HasColoncolon)
    return name;
  return name.empty() ? FirstNSName : FirstNSName + " " + name;
}

std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
                                  const FormatToken *NamespaceTok,
                                  unsigned SpacesToAdd) {
  return "";
  std::string text = "//";
  text.append(SpacesToAdd, ' ');
  text += NamespaceTok->TokenText;
  if (NamespaceTok->is(TT_NamespaceMacro))
    text += "(";
  else if (!NamespaceName.empty())
    text += ' ';
  text += NamespaceName;
  if (NamespaceTok->is(TT_NamespaceMacro))
    text += ")";
  if (AddNewline)
    text += '\n';
  return text;
}

bool hasEndComment(const FormatToken *RBraceTok) {
  return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
}

bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
                     const FormatToken *NamespaceTok) {
  assert(hasEndComment(RBraceTok));
  const FormatToken *Comment = RBraceTok->Next;

  // Matches a valid namespace end comment.
  // Valid namespace end comments don't need to be edited.
  static const llvm::Regex NamespaceCommentPattern =
      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
                  "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
                  llvm::Regex::IgnoreCase);
  static const llvm::Regex NamespaceMacroCommentPattern =
      llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
                  "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
                  llvm::Regex::IgnoreCase);

  SmallVector<StringRef, 8> Groups;
  if (NamespaceTok->is(TT_NamespaceMacro) &&
      NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
    StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
    // The name of the macro must be used.
    if (NamespaceTokenText != NamespaceTok->TokenText)
      return false;
  } else if (NamespaceTok->isNot(tok::kw_namespace) ||
             !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
    // Comment does not match regex.
    return false;
  }
  StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
  // Anonymous namespace comments must not mention a namespace name.
  if (NamespaceName.empty() && !NamespaceNameInComment.empty())
    return false;
  StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
  // Named namespace comments must not mention anonymous namespace.
  if (!NamespaceName.empty() && !AnonymousInComment.empty())
    return false;
  if (NamespaceNameInComment == NamespaceName)
    return true;

  // Has namespace comment flowed onto the next line.
  // } // namespace
  //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
  if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
    return false;

  static const llvm::Regex CommentPattern = llvm::Regex(
      "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);

  // Pull out just the comment text.
  if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
    return false;
  NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";

  return NamespaceNameInComment == NamespaceName;
}

void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
                   const SourceManager &SourceMgr,
                   tooling::Replacements *Fixes) {
  auto EndLoc = RBraceTok->Tok.getEndLoc();
  auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  if (Err) {
    llvm::errs() << "Error while adding namespace end comment: "
                 << llvm::toString(std::move(Err)) << "\n";
  }
}

void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
                      const SourceManager &SourceMgr,
                      tooling::Replacements *Fixes) {
  assert(hasEndComment(RBraceTok));
  const FormatToken *Comment = RBraceTok->Next;
  auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
                                             Comment->Tok.getEndLoc());
  auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
  if (Err) {
    llvm::errs() << "Error while updating namespace end comment: "
                 << llvm::toString(std::move(Err)) << "\n";
  }
}
} // namespace

const FormatToken *
getNamespaceToken(const AnnotatedLine *Line,
                  const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
    return nullptr;
  size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
  if (StartLineIndex == UnwrappedLine::kInvalidIndex)
    return nullptr;
  assert(StartLineIndex < AnnotatedLines.size());
  const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
  if (NamespaceTok->is(tok::l_brace)) {
    // "namespace" keyword can be on the line preceding '{', e.g. in styles
    // where BraceWrapping.AfterNamespace is true.
    if (StartLineIndex > 0) {
      NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
      if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
        return nullptr;
    }
  }

  return NamespaceTok->getNamespaceToken();
}

StringRef
getNamespaceTokenText(const AnnotatedLine *Line,
                      const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
  const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
  return NamespaceTok ? NamespaceTok->TokenText : StringRef();
}

NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
                                                     const FormatStyle &Style)
    : TokenAnalyzer(Env, Style) {}

std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
    FormatTokenLexer &Tokens) {
  const SourceManager &SourceMgr = Env.getSourceManager();
  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  tooling::Replacements Fixes;

  // Spin through the lines and ensure we have balanced braces.
  int Braces = 0;
  for (AnnotatedLine *Line : AnnotatedLines) {
    FormatToken *Tok = Line->First;
    while (Tok) {
      Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
      Tok = Tok->Next;
    }
  }
  // Don't attempt to comment unbalanced braces or this can
  // lead to comments being placed on the closing brace which isn't
  // the matching brace of the namespace. (occurs during incomplete editing).
  if (Braces != 0)
    return {Fixes, 0};

  std::string AllNamespaceNames;
  size_t StartLineIndex = SIZE_MAX;
  StringRef NamespaceTokenText;
  unsigned int CompactedNamespacesCount = 0;
  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
    const AnnotatedLine *EndLine = AnnotatedLines[I];
    const FormatToken *NamespaceTok =
        getNamespaceToken(EndLine, AnnotatedLines);
    if (!NamespaceTok)
      continue;
    FormatToken *RBraceTok = EndLine->First;
    if (RBraceTok->Finalized)
      continue;
    RBraceTok->Finalized = true;
    const FormatToken *EndCommentPrevTok = RBraceTok;
    // Namespaces often end with '};'. In that case, attach namespace end
    // comments to the semicolon tokens.
    if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
      EndCommentPrevTok = RBraceTok->Next;
    if (StartLineIndex == SIZE_MAX)
      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
    std::string NamespaceName = computeName(NamespaceTok);
    if (Style.CompactNamespaces) {
      if (CompactedNamespacesCount == 0)
        NamespaceTokenText = NamespaceTok->TokenText;
      if ((I + 1 < E) &&
          NamespaceTokenText ==
              getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
          StartLineIndex - CompactedNamespacesCount - 1 ==
              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
          !AnnotatedLines[I + 1]->First->Finalized) {
        if (hasEndComment(EndCommentPrevTok)) {
          // remove end comment, it will be merged in next one
          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
        }
        ++CompactedNamespacesCount;
        if (!NamespaceName.empty())
          AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
        continue;
      }
      NamespaceName += AllNamespaceNames;
      CompactedNamespacesCount = 0;
      AllNamespaceNames = std::string();
    }
    // The next token in the token stream after the place where the end comment
    // token must be. This is either the next token on the current line or the
    // first token on the next line.
    const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
    if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
      EndCommentNextTok = EndCommentNextTok->Next;
    if (!EndCommentNextTok && I + 1 < E)
      EndCommentNextTok = AnnotatedLines[I + 1]->First;
    bool AddNewline = EndCommentNextTok &&
                      EndCommentNextTok->NewlinesBefore == 0 &&
                      EndCommentNextTok->isNot(tok::eof);
    const std::string EndCommentText =
        computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
                              Style.SpacesInLineCommentPrefix.Minimum);
    if (!hasEndComment(EndCommentPrevTok)) {
      bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
      if (!isShort)
        addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
                                NamespaceTok)) {
      updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    }
    StartLineIndex = SIZE_MAX;
  }
  return {Fixes, 0};
}

} // namespace format
} // namespace clang