aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/clang16/tools/extra/clang-tidy/modernize/RawStringLiteralCheck.cpp
blob: 57e4c4863d3b7028bdc22e0cd3d9a1674d6a3cef (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RawStringLiteralCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Lex/Lexer.h"

using namespace clang::ast_matchers;

namespace clang::tidy::modernize {

namespace {

/// Checks whether \p HayStack uses backslash escapes, and only ones whose
/// introducing character is listed in \p Escapes.
///
/// The text is scanned left to right. For every backslash found, the character
/// immediately following it must be a member of \p Escapes; the scan then
/// resumes after that character, so an escaped backslash is not re-examined.
///
/// \returns true if at least one backslash is present and each examined
/// backslash is followed by a character from \p Escapes; false otherwise,
/// including when \p HayStack contains no backslash at all or ends in a
/// dangling backslash.
bool containsEscapes(StringRef HayStack, StringRef Escapes) {
  size_t BackSlash = HayStack.find('\\');
  if (BackSlash == StringRef::npos)
    return false;

  do {
    const size_t Escaped = BackSlash + 1;
    // A backslash at the very end has no escaped character; indexing past the
    // end would be out of bounds, so treat it as an unsupported escape.
    if (Escaped >= HayStack.size() || !Escapes.contains(HayStack[Escaped]))
      return false;
    // Skip the escaped character itself before searching for the next one.
    BackSlash = HayStack.find('\\', Escaped + 1);
  } while (BackSlash != StringRef::npos);

  return true;
}

/// Reports whether the literal spelling in \p Text is already a raw string,
/// i.e. whether the character directly before its first double quote is 'R'.
/// \p Text is expected to contain a double quote (asserted).
bool isRawStringLiteral(StringRef Text) {
  const size_t OpeningQuote = Text.find('"');
  assert(OpeningQuote != StringRef::npos);

  // Nothing precedes the quote, so there cannot be an R prefix.
  if (OpeningQuote == 0)
    return false;

  return Text[OpeningQuote - 1] == 'R';
}

/// Decides whether \p Literal is a candidate for conversion to a raw string.
///
/// \returns true only when all of the following hold:
///  - the literal is an ordinary string literal,
///  - none of its bytes is flagged in \p DisallowedChars,
///  - its source text can be retrieved and is not already a raw string,
///  - containsEscapes() accepts the source text for the escape introducers
///    ' \ " ? x 0 1.
bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
                               const StringLiteral *Literal,
                               const CharsBitSet &DisallowedChars) {
  // FIXME: Handle L"", u8"", u"" and U"" literals.
  if (!Literal->isOrdinary())
    return false;

  // Bail out as soon as one disallowed byte shows up in the literal's value.
  for (const unsigned char Byte : Literal->getBytes()) {
    if (DisallowedChars.test(Byte))
      return false;
  }

  // Fetch the literal exactly as it is spelled in the source file.
  const auto &SM = *Result.SourceManager;
  const auto &LangOpts = Result.Context->getLangOpts();
  const CharSourceRange TokenRange =
      CharSourceRange::getTokenRange(Literal->getSourceRange());
  const CharSourceRange FileRange =
      Lexer::makeFileCharRange(TokenRange, SM, LangOpts);
  const StringRef Spelling = Lexer::getSourceText(FileRange, SM, LangOpts);

  if (Spelling.empty())
    return false;
  if (isRawStringLiteral(Spelling))
    return false;

  return containsEscapes(Spelling, R"('\"?x01)");
}

/// Reports whether \p Bytes contains the sequence that would terminate a raw
/// string literal using \p Delimiter: a closing parenthesis, the delimiter,
/// then a double quote.
bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
  // Without a delimiter the terminator is just `)"`.
  if (Delimiter.empty())
    return Bytes.find(std::string(R"lit()")lit")) != StringRef::npos;

  return Bytes.find(")" + Delimiter + R"(")") != StringRef::npos;
}

std::string asRawStringLiteral(const StringLiteral *Literal,
                               const std::string &DelimiterStem) {
  const StringRef Bytes = Literal->getBytes();
  std::string Delimiter;
  for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
    Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
  }

  if (Delimiter.empty())
    return (R"(R"()" + Bytes + R"lit()")lit").str();

  return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
}

} // namespace

RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
                                             ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      DelimiterStem(Options.get("DelimiterStem", "lit")),
      ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
  // Mark every byte value outside 0x20 (space) .. 0x7E ('~') as disallowed:
  //  - 0x00..0x1F: control characters, including \a \b \t \n \v \f \r
  //  - 0x7F:       delete
  //  - 0x80..0xFF: non-ASCII
  constexpr unsigned int FirstAllowed = 0x20u;
  constexpr unsigned int LastAllowed = 0x7Eu;
  for (unsigned int C = 0; C <= 0xFFu; ++C) {
    if (C < FirstAllowed || C > LastAllowed)
      DisallowedChars.set(static_cast<unsigned char>(C));
  }
}

// Records the current values of this check's two options in Opts, under the
// same keys ("DelimiterStem", "ReplaceShorterLiterals") that the constructor
// reads them from.
void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "DelimiterStem", DelimiterStem);
  Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
}

// Registers a matcher for every string literal whose parent is not a
// predefined expression, bound as "lit" for check().
void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
  const auto NotInPredefinedExpr = unless(hasParent(predefinedExpr()));
  Finder->addMatcher(stringLiteral(NotInPredefinedExpr).bind("lit"), this);
}

// Handles one matched string literal: skips literals that start inside a macro
// expansion or that are not conversion candidates, then emits the replacement
// if ReplaceShorterLiterals is set or the raw spelling is no longer than the
// token measured at the literal's begin location.
void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
  if (Literal->getBeginLoc().isMacroID())
    return;

  if (!containsEscapedCharacters(Result, Literal, DisallowedChars))
    return;

  const std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);

  // The length is only measured when the option does not already force the
  // replacement.
  if (!ReplaceShorterLiterals) {
    const unsigned OriginalLength = Lexer::MeasureTokenLength(
        Literal->getBeginLoc(), *Result.SourceManager, getLangOpts());
    if (Replacement.length() > OriginalLength)
      return;
  }

  replaceWithRawStringLiteral(Result, Literal, Replacement);
}

// Emits the diagnostic at the start of Literal and attaches a fix-it that
// replaces the literal's file character range with Replacement.
void RawStringLiteralCheck::replaceWithRawStringLiteral(
    const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
    StringRef Replacement) {
  const CharSourceRange TokenRange =
      CharSourceRange::getTokenRange(Literal->getSourceRange());
  const CharSourceRange FileRange = Lexer::makeFileCharRange(
      TokenRange, *Result.SourceManager, getLangOpts());
  const FixItHint Fix = FixItHint::CreateReplacement(FileRange, Replacement);

  diag(Literal->getBeginLoc(),
       "escaped string literal can be written as a raw string literal")
      << Fix;
}

} // namespace clang::tidy::modernize