aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/llvm16/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
blob: 142b9c38e5fcb3aec2361a3fef51e196b48681b5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of SanitizerBinaryMetadata.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

#include <array>
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE "sanmd"

namespace {

//===--- Constants --------------------------------------------------------===//

// Base version of the metadata format. It occupies the lower 16 bits of the
// version word passed to the runtime callbacks.
constexpr uint32_t kVersionBase = 1;
// Version flag stored above the base version: offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority with which the module constructors/destructors are registered.
constexpr int kCtorDtorPriority = 2;

// Pairs of names of initialization callback functions and which section
// contains the relevant metadata.
class MetadataInfo {
public:
  const StringRef FunctionPrefix;
  const StringRef SectionSuffix;
  const uint32_t FeatureMask;

  static const MetadataInfo Covered;
  static const MetadataInfo Atomics;

private:
  // Forbid construction elsewhere.
  explicit constexpr MetadataInfo(StringRef FunctionPrefix,
                                  StringRef SectionSuffix, uint32_t Feature)
      : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
        FeatureMask(Feature) {}
};
const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
                                         kSanitizerBinaryMetadataCoveredSection,
                                         kSanitizerBinaryMetadataNone};
const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
                                         kSanitizerBinaryMetadataAtomicsSection,
                                         kSanitizerBinaryMetadataAtomics};

// The only instances of MetadataInfo are the constants above, so a set of
// them may simply store pointers to them. To deterministically generate code,
// we need to use a set with stable iteration order, such as SetVector.
using MetadataInfoSet = SetVector<const MetadataInfo *>;

//===--- Command-line options ---------------------------------------------===//

// Enabled by default. The value is forwarded as the `Weak` argument when the
// module constructors/destructors and their callbacks are created in run().
cl::opt<bool> ClWeakCallbacks(
    "sanitizer-metadata-weak-callbacks",
    cl::desc("Declare callbacks extern weak, and only call if non-null."),
    cl::Hidden, cl::init(true));

// The switches below default to off and are OR-ed into the options supplied
// by the creator of the pass (see transformOptionsFromCl), so they can turn a
// feature on but cannot turn one off.
cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
                            cl::desc("Emit PCs for covered functions."),
                            cl::Hidden, cl::init(false));
cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
                            cl::desc("Emit PCs for atomic operations."),
                            cl::Hidden, cl::init(false));
cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
                        cl::desc("Emit PCs for start of functions that are "
                                 "subject for use-after-return checking"),
                        cl::Hidden, cl::init(false));

//===--- Statistics -------------------------------------------------------===//

// Incremented once per function that gets covered metadata attached.
STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
// Incremented once per instruction that gets atomics metadata attached.
STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
// Incremented once per function whose feature mask has the UAR bit set.
STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");

//===----------------------------------------------------------------------===//

// Folds the command-line switches into \p Opts. A switch can only enable a
// feature; anything the caller already enabled stays enabled. The very same
// object is handed back as an rvalue so that the call can be used directly in
// a member initializer.
SanitizerBinaryMetadataOptions &&
transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
  if (ClEmitCovered)
    Opts.Covered = true;
  if (ClEmitAtomics)
    Opts.Atomics = true;
  if (ClEmitUAR)
    Opts.UAR = true;
  return std::move(Opts);
}

// Implements the SanitizerBinaryMetadata instrumentation for a single module.
//
// It annotates functions and instructions with !pcsections metadata according
// to the enabled options, and creates the module constructors/destructors
// that pass the bounds of the metadata sections to the runtime callbacks.
class SanitizerBinaryMetadata {
public:
  // \p Opts is merged with the command-line overrides before it is stored.
  SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
      : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
        TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
    // FIXME: Make it work with other formats.
    assert(TargetTriple.isOSBinFormatELF() && "ELF only");
  }

  // Processes the whole module. Returns true if any metadata was attached
  // (in which case the callbacks have been registered), false otherwise.
  bool run();

private:
  // Feature bits of the per-instruction metadata kinds that are enabled.
  // Atomics is the only such kind.
  uint32_t getEnabledPerInstructionFeature() const {
    return Options.Atomics ? MetadataInfo::Atomics.FeatureMask : uint32_t{0};
  }

  // Version word passed to the callbacks: the base version, plus the
  // pointer-sized-offsets flag when the module uses the medium or large code
  // model.
  uint32_t getVersion() const {
    const auto CM = Mod.getCodeModel();
    const bool PtrSizedOffsets =
        CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large);
    return PtrSizedOffsets ? (kVersionBase | kVersionPtrSizeRel) : kVersionBase;
  }

  // Processes one function; every metadata kind that gets used is added to
  // \p MIS.
  void runOn(Function &F, MetadataInfoSet &MIS);

  // Decides which metadata to collect for a single instruction and may add
  // feature bits to \p FeatureMask.
  //
  // The return value tells whether covered metadata is needed to interpret
  // the other metadata unambiguously. With atomics metadata, for instance,
  // every function containing memory operations (atomic or not) needs covered
  // metadata so that one can tell whether a memory operation is atomic in
  // modules compiled with SanitizerBinaryMetadata.
  bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
             uint32_t &FeatureMask);

  // Creates the global that marks the start or end of a metadata section.
  GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);

  // Target-dependent name of the section for \p SectionSuffix.
  StringRef getSectionName(StringRef SectionSuffix);

  // Name of the marker for the start of the section.
  Twine getSectionStart(StringRef SectionSuffix);

  // Name of the marker for the end of the section.
  Twine getSectionEnd(StringRef SectionSuffix);

  // Note: members are initialized in declaration order by the constructor.
  Module &Mod;
  const SanitizerBinaryMetadataOptions Options;
  const Triple TargetTriple;
  IRBuilder<> IRB;
};

bool SanitizerBinaryMetadata::run() {
  MetadataInfoSet MIS;

  for (Function &F : Mod)
    runOn(F, MIS);

  if (MIS.empty())
    return false;

  //
  // Setup constructors and call all initialization functions for requested
  // metadata features.
  //

  auto *Int8PtrTy = IRB.getInt8PtrTy();
  auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
  auto *Int32Ty = IRB.getInt32Ty();
  const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
  auto *Version = ConstantInt::get(Int32Ty, getVersion());

  for (const MetadataInfo *MI : MIS) {
    const std::array<Value *, InitTypes.size()> InitArgs = {
        Version,
        getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
        getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
    };
    // We declare the _add and _del functions as weak, and only call them if
    // there is a valid symbol linked. This allows building binaries with
    // semantic metadata, but without having callbacks. When a tool that wants
    // the metadata is linked which provides the callbacks, they will be called.
    Function *Ctor =
        createSanitizerCtorAndInitFunctions(
            Mod, (MI->FunctionPrefix + ".module_ctor").str(),
            (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
            /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
            .first;
    Function *Dtor =
        createSanitizerCtorAndInitFunctions(
            Mod, (MI->FunctionPrefix + ".module_dtor").str(),
            (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
            /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
            .first;
    Constant *CtorData = nullptr;
    Constant *DtorData = nullptr;
    if (TargetTriple.supportsCOMDAT()) {
      // Use COMDAT to deduplicate constructor/destructor function.
      Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
      Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
      CtorData = Ctor;
      DtorData = Dtor;
    }
    appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
    appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
  }

  return true;
}

// Processes one function: lets every instruction contribute its metadata and
// feature bits, then attaches covered metadata to the function if it was
// requested explicitly or is needed to interpret the other metadata. Every
// metadata kind that gets used is recorded in \p MIS.
void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
  // Skip functions without a body, functions that opted out of sanitizer
  // instrumentation, and available_externally functions (their actual body is
  // elsewhere).
  if (F.empty() ||
      F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation) ||
      F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return;

  MDBuilder MDB(F.getContext());

  // Features enabled for this function; stored alongside the covered metadata
  // if that is emitted.
  uint32_t FeatureMask = getEnabledPerInstructionFeature();
  // Covered metadata is not emitted for every function, to save space.
  bool RequiresCovered = false;
  // Whether the UAR feature applies is only known after looking at the
  // instructions, so they are visited even when FeatureMask is still empty.
  if (FeatureMask != 0 || Options.UAR) {
    for (BasicBlock &BB : F) {
      for (Instruction &I : BB) {
        if (runOn(I, MIS, MDB, FeatureMask))
          RequiresCovered = true;
      }
    }
  }

  // Variadic functions never keep the UAR bit.
  if (F.isVarArg())
    FeatureMask &= ~kSanitizerBinaryMetadataUAR;
  if ((FeatureMask & kSanitizerBinaryMetadataUAR) != 0) {
    ++NumMetadataUAR;
    RequiresCovered = true;
  }

  // Covered metadata is always emitted when explicitly requested; otherwise
  // only when some other metadata needs it to be interpreted unambiguously in
  // modules compiled with SanitizerBinaryMetadata.
  const bool EmitCovered =
      Options.Covered || (FeatureMask != 0 && RequiresCovered);
  if (!EmitCovered)
    return;

  ++NumMetadataCovered;
  const MetadataInfo &Covered = MetadataInfo::Covered;
  MIS.insert(&Covered);
  const StringRef Section = getSectionName(Covered.SectionSuffix);
  // The feature mask follows the (32 bit) size of the function, so a covered
  // entry takes `sizeof(void*) + 4 + 4` in total.
  Constant *FeatureMaskC = IRB.getInt32(FeatureMask);
  F.setMetadata(LLVMContext::MD_pcsections,
                MDB.createPCSections({{Section, {FeatureMaskC}}}));
}

// Returns true if the call \p CI is harmless with respect to
// use-after-return: it is safe both to pass pointers to local variables to
// the callee and to tail-call it.
//
// This is the case when the callee is known and is one of:
//  - an intrinsic: there are no intrinsic functions that leak arguments;
//  - a function that does not return: then the current function does not
//    return either, so there is no possibility of use-after-return;
//  - a sanitizer runtime function: those don't leak or don't return.
// Calls for which getCalledFunction() yields null are never considered safe.
bool isUARSafeCall(CallInst *CI) {
  auto *F = CI->getCalledFunction();
  if (!F)
    return false;
  if (F->isIntrinsic() || F->doesNotReturn())
    return true;

  // Name prefixes of sanitizer runtime functions. The HWASan runtime exports
  // its interface as "__hwasan_*"; the historical "__hwsan_" spelling is kept
  // as well so that everything that matched before still matches.
  static constexpr const char *SanitizerPrefixes[] = {
      "__asan_",  "__hwasan_", "__hwsan_", "__ubsan_",
      "__msan_",  "__tsan_",
  };
  const StringRef Name = F->getName();
  for (const char *Prefix : SanitizerPrefixes)
    if (Name.startswith(Prefix))
      return true;
  return false;
}

// Returns true if \p V has at least one user that is not known to be harmless
// with respect to use-after-return. Users reached through GEPs and bitcasts
// of \p V are examined recursively.
bool hasUseAfterReturnUnsafeUses(Value &V) {
  // Classifies a single user of V: true means the use is known to be harmless.
  const auto IsHarmlessUser = [&V](User *U) -> bool {
    auto *Inst = dyn_cast<Instruction>(U);
    // Users that are not instructions are conservatively treated as unsafe.
    if (!Inst)
      return false;
    if (Inst->isLifetimeStartOrEnd() || Inst->isDroppable())
      return true;
    if (isa<LoadInst>(Inst))
      return true;
    if (auto *Call = dyn_cast<CallInst>(Inst))
      return isUARSafeCall(Call);
    // Storing TO the value does not take its address; any other store does.
    if (auto *Store = dyn_cast<StoreInst>(Inst))
      return Store->getOperand(1) == &V;
    // Derived pointers are as safe as their own uses.
    if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst))
      return !hasUseAfterReturnUnsafeUses(*GEP);
    if (auto *Cast = dyn_cast<BitCastInst>(Inst))
      return !hasUseAfterReturnUnsafeUses(*Cast);
    return false;
  };

  for (User *U : V.users())
    if (!IsHarmlessUser(U))
      return true;
  return false;
}

// Returns true if \p I makes its function subject to use-after-return
// checking: either an alloca with unsafe uses, or a tail call to a function
// that is not known to be safe.
bool useAfterReturnUnsafe(Instruction &I) {
  if (isa<AllocaInst>(I))
    return hasUseAfterReturnUnsafeUses(I);

  auto *Call = dyn_cast<CallInst>(&I);
  if (!Call)
    return false;
  // Tail-called functions are not necessarily intercepted at runtime because
  // there is no call instruction, so the caller is conservatively marked as
  // requiring checking.
  return Call->isTailCall() && !isUARSafeCall(Call);
}

// Processes one instruction: sets the UAR bit in \p FeatureMask if the
// instruction calls for it, attaches atomics metadata where applicable, and
// records the metadata kinds used in \p MIS. Returns true if covered metadata
// is required for the enclosing function, i.e. if atomics are enabled and the
// instruction may read or write memory.
bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
                                    MDBuilder &MDB, uint32_t &FeatureMask) {
  // Instructions are only inspected for UAR until the bit has been set.
  const bool UARUndecided =
      Options.UAR && (FeatureMask & kSanitizerBinaryMetadataUAR) == 0;
  if (UARUndecided && useAfterReturnUnsafe(I))
    FeatureMask |= kSanitizerBinaryMetadataUAR;

  SmallVector<const MetadataInfo *, 1> Kinds;
  const bool AccessesMemory = Options.Atomics && I.mayReadOrWriteMemory();
  if (AccessesMemory) {
    // Only atomics with a sync scope other than single-thread are recorded.
    const auto Scope = getAtomicSyncScopeID(&I);
    if (Scope.has_value() && *Scope != SyncScope::SingleThread) {
      ++NumMetadataAtomics;
      Kinds.push_back(&MetadataInfo::Atomics);
    }
  }

  // Attach MD_pcsections to the instruction, one section per metadata kind.
  if (!Kinds.empty()) {
    SmallVector<MDBuilder::PCSection, 1> Sections;
    for (const MetadataInfo *Kind : Kinds) {
      MIS.insert(Kind);
      Sections.push_back({getSectionName(Kind->SectionSuffix), {}});
    }
    I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
  }

  return AccessesMemory;
}

// Creates, in the current module, a hidden extern-weak global variable of
// type \p Ty named \p MarkerName that stands for a section start/end marker.
GlobalVariable *
SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
  constexpr bool IsConstant = false;
  Constant *const NoInitializer = nullptr;
  // ExternalWeak linkage keeps the linker from reporting undefined symbol
  // errors if all sections are discarded due to section garbage collection.
  auto *GV = new GlobalVariable(Mod, Ty, IsConstant,
                                GlobalVariable::ExternalWeakLinkage,
                                NoInitializer, MarkerName);
  GV->setVisibility(GlobalValue::HiddenVisibility);
  return GV;
}

// Returns the name of the section that holds the metadata identified by
// \p SectionSuffix. Currently the suffix is returned unchanged.
StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
  // FIXME: Other TargetTriple (req. string pool)
  return SectionSuffix;
}

// Returns the name of the start marker of the section identified by
// \p SectionSuffix, formed by prepending "__start_".
// NOTE(review): the result is a Twine, which presumably refers to its operands
// rather than owning a copy of the text; run() consumes it within the same
// full-expression. Confirm operand lifetimes before storing the result or
// concatenating more pieces here.
Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
  return "__start_" + SectionSuffix;
}

// Returns the name of the end marker of the section identified by
// \p SectionSuffix, formed by prepending "__stop_".
// NOTE(review): the result is a Twine, which presumably refers to its operands
// rather than owning a copy of the text; run() consumes it within the same
// full-expression. Confirm operand lifetimes before storing the result or
// concatenating more pieces here.
Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
  return "__stop_" + SectionSuffix;
}

} // namespace

// Stores the options as given. Command-line overrides are applied later, when
// run() constructs the SanitizerBinaryMetadata object that does the work.
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
    SanitizerBinaryMetadataOptions Opts)
    : Options(std::move(Opts)) {}

// New pass manager entry point: runs the instrumentation on \p M with the
// stored options. All analyses are reported as preserved if nothing was
// emitted, none otherwise.
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
  SanitizerBinaryMetadata Impl(M, Options);
  const bool Changed = Impl.run();
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}